/home/mip/www/img/credit/datatables/tokenizer.tar
composer.lock000064400000001217151520414440007246 0ustar00{
    "_readme": [
        "This file locks the dependencies of your project to a known state",
        "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
        "This file is @generated automatically"
    ],
    "content-hash": "b010f1b3d9d47d431ee1cb54ac1de755",
    "packages": [],
    "packages-dev": [],
    "aliases": [],
    "minimum-stability": "stable",
    "stability-flags": [],
    "prefer-stable": false,
    "prefer-lowest": false,
    "platform": {
        "php": "^7.2 || ^8.0",
        "ext-xmlwriter": "*",
        "ext-dom": "*",
        "ext-tokenizer": "*"
    },
    "platform-dev": []
}
LICENSE000064400000002774151520414440005563 0ustar00Tokenizer

Copyright (c) 2017 Arne Blankerts <arne@blankerts.de> and contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of Arne Blankerts nor the names of contributors
  may be used to endorse or promote products derived from this software
  without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
src/NamespaceUri.php000064400000001123151520414440010415 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Value object holding an XML namespace URI as a string.
 *
 * The value is checked once in the constructor and can only be read afterwards.
 */
class NamespaceUri {

    /** @var string */
    private $value;

    /**
     * @param string $value the namespace URI; must contain at least one colon
     *
     * @throws NamespaceUriException if $value contains no colon
     */
    public function __construct(string $value) {
        $this->ensureValidUri($value);
        $this->value = $value;
    }

    /**
     * Returns the URI exactly as it was passed to the constructor.
     */
    public function asString(): string {
        return $this->value;
    }

    /**
     * Minimal plausibility check: only the presence of a colon is verified,
     * no further URI syntax validation is performed.
     *
     * @throws NamespaceUriException if $value contains no colon
     */
    private function ensureValidUri(string $value): void {
        if (\strpos($value, ':') === false) {
            throw new NamespaceUriException(
                \sprintf("Namespace URI '%s' must contain at least one colon", $value)
            );
        }
    }
}
src/Token.php000064400000001204151520414440007121 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * A single token: the line it belongs to, its token name and its text.
 *
 * All three values are set in the constructor and exposed through getters only.
 */
class Token {

    /** @var int line number this token is attributed to */
    private $line;

    /** @var string token name, e.g. 'T_WHITESPACE' */
    private $name;

    /** @var string text of the token */
    private $value;

    /**
     * @param int    $line  line number the token is attributed to
     * @param string $name  token name
     * @param string $value token text
     */
    public function __construct(int $line, string $name, string $value) {
        $this->value = $value;
        $this->name = $name;
        $this->line = $line;
    }

    /**
     * @return int the line number given at construction
     */
    public function getLine(): int {
        return $this->line;
    }

    /**
     * @return string the token name given at construction
     */
    public function getName(): string {
        return $this->name;
    }

    /**
     * @return string the token text given at construction
     */
    public function getValue(): string {
        return $this->value;
    }
}
src/TokenCollection.php000064400000004276151520414440011151 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Collection of Token objects supporting array access, foreach iteration and count().
 */
class TokenCollection implements \ArrayAccess, \Iterator, \Countable {

    /** @var Token[] */
    private $tokens = [];

    /**
     * Number of next() calls since the last rewind(); valid() compares it with count().
     *
     * @var int
     */
    private $pos = 0;

    /**
     * Appends a token at the end of the collection.
     */
    public function addToken(Token $token): void {
        $this->tokens[] = $token;
    }

    /**
     * Returns the token at the internal array pointer.
     */
    public function current(): Token {
        return \current($this->tokens);
    }

    /**
     * Returns the array key at the internal array pointer.
     */
    public function key(): int {
        return \key($this->tokens);
    }

    /**
     * Advances both the internal array pointer and the step counter.
     */
    public function next(): void {
        \next($this->tokens);
        $this->pos++;
    }

    /**
     * True while fewer steps were taken than there are tokens.
     */
    public function valid(): bool {
        return $this->count() > $this->pos;
    }

    /**
     * Resets the internal array pointer and the step counter.
     */
    public function rewind(): void {
        \reset($this->tokens);
        $this->pos = 0;
    }

    /**
     * Returns the number of tokens currently stored.
     */
    public function count(): int {
        return \count($this->tokens);
    }

    /**
     * Tells whether a token is stored at the given offset.
     */
    public function offsetExists($offset): bool {
        return isset($this->tokens[$offset]);
    }

    /**
     * Returns the token stored at the given offset.
     *
     * @throws TokenCollectionException if no token is stored at $offset
     */
    public function offsetGet($offset): Token {
        if (!$this->offsetExists($offset)) {
            throw new TokenCollectionException(
                \sprintf('No Token at offset %s', $offset)
            );
        }

        return $this->tokens[$offset];
    }

    /**
     * Stores a token at an explicit integer offset.
     *
     * @param Token $value
     *
     * @throws TokenCollectionException if $offset is not an integer or $value is not a Token
     */
    public function offsetSet($offset, $value): void {
        if (!\is_int($offset)) {
            $type = \gettype($offset);

            throw new TokenCollectionException(
                \sprintf(
                    'Offset must be of type integer, %s given',
                    // report the class of the offending offset, not of the value
                    $type === 'object' ? \get_class($offset) : $type
                )
            );
        }

        if (!$value instanceof Token) {
            $type = \gettype($value);

            throw new TokenCollectionException(
                \sprintf(
                    'Value must be of type %s, %s given',
                    Token::class,
                    $type === 'object' ? \get_class($value) : $type
                )
            );
        }
        $this->tokens[$offset] = $value;
    }

    /**
     * Removes the token at the given offset; remaining keys are not renumbered.
     */
    public function offsetUnset($offset): void {
        unset($this->tokens[$offset]);
    }
}
src/NamespaceUriException.php000064400000000161151520414440012275 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Thrown by NamespaceUri when the given URI string contains no colon.
 */
class NamespaceUriException extends Exception {
}
src/TokenCollectionException.php000064400000000164151520414440013020 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Thrown by TokenCollection when reading an offset that holds no token, or when
 * offsetSet() receives a non-integer offset or a value that is not a Token.
 */
class TokenCollectionException extends Exception {
}
src/Exception.php000064400000000146151520414440010003 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Base class of this namespace's exceptions (NamespaceUriException and
 * TokenCollectionException extend it); adds nothing to \Exception.
 */
class Exception extends \Exception {
}
src/Tokenizer.php000064400000006775151520414440010035 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Turns PHP source code into a TokenCollection in which every Token carries a line
 * number, splitting multi-line tokens per line and adding empty tokens for blank lines.
 */
class Tokenizer {

    /**
     * Names assigned to the tokens that token_get_all() returns as plain strings
     * instead of arrays.
     *
     * @var array
     */
    private $map = [
        '(' => 'T_OPEN_BRACKET',
        ')' => 'T_CLOSE_BRACKET',
        '[' => 'T_OPEN_SQUARE',
        ']' => 'T_CLOSE_SQUARE',
        '{' => 'T_OPEN_CURLY',
        '}' => 'T_CLOSE_CURLY',
        ';' => 'T_SEMICOLON',
        '.' => 'T_DOT',
        ',' => 'T_COMMA',
        '=' => 'T_EQUAL',
        '<' => 'T_LT',
        '>' => 'T_GT',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULT',
        '/' => 'T_DIV',
        '?' => 'T_QUESTION_MARK',
        '!' => 'T_EXCLAMATION_MARK',
        ':' => 'T_COLON',
        '"' => 'T_DOUBLE_QUOTES',
        '@' => 'T_AT',
        '&' => 'T_AMPERSAND',
        '%' => 'T_PERCENT',
        '|' => 'T_PIPE',
        '$' => 'T_DOLLAR',
        '^' => 'T_CARET',
        '~' => 'T_TILDE',
        '`' => 'T_BACKTICK'
    ];

    /**
     * Tokenizes the given source.
     *
     * String tokens carry no line information and inherit the line of the most
     * recently created token. Array tokens are split at line breaks into one Token
     * per piece; empty pieces are not stored but still advance the line counter.
     * Lines left without any token are filled in by fillBlanks().
     *
     * @param string $source PHP source code; an empty string yields an empty collection
     */
    public function parse(string $source): TokenCollection {
        $collection = new TokenCollection();

        if ($source === '') {
            return $collection;
        }

        $rawTokens = \token_get_all($source);

        // seed carrying the line of the first raw token, for string tokens that come first
        $previous = new Token($rawTokens[0][2], 'Placeholder', '');

        foreach ($rawTokens as $raw) {
            if (\is_string($raw)) {
                $previous = new Token($previous->getLine(), $this->map[$raw], $raw);
                $collection->addToken($previous);

                continue;
            }

            $name   = \token_name($raw[0]);
            $lineNo = $raw[2];
            $pieces = \preg_split('/\R+/Uu', $raw[1]);

            if (!$pieces) {
                // preg_split() gave no usable result (presumably text that is not
                // valid UTF-8 -- confirm); store a stand-in and leave $previous as is
                $collection->addToken(new Token($lineNo, $name, '{binary data}'));

                continue;
            }

            foreach ($pieces as $piece) {
                $previous = new Token($lineNo, $name, $piece);
                $lineNo++;

                if ($piece !== '') {
                    $collection->addToken($previous);
                }
            }
        }

        return $this->fillBlanks($collection, $previous->getLine());
    }

    /**
     * Returns a copy of the collection in which every line number that is skipped
     * between two consecutive tokens, before the first token (counting from line 1),
     * or after the last token up to $maxLine, is represented by an empty
     * T_WHITESPACE token.
     *
     * @param TokenCollection $tokens  tokens as collected by parse()
     * @param int             $maxLine highest line number the result has to cover
     */
    private function fillBlanks(TokenCollection $tokens, int $maxLine): TokenCollection {
        $filled   = new TokenCollection();
        $lastLine = 0;

        foreach ($tokens as $token) {
            for ($missing = $lastLine + 1; $missing < $token->getLine(); $missing++) {
                $filled->addToken(new Token($missing, 'T_WHITESPACE', ''));
            }

            $filled->addToken($token);
            $lastLine = $token->getLine();
        }

        for ($missing = $lastLine + 1; $missing <= $maxLine; $missing++) {
            $filled->addToken(new Token($missing, 'T_WHITESPACE', ''));
        }

        return $filled;
    }
}
src/XMLSerializer.php000064400000004357151520414440010547 0ustar00<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

use DOMDocument;

/**
 * Serializes a TokenCollection to XML: a <source> root element containing one
 * <line> element per line, each holding <token> elements for non-empty tokens.
 */
class XMLSerializer {

    /** @var \XMLWriter */
    private $writer;

    /** @var Token first token of the <line> element that is currently open */
    private $previousToken;

    /** @var NamespaceUri */
    private $xmlns;

    /**
     * @param NamespaceUri|null $xmlns namespace written to the root element; when null,
     *                                 'https://github.com/theseer/tokenizer' is used
     */
    public function __construct(?NamespaceUri $xmlns = null) {
        $this->xmlns = $xmlns ?? new NamespaceUri('https://github.com/theseer/tokenizer');
    }

    /**
     * Serializes the tokens via toXML() and loads the result into a DOMDocument
     * with preserveWhiteSpace disabled.
     */
    public function toDom(TokenCollection $tokens): DOMDocument {
        $document                     = new DOMDocument();
        $document->preserveWhiteSpace = false;
        $document->loadXML($this->toXML($tokens));

        return $document;
    }

    /**
     * Serializes the tokens into an indented XML string.
     *
     * An empty collection produces a <source> element without any <line> children.
     */
    public function toXML(TokenCollection $tokens): string {
        $xml          = new \XMLWriter();
        $this->writer = $xml;

        $xml->openMemory();
        $xml->setIndent(true);
        $xml->startDocument();
        $xml->startElement('source');
        $xml->writeAttribute('xmlns', $this->xmlns->asString());

        if (\count($tokens) > 0) {
            $xml->startElement('line');
            $xml->writeAttribute('no', '1');

            $this->previousToken = $tokens[0];

            foreach ($tokens as $token) {
                $this->addToken($token);
            }
        }

        // two calls: one for the <line> left open by the loop above (if any),
        // one for <source>
        $xml->endElement();
        $xml->endElement();
        $xml->endDocument();

        return $xml->outputMemory();
    }

    /**
     * Writes one token, first switching to a new <line> element when the token's
     * line is greater than the line of the currently open one. Tokens with an
     * empty value produce no <token> element.
     */
    private function addToken(Token $token): void {
        $lineNo = $token->getLine();

        if ($lineNo > $this->previousToken->getLine()) {
            $this->writer->endElement();

            $this->writer->startElement('line');
            $this->writer->writeAttribute('no', (string)$lineNo);
            $this->previousToken = $token;
        }

        $text = $token->getValue();

        if ($text === '') {
            return;
        }

        $this->writer->startElement('token');
        $this->writer->writeAttribute('name', $token->getName());
        $this->writer->writeRaw(\htmlspecialchars($text, \ENT_NOQUOTES | \ENT_DISALLOWED | \ENT_XML1));
        $this->writer->endElement();
    }
}
CHANGELOG.md000064400000004077151520414440006365 0ustar00# Changelog

All notable changes to Tokenizer are documented in this file using the [Keep a CHANGELOG](http://keepachangelog.com/) principles.

## [1.2.3] - 2024-03-03

### Changed

* Do not use implicitly nullable parameters

## [1.2.2] - 2023-11-20

### Fixed

* [#18](https://github.com/theseer/tokenizer/issues/18): Tokenizer fails on protobuf metadata files


## [1.2.1] - 2021-07-28

### Fixed

* [#13](https://github.com/theseer/tokenizer/issues/13): Fatal error when tokenizing files that contain only a single empty line


## [1.2.0] - 2020-07-13

This release is now PHP 8.0 compliant.

### Fixed

* Whitespace handling in general (only noticeable in the intermediate `TokenCollection`) is now consistent

### Changed

* Updated `Tokenizer` to deal with changed whitespace handling in PHP 8.0
  The XMLSerializer was unaffected.


## [1.1.3] - 2019-06-14

### Changed

* Ensure XMLSerializer can deal with empty token collections

### Fixed

* [#2](https://github.com/theseer/tokenizer/issues/2): Fatal error in infection / phpunit


## [1.1.2] - 2019-04-04

### Changed

* Reverted PHPUnit 8 test update to stay PHP 7.0 compliant


## [1.1.1] - 2019-04-03

### Fixed

* [#1](https://github.com/theseer/tokenizer/issues/1): Empty file causes invalid array read 

### Changed

* Tests should now be PHPUnit 8 compliant


## [1.1.0] - 2017-04-07

### Added

* Allow use of custom namespace for XML serialization


## [1.0.0] - 2017-04-05

Initial Release

[1.2.3]: https://github.com/theseer/tokenizer/compare/1.2.2...1.2.3
[1.2.2]: https://github.com/theseer/tokenizer/compare/1.2.1...1.2.2
[1.2.1]: https://github.com/theseer/tokenizer/compare/1.2.0...1.2.1
[1.2.0]: https://github.com/theseer/tokenizer/compare/1.1.3...1.2.0
[1.1.3]: https://github.com/theseer/tokenizer/compare/1.1.2...1.1.3
[1.1.2]: https://github.com/theseer/tokenizer/compare/1.1.1...1.1.2
[1.1.1]: https://github.com/theseer/tokenizer/compare/1.1.0...1.1.1
[1.1.0]: https://github.com/theseer/tokenizer/compare/1.0.0...1.1.0
[1.0.0]: https://github.com/theseer/tokenizer/compare/b2493e57de80c1b7414219b28503fa5c6b4d0a98...1.0.0
composer.json000064400000001062151520414440007265 0ustar00{
  "name": "theseer/tokenizer",
  "description": "A small library for converting tokenized PHP source code into XML and potentially other formats",
  "license": "BSD-3-Clause",
  "authors": [
    {
      "name": "Arne Blankerts",
      "email": "arne@blankerts.de",
      "role": "Developer"
    }
  ],
  "support": {
    "issues": "https://github.com/theseer/tokenizer/issues"
  },
  "require": {
    "php": "^7.2 || ^8.0",
    "ext-xmlwriter": "*",
    "ext-dom": "*",
    "ext-tokenizer": "*"
  },
  "autoload": {
    "classmap": [
      "src/"
    ]
  }
}

README.md000064400000002653151520414440006031 0ustar00# Tokenizer

A small library for converting tokenized PHP source code into XML.

[![Test](https://github.com/theseer/tokenizer/actions/workflows/ci.yml/badge.svg)](https://github.com/theseer/tokenizer/actions/workflows/ci.yml)

## Installation

You can add this library as a local, per-project dependency to your project using [Composer](https://getcomposer.org/):

    composer require theseer/tokenizer

If you only need this library during development, for instance to run your project's test suite, then you should add it as a development-time dependency:

    composer require --dev theseer/tokenizer

## Usage examples

```php
$tokenizer = new TheSeer\Tokenizer\Tokenizer();
$tokens = $tokenizer->parse(file_get_contents(__DIR__ . '/src/XMLSerializer.php'));

$serializer = new TheSeer\Tokenizer\XMLSerializer();
$xml = $serializer->toXML($tokens);

echo $xml;
```

The generated XML structure looks something like this:

```xml
<?xml version="1.0"?>
<source xmlns="https://github.com/theseer/tokenizer">
 <line no="1">
  <token name="T_OPEN_TAG">&lt;?php </token>
  <token name="T_DECLARE">declare</token>
  <token name="T_OPEN_BRACKET">(</token>
  <token name="T_STRING">strict_types</token>
  <token name="T_WHITESPACE"> </token>
  <token name="T_EQUAL">=</token>
  <token name="T_WHITESPACE"> </token>
  <token name="T_LNUMBER">1</token>
  <token name="T_CLOSE_BRACKET">)</token>
  <token name="T_SEMICOLON">;</token>
 </line>
</source>
```