From 89518944931ce3c5e50593cba19b72c08ff2d079 Mon Sep 17 00:00:00 2001
From: Timothy J Warren
Date: Thu, 14 Nov 2019 17:11:10 -0500
Subject: [PATCH] More PHP token refactoring

---
Review notes (ignored when the patch is applied):
  Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; tabs are assumed, so apply with --ignore-whitespace if
  the context lines fail to match. The added lines were corrected and
  documented during review, so the post-image blob ids on the "index" lines
  no longer match the resulting files.

 src/Row.php        |   2 +-
 src/Tokens/PHP.php | 283 +++++++++++++++++++++++++++++++++++---------------
 src/functions.php  |   6 +
 3 files changed, 208 insertions(+), 83 deletions(-)

diff --git a/src/Row.php b/src/Row.php
index 1269b13..1c2fe77 100644
--- a/src/Row.php
+++ b/src/Row.php
@@ -482,7 +482,7 @@ class Row {
 				{
 					$inComment = FALSE;
 					array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT);
-					$offset = $commentEnd + 2;
+					$offset = $commentEnd;
 					continue;
 				}
 
diff --git a/src/Tokens/PHP.php b/src/Tokens/PHP.php
index a791a0b..ecbddec 100644
--- a/src/Tokens/PHP.php
+++ b/src/Tokens/PHP.php
@@ -2,9 +2,39 @@
 
 namespace Aviat\Kilo\Tokens;
 
+use function Aviat\Kilo\str_contains;
 use function Aviat\Kilo\tabs_to_spaces;
 
 class PHP {
+
+	/** The complete source code handed to token_get_all() */
+	private string $code;
+
+	/** The code split on "\n", keyed by line number starting at 1 */
+	private array $rawLines;
+
+	/** Tokens gathered so far, keyed by line number */
+	private array $tokens = [];
+
+	/** Current line number; the default starting line for string tokens */
+	private int $lineNum = 1;
+
+	/**
+	 * Store the code, along with a copy split into lines keyed from 1
+	 *
+	 * @param string $code
+	 */
+	private function __construct(string $code)
+	{
+		$lines = explode("\n", $code);
+		// Shift the keys up by one, then drop the placeholder at key 0
+		array_unshift($lines, '');
+		unset($lines[0]);
+
+		$this->code = $code;
+		$this->rawLines = $lines;
+	}
+
 	/**
 	 * Use 'token_get_all' to get the tokens for a file,
 	 * organized by row number
@@ -14,90 +44,16 @@ class PHP {
 	 */
 	public static function getTokens(string $code): array
 	{
-		$rawTokens = token_get_all($code);
-		$tokens = [];
-		$lineNum = 1;
-		$line = [];
-		foreach($rawTokens as $t)
-		{
-			if (is_array($t))
-			{
-				[$type, $rawChar, $currentLine] = $t;
-				$char = tabs_to_spaces($rawChar);
-
-				$current = [
-					'type' => $type,
-					'typeName' => token_name($type),
-					'char' => $char,
-					'line' => $currentLine,
-				];
-
-				if ($char === "\n")
-				{
-					$line[] = $current;
-					$tokens[$lineNum] = $line;
-					$lineNum++;
-					$line = [];
-				}
-
-				// Only return the first line of a multi-line token for this line array
-				if ($char !== "\n" && strpos($char, "\n") !== FALSE)
-				{
-					$chars = explode("\n", $char);
-					$current['original'] = [
-						'string' => $char,
-						'lines' => $chars,
-					];
-					$current['char'] = array_shift($chars);
-
-					// Add new lines for additional newline characters
-					$nextLine = $currentLine;
-					foreach ($chars as $char)
-					{
-						$nextLine++;
-
-						if ( ! array_key_exists($nextLine, $tokens))
-						{
-							$tokens[$nextLine] = [];
-						}
-
-						$tokens[$nextLine][] = [
-							'type' => -1,
-							'typeName' => 'RAW',
-							'char' => $char,
-						];
-					}
-				}
-
-				if ($currentLine !== $lineNum)
-				{
-					$existing = $tokens[$lineNum] ?? [];
-					$tokens[$lineNum] = array_merge($existing, $line);
-
-					$lineNum = $currentLine;
-					$line = [];
-				}
-
-				$line[] = $current;
-			}
-			else if (is_string($t))
-			{
-				// Simple characters, usually delimiters or single character operators
-				$line[] = [
-					'type' => -1,
-					'typeName' => 'RAW',
-					'char' => tabs_to_spaces($t),
-				];
-			}
-		}
-
-		$tokens[$lineNum] = array_merge($tokens[$lineNum] ?? [], $line);
-
-		ksort($tokens);
-
-		return $tokens;
+		return (new self($code))->organizeTokens();
 	}
 
+	/**
+	 * Return tokens for the current $filename, organized
+	 * by row number
+	 *
+	 * @param string $filename
+	 * @return array
+	 */
 	public static function getFileTokens(string $filename): array
 	{
 		$code = file_get_contents($filename);
@@ -109,4 +65,167 @@ class PHP {
 
 		return self::getTokens($code);
 	}
+
+	/**
+	 * Tokenize the code and group the tokens by line number
+	 *
+	 * @return array
+	 */
+	protected function organizeTokens(): array
+	{
+		$rawTokens = token_get_all($this->code);
+		foreach ($rawTokens as $t)
+		{
+			if (is_array($t))
+			{
+				$this->processArrayToken($t);
+			}
+			else if (is_string($t))
+			{
+				$this->processStringToken($t);
+			}
+		}
+
+		// Add "missing" row indexes
+		$lineCount = count($this->rawLines);
+		for ($i = 1; $i <= $lineCount; $i++)
+		{
+			if ( ! array_key_exists($i, $this->tokens))
+			{
+				$this->tokens[$i] = [];
+			}
+		}
+
+		ksort($this->tokens);
+
+		return $this->tokens;
+	}
+
+	/**
+	 * Store an array token ([type, text, line]) under its line number
+	 *
+	 * For a token spanning several lines, only the first line of text is kept
+	 * as 'char'; every later non-empty line is stored as a RAW token.
+	 *
+	 * @param array $token
+	 * @return void
+	 */
+	protected function processArrayToken(array $token): void
+	{
+		[$type, $rawChar, $currentLine] = $token;
+		$char = tabs_to_spaces($rawChar);
+
+		$current = [
+			'type' => $type,
+			'typeName' => token_name($type),
+			'char' => $char,
+			'line' => $currentLine,
+		];
+
+		if ($char === "\n")
+		{
+			// Append, so tokens already stored for this line are kept
+			$this->tokens[$this->lineNum][] = $current;
+			$this->lineNum++;
+
+			if ( ! array_key_exists($this->lineNum, $this->tokens))
+			{
+				$this->tokens[$this->lineNum] = [];
+			}
+
+			return;
+		}
+
+		// Only return the first line of a multi-line token for this line array
+		if (str_contains($char, "\n"))
+		{
+			$chars = explode("\n", $char);
+			$current['original'] = [
+				'string' => $char,
+				'lines' => $chars,
+			];
+			$current['char'] = array_shift($chars);
+
+			// Add new lines for additional newline characters
+			$nextLine = $currentLine;
+			foreach ($chars as $char)
+			{
+				$nextLine++;
+
+				if ( ! array_key_exists($nextLine, $this->tokens))
+				{
+					$this->tokens[$nextLine] = [];
+				}
+
+				// Strict check: a line holding only "0" is not empty
+				if ($char !== '')
+				{
+					$this->processStringToken($char, $nextLine);
+				}
+			}
+		}
+
+		if ($currentLine !== $this->lineNum)
+		{
+			$this->lineNum = $currentLine;
+		}
+
+		$this->tokens[$this->lineNum][] = $current;
+	}
+
+	/**
+	 * Store a plain string as a RAW token
+	 *
+	 * The raw lines from the starting line on are searched for the text;
+	 * if no line contains it, the starting line itself is used.
+	 *
+	 * @param string $token
+	 * @param int|null $startLine - defaults to the current line number
+	 * @return void
+	 */
+	protected function processStringToken(string $token, ?int $startLine = NULL): void
+	{
+		$char = tabs_to_spaces($token);
+
+		$startLine = $startLine ?? $this->lineNum;
+		$lineNumber = $this->findCorrectLine($char, $startLine) ?? $startLine;
+
+		// Simple characters, usually delimiters or single character operators
+		$this->tokens[$lineNumber][] = [
+			'type' => -1,
+			'typeName' => 'RAW',
+			'char' => $char,
+		];
+	}
+
+	/**
+	 * Find the first raw line, starting at $rowOffset, that contains $search
+	 *
+	 * @param string $search
+	 * @param int $rowOffset - line number to start searching from
+	 * @param int $searchLength - maximum number of lines to check
+	 * @return int|null - the matching line number, or NULL if there is none
+	 */
+	private function findCorrectLine(string $search, int $rowOffset, int $searchLength = 5): ?int
+	{
+		// rawLines is keyed from 1, so its last valid key is count()
+		$lastLine = count($this->rawLines);
+
+		// Last line to check (inclusive), clamped to the end of the code
+		$end = $rowOffset + $searchLength - 1;
+		if ($end > $lastLine)
+		{
+			$end = $lastLine;
+		}
+
+		for ($i = $rowOffset; $i <= $end; $i++)
+		{
+			if (str_contains($this->rawLines[$i], $search))
+			{
+				return $i;
+			}
+		}
+
+		return NULL;
+	}
 }
\ No newline at end of file
diff --git a/src/functions.php b/src/functions.php
index 18d8540..12468f7 100644
--- a/src/functions.php
+++ b/src/functions.php
@@ -244,6 +244,12 @@ function array_replace_range(array &$array, int $offset, int $length, $value):vo
 
 function str_contains(string $haystack, string $str, ?int $offset = NULL): bool
 {
+	// Strict comparison: empty() would also reject the valid needle "0"
+	if ($str === '')
+	{
+		return FALSE;
+	}
+
 	return ($offset !== NULL)
 		? strpos($haystack, $str, $offset) !== FALSE
 		: strpos($haystack, $str) !== FALSE;