More PHP token refactoring

This commit is contained in:
Timothy Warren 2019-11-14 17:11:10 -05:00
parent 519a193a8d
commit 8951894493
3 changed files with 159 additions and 83 deletions

View File

@ -482,7 +482,7 @@ class Row {
{ {
$inComment = FALSE; $inComment = FALSE;
array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT); array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT);
$offset = $commentEnd + 2; $offset = $commentEnd;
continue; continue;
} }

View File

@ -2,9 +2,29 @@
namespace Aviat\Kilo\Tokens; namespace Aviat\Kilo\Tokens;
use function Aviat\Kilo\str_contains;
use function Aviat\Kilo\tabs_to_spaces; use function Aviat\Kilo\tabs_to_spaces;
class PHP { class PHP {
private string $code;
private array $rawLines;
private array $tokens = [];
private int $lineNum = 1;
/**
 * Keep the raw source and a copy of its lines keyed by row number.
 *
 * The line list is re-keyed so that the first line of the source sits at
 * index 1 (there is no index 0), the same base that $lineNum starts from.
 *
 * @param string $code The source code to tokenize
 */
private function __construct(string $code)
{
    $this->code = $code;

    $numbered = [];
    foreach (explode("\n", $code) as $index => $text)
    {
        // Shift the zero-based index from explode() up by one
        $numbered[$index + 1] = $text;
    }

    $this->rawLines = $numbered;
}
/** /**
* Use 'token_get_all' to get the tokens for a file, * Use 'token_get_all' to get the tokens for a file,
* organized by row number * organized by row number
@ -14,90 +34,16 @@ class PHP {
*/ */
public static function getTokens(string $code): array public static function getTokens(string $code): array
{ {
$rawTokens = token_get_all($code); return (new self($code))->organizeTokens();
$tokens = [];
$lineNum = 1;
$line = [];
foreach($rawTokens as $t)
{
if (is_array($t))
{
[$type, $rawChar, $currentLine] = $t;
$char = tabs_to_spaces($rawChar);
$current = [
'type' => $type,
'typeName' => token_name($type),
'char' => $char,
'line' => $currentLine,
];
if ($char === "\n")
{
$line[] = $current;
$tokens[$lineNum] = $line;
$lineNum++;
$line = [];
}
// Only return the first line of a multi-line token for this line array
if ($char !== "\n" && strpos($char, "\n") !== FALSE)
{
$chars = explode("\n", $char);
$current['original'] = [
'string' => $char,
'lines' => $chars,
];
$current['char'] = array_shift($chars);
// Add new lines for additional newline characters
$nextLine = $currentLine;
foreach ($chars as $char)
{
$nextLine++;
if ( ! array_key_exists($nextLine, $tokens))
{
$tokens[$nextLine] = [];
}
$tokens[$nextLine][] = [
'type' => -1,
'typeName' => 'RAW',
'char' => $char,
];
}
}
if ($currentLine !== $lineNum)
{
$existing = $tokens[$lineNum] ?? [];
$tokens[$lineNum] = array_merge($existing, $line);
$lineNum = $currentLine;
$line = [];
}
$line[] = $current;
}
else if (is_string($t))
{
// Simple characters, usually delimiters or single character operators
$line[] = [
'type' => -1,
'typeName' => 'RAW',
'char' => tabs_to_spaces($t),
];
}
}
$tokens[$lineNum] = array_merge($tokens[$lineNum] ?? [], $line);
ksort($tokens);
return $tokens;
} }
/**
* Return tokens for the current $filename, organized
* by row number
*
* @param string $filename
* @return array
*/
public static function getFileTokens(string $filename): array public static function getFileTokens(string $filename): array
{ {
$code = file_get_contents($filename); $code = file_get_contents($filename);
@ -109,4 +55,129 @@ class PHP {
return self::getTokens($code); return self::getTokens($code);
} }
/**
 * Tokenize the stored source and group the tokens by row number.
 *
 * Each raw token from token_get_all is handed to the matching processor
 * (array tokens and plain string tokens are handled separately). Afterwards,
 * every row of the source that has no entry yet receives an empty list, and
 * the result is sorted by row number.
 *
 * @return array Tokens keyed by row number, in ascending row order
 */
protected function organizeTokens(): array
{
    foreach (token_get_all($this->code) as $rawToken)
    {
        if (is_array($rawToken))
        {
            $this->processArrayToken($rawToken);
            continue;
        }

        if (is_string($rawToken))
        {
            $this->processStringToken($rawToken);
        }
    }

    // Add "missing" row indexes; rawLines is keyed from 1 to the line count
    foreach ($this->rawLines as $row => $_)
    {
        $this->tokens[$row] ??= [];
    }

    ksort($this->tokens);

    return $this->tokens;
}
/**
 * Record a token that token_get_all returned in array form.
 *
 * A token that is exactly one newline is appended to the current row and
 * moves the row counter forward by one. A token whose text spans several
 * lines keeps only its first line in 'char' (the complete text and the
 * split lines are kept under 'original'); each non-empty continuation line
 * is recorded as a RAW token via processStringToken, starting from the row
 * after the token's own line.
 *
 * @param array $token [type, text, line number] triple from token_get_all
 * @return void
 */
protected function processArrayToken(array $token): void
{
    [$type, $rawChar, $currentLine] = $token;

    $char = tabs_to_spaces($rawChar);

    $current = [
        'type' => $type,
        'typeName' => token_name($type),
        'char' => $char,
        'line' => $currentLine,
    ];

    if ($char === "\n")
    {
        // Append to the row: a plain assignment here would throw away
        // every token already collected for this row
        $this->tokens[$this->lineNum][] = $current;
        $this->lineNum++;

        if ( ! array_key_exists($this->lineNum, $this->tokens))
        {
            $this->tokens[$this->lineNum] = [];
        }

        return;
    }

    // Only return the first line of a multi-line token for this line array
    if (str_contains($char, "\n"))
    {
        $lines = explode("\n", $char);
        $current['original'] = [
            'string' => $char,
            'lines' => $lines,
        ];

        $current['char'] = array_shift($lines);

        // Add new lines for additional newline characters
        $nextLine = $currentLine;
        foreach ($lines as $lineText)
        {
            $nextLine++;

            if ( ! array_key_exists($nextLine, $this->tokens))
            {
                $this->tokens[$nextLine] = [];
            }

            // Strict comparison: empty() would also skip a line that is just "0"
            if ($lineText !== '')
            {
                $this->processStringToken($lineText, $nextLine);
            }
        }
    }

    // Following string tokens carry no line number, so remember this one
    $this->lineNum = $currentLine;

    $this->tokens[$this->lineNum][] = $current;
}
/**
 * Record a plain string token as a RAW token.
 *
 * The token text is looked up in the raw source lines, starting at
 * $startLine (or the current row when none is given). If no line in the
 * search window contains it, the starting row itself is used.
 *
 * @param string $token Token text; usually a delimiter or single character operator
 * @param int|null $startLine Row to start searching from; NULL means the current row
 * @return void
 */
protected function processStringToken(string $token, ?int $startLine = NULL): void
{
    $text = tabs_to_spaces($token);
    $firstRow = $startLine ?? $this->lineNum;

    $row = $this->findCorrectLine($text, $firstRow);
    if ($row === NULL)
    {
        $row = $firstRow;
    }

    $this->tokens[$row][] = [
        'type' => -1,
        'typeName' => 'RAW',
        'char' => $text,
    ];
}
/**
 * Find the first raw source line, at or after $rowOffset, that contains $search.
 *
 * At most $searchLength rows are inspected, and the window never runs past
 * the last row of the source.
 *
 * @param string $search Text to look for
 * @param int $rowOffset Row number to start searching from
 * @param int $searchLength Maximum number of rows to inspect
 * @return int|null Row number of the first match, or NULL when nothing matched
 */
private function findCorrectLine(string $search, int $rowOffset, int $searchLength = 5): ?int
{
    // rawLines is keyed from 1 to count, so the last valid row equals the
    // count; the bound is inclusive so that the final row is searched too
    $lastRow = min($rowOffset + $searchLength - 1, count($this->rawLines));

    for ($i = $rowOffset; $i <= $lastRow; $i++)
    {
        if (str_contains($this->rawLines[$i], $search))
        {
            return $i;
        }
    }

    return NULL;
}
} }

View File

@ -244,6 +244,11 @@ function array_replace_range(array &$array, int $offset, int $length, $value):vo
function str_contains(string $haystack, string $str, ?int $offset = NULL): bool function str_contains(string $haystack, string $str, ?int $offset = NULL): bool
{ {
if (empty($str))
{
return FALSE;
}
return ($offset !== NULL) return ($offset !== NULL)
? strpos($haystack, $str, $offset) !== FALSE ? strpos($haystack, $str, $offset) !== FALSE
: strpos($haystack, $str) !== FALSE; : strpos($haystack, $str) !== FALSE;