Improve token mapping, fix multi-line PHP comments

This commit is contained in:
Timothy Warren 2019-11-08 13:28:24 -05:00
parent e2e4f421e0
commit 8baca1d293
2 changed files with 63 additions and 26 deletions

View File

@ -481,7 +481,7 @@ class Row {
if ($commentEnd !== FALSE) if ($commentEnd !== FALSE)
{ {
$inComment = FALSE; $inComment = FALSE;
array_replace_range($this->hl, 0, $commentEnd, Highlight::ML_COMMENT); array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT);
$offset = $commentEnd + 2; $offset = $commentEnd + 2;
continue; continue;
} }
@ -517,9 +517,16 @@ class Row {
// Start of multi-line comment // Start of multi-line comment
$start = strpos($this->render, '/*', $offset); $start = strpos($this->render, '/*', $offset);
if ($start !== FALSE) $end = strpos($this->render, '*/', $offset);
if ($start !== FALSE && $end !== FALSE)
{ {
$inComment = strpos($this->render, '*/', $offset) === FALSE; $len = $end - $start + 2;
array_replace_range($this->hl, $start, $len, Highlight::ML_COMMENT);
$inComment = FALSE;
}
if ($start !== FALSE && $end === FALSE)
{
$inComment = TRUE;
array_replace_range($this->hl, $start, $charLen - $offset, Highlight::ML_COMMENT); array_replace_range($this->hl, $start, $charLen - $offset, Highlight::ML_COMMENT);
$offset = $start + $charLen - $offset; $offset = $start + $charLen - $offset;
} }

View File

@ -314,6 +314,11 @@ function syntax_to_color(int $hl): int
: Color::FG_WHITE; : Color::FG_WHITE;
} }
/**
 * Expand every tab character in a string into a run of spaces.
 *
 * Each tab is swapped for exactly KILO_TAB_STOP spaces, regardless of the
 * column it occurs in; all other characters are left untouched.
 *
 * @param string $str Text that may contain tab characters
 * @return string The same text with each tab replaced by KILO_TAB_STOP spaces
 */
function tabs_to_spaces(string $str): string
{
	// Build the replacement once, then map the tab character onto it
	$padding = str_repeat(' ', KILO_TAB_STOP);

	return strtr($str, ["\t" => $padding]);
}
/** /**
* Use 'token_get_all' to get the tokens for a file, * Use 'token_get_all' to get the tokens for a file,
* organized by row number * organized by row number
@ -323,30 +328,25 @@ function syntax_to_color(int $hl): int
*/ */
function get_php_tokens(string $code): array function get_php_tokens(string $code): array
{ {
$raw_tokens = token_get_all($code); $rawTokens = token_get_all($code);
$tokens = []; $tokens = [];
$lineNum = 1; $lineNum = 1;
$line = []; $line = [];
foreach($raw_tokens as $token) foreach($rawTokens as $t)
{ {
// Simple characters, usually delimiters or single character operators // Simple characters, usually delimiters or single character operators
if ( ! is_array($token)) if ( ! is_array($t))
{ {
$line[] = [ $line[] = [
'type' => -1, 'type' => -1,
'typeName' => 'RAW', 'typeName' => 'RAW',
'char' => $token, 'char' => tabs_to_spaces($t),
]; ];
continue; continue;
} }
[$type, $char, $currentLine] = $token; [$type, $rawChar, $currentLine] = $t;
$char = tabs_to_spaces($rawChar);
// Only return the first line of a multi-line token
if ($char !== "\n" && strpos($char, "\n") !== FALSE)
{
$char = explode("\n", $char)[0];
}
$current = [ $current = [
'type' => $type, 'type' => $type,
@ -355,19 +355,47 @@ function get_php_tokens(string $code): array
'line' => $currentLine, 'line' => $currentLine,
]; ];
if ($char === "\n")
{
$line[] = $current;
$tokens[$lineNum] = $line;
$lineNum++;
$line = [];
}
// Only return the first line of a multi-line token for this line array
if ($char !== "\n" && strpos($char, "\n") !== FALSE)
{
$chars = explode("\n", $char);
$current['original'] = [
'string' => $char,
'lines' => $chars,
];
$current['char'] = array_shift($chars);
// Add new lines for additional newline characters
$nextLine = $currentLine;
foreach ($chars as $char)
{
$nextLine++;
if ( ! array_key_exists($nextLine, $tokens))
{
$tokens[$nextLine] = [];
}
$tokens[$nextLine][] = [
'type' => -1,
'typeName' => 'RAW',
'char' => $char,
];
}
}
if ($currentLine !== $lineNum) if ($currentLine !== $lineNum)
{ {
$tokens[$lineNum] = $line; $existing = $tokens[$lineNum] ?? [];
$tokens[$lineNum] = array_merge($existing, $line);
// Make sure to insert empty arrays for empty lines
// So the array of tokens isn't sparse
for ($i = $lineNum; $i < $currentLine; $i++)
{
if ( ! array_key_exists($i, $tokens))
{
$tokens[$i] = [];
}
}
$lineNum = $currentLine; $lineNum = $currentLine;
$line = []; $line = [];
@ -376,7 +404,9 @@ function get_php_tokens(string $code): array
$line[] = $current; $line[] = $current;
} }
$tokens[$lineNum] = $line; $tokens[$lineNum] = array_merge($tokens[$lineNum] ?? [], $line);
ksort($tokens);
return $tokens; return $tokens;
} }