Improve token mapping, fix multi-line PHP comments

This commit is contained in:
Timothy Warren 2019-11-08 13:28:24 -05:00
parent e2e4f421e0
commit 8baca1d293
2 changed files with 63 additions and 26 deletions

View File

@ -481,7 +481,7 @@ class Row {
if ($commentEnd !== FALSE)
{
$inComment = FALSE;
array_replace_range($this->hl, 0, $commentEnd, Highlight::ML_COMMENT);
array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT);
$offset = $commentEnd + 2;
continue;
}
@ -517,9 +517,16 @@ class Row {
// Start of multi-line comment
$start = strpos($this->render, '/*', $offset);
if ($start !== FALSE)
$end = strpos($this->render, '*/', $offset);
if ($start !== FALSE && $end !== FALSE)
{
$inComment = strpos($this->render, '*/', $offset) === FALSE;
$len = $end - $start + 2;
array_replace_range($this->hl, $start, $len, Highlight::ML_COMMENT);
$inComment = FALSE;
}
if ($start !== FALSE && $end === FALSE)
{
$inComment = TRUE;
array_replace_range($this->hl, $start, $charLen - $offset, Highlight::ML_COMMENT);
$offset = $start + $charLen - $offset;
}

View File

@ -314,6 +314,11 @@ function syntax_to_color(int $hl): int
: Color::FG_WHITE;
}
/**
 * Replace every tab character in the given string with a run of
 * KILO_TAB_STOP space characters
 *
 * @param string $str
 * @return string
 */
function tabs_to_spaces(string $str): string
{
	// Build the replacement once, then swap each tab for it
	$spaces = str_repeat(' ', KILO_TAB_STOP);

	return strtr($str, ["\t" => $spaces]);
}
/**
* Use 'token_get_all' to get the tokens for a file,
* organized by row number
@ -323,30 +328,25 @@ function syntax_to_color(int $hl): int
*/
function get_php_tokens(string $code): array
{
$raw_tokens = token_get_all($code);
$rawTokens = token_get_all($code);
$tokens = [];
$lineNum = 1;
$line = [];
foreach($raw_tokens as $token)
foreach($rawTokens as $t)
{
// Simple characters, usually delimiters or single character operators
if ( ! is_array($token))
if ( ! is_array($t))
{
$line[] = [
'type' => -1,
'typeName' => 'RAW',
'char' => $token,
'char' => tabs_to_spaces($t),
];
continue;
}
[$type, $char, $currentLine] = $token;
// Only return the first line of a multi-line token
if ($char !== "\n" && strpos($char, "\n") !== FALSE)
{
$char = explode("\n", $char)[0];
}
[$type, $rawChar, $currentLine] = $t;
$char = tabs_to_spaces($rawChar);
$current = [
'type' => $type,
@ -355,19 +355,47 @@ function get_php_tokens(string $code): array
'line' => $currentLine,
];
if ($char === "\n")
{
$line[] = $current;
$tokens[$lineNum] = $line;
$lineNum++;
$line = [];
}
// Only return the first line of a multi-line token for this line array
if ($char !== "\n" && strpos($char, "\n") !== FALSE)
{
$chars = explode("\n", $char);
$current['original'] = [
'string' => $char,
'lines' => $chars,
];
$current['char'] = array_shift($chars);
// Add new lines for additional newline characters
$nextLine = $currentLine;
foreach ($chars as $char)
{
$nextLine++;
if ( ! array_key_exists($nextLine, $tokens))
{
$tokens[$nextLine] = [];
}
$tokens[$nextLine][] = [
'type' => -1,
'typeName' => 'RAW',
'char' => $char,
];
}
}
if ($currentLine !== $lineNum)
{
$tokens[$lineNum] = $line;
// Make sure to insert empty arrays for empty lines
// So the array of tokens isn't sparse
for ($i = $lineNum; $i < $currentLine; $i++)
{
if ( ! array_key_exists($i, $tokens))
{
$tokens[$i] = [];
}
}
$existing = $tokens[$lineNum] ?? [];
$tokens[$lineNum] = array_merge($existing, $line);
$lineNum = $currentLine;
$line = [];
@ -376,7 +404,9 @@ function get_php_tokens(string $code): array
$line[] = $current;
}
$tokens[$lineNum] = $line;
$tokens[$lineNum] = array_merge($tokens[$lineNum] ?? [], $line);
ksort($tokens);
return $tokens;
}