More PHP token refactoring

2019-11-14 17:11:10 -05:00 · 2019-11-14 17:11:10 -05:00 · 8951894493
commit 8951894493
parent 519a193a8d
3 changed files with 159 additions and 83 deletions
--- a/src/Row.php
+++ b/src/Row.php
@ -482,7 +482,7 @@ class Row {
 				{
 					$inComment = FALSE;
 					array_replace_range($this->hl, 0, $commentEnd + 2, Highlight::ML_COMMENT);
-					$offset = $commentEnd + 2;
+					$offset = $commentEnd;
 					continue;
 				}

--- a/src/Tokens/PHP.php
+++ b/src/Tokens/PHP.php
@ -2,9 +2,29 @@

 namespace Aviat\Kilo\Tokens;

+use function Aviat\Kilo\str_contains;
 use function Aviat\Kilo\tabs_to_spaces;

 class PHP {
+
+	/** Source text that is handed to token_get_all */
+	private string $code;
+
+	/** Lines of the source text, keyed starting from 1 */
+	private array $rawLines;
+
+	/** Tokens collected so far, keyed by row number */
+	private array $tokens = [];
+
+	/** Row that tokens are currently being recorded against */
+	private int $lineNum = 1;
+
+	/**
+	 * Store the source and a copy of it split into rows
+	 *
+	 * @param string $code
+	 */
+	private function __construct(string $code)
+	{
+		$this->code = $code;
+
+		// Key the rows from 1 instead of 0
+		$split = explode("\n", $code);
+		$this->rawLines = array_combine(range(1, count($split)), $split);
+	}
+
 	/**
 	 * Use 'token_get_all' to get the tokens for a file,
 	 * organized by row number
@ -14,15 +34,61 @@ class PHP {
 	 */
 	public static function getTokens(string $code): array
 	{
-		$rawTokens = token_get_all($code);
-		$tokens = [];
-		$lineNum = 1;
-		$line = [];
-		foreach($rawTokens as $t)
+		// Each call works on its own instance, so no state is shared between calls
+		$tokenizer = new self($code);
+
+		return $tokenizer->organizeTokens();
+	}
+
+	/**
+	 * Read the given file and return its tokens, organized by row number
+	 *
+	 * @param string $filename
+	 * @return array Empty when the file could not be read
+	 */
+	public static function getFileTokens(string $filename): array
+	{
+		$contents = file_get_contents($filename);
+
+		// file_get_contents gives back FALSE, not a string, on failure
+		if (is_string($contents))
+		{
+			return self::getTokens($contents);
+		}
+
+		return [];
+	}
+
+	/**
+	 * Run the tokenizer over the source and group the results by row
+	 *
+	 * @return array Token lists keyed by row number, sorted by key
+	 */
+	protected function organizeTokens(): array
+	{
+		foreach (token_get_all($this->code) as $t)
 		{
 			if (is_array($t))
 			{
-				[$type, $rawChar, $currentLine] = $t;
+				$this->processArrayToken($t);
+				continue;
+			}
+
+			if (is_string($t))
+			{
+				$this->processStringToken($t);
+			}
+		}
+
+		// Give every row of the raw source an entry, even when no token
+		// was recorded for it; the union keeps rows that already exist
+		$this->tokens += array_fill_keys(array_keys($this->rawLines), []);
+
+		ksort($this->tokens);
+
+		return $this->tokens;
+	}
+
+	protected function processArrayToken(array $token): void
+	{
+		[$type, $rawChar, $currentLine] = $token;
 		$char = tabs_to_spaces($rawChar);

 		$current = [
@ -34,14 +100,19 @@ class PHP {

 		if ($char === "\n")
 		{
-					$line[] = $current;
-					$tokens[$lineNum] = $line;
-					$lineNum++;
-					$line = [];
+			$this->tokens[$this->lineNum] = $current;
+			$this->lineNum++;
+
+			if ( ! array_key_exists($this->lineNum, $this->tokens))
+			{
+				$this->tokens[$this->lineNum] = [];
+			}
+
+			return;
 		}

 		// Only return the first line of a multi-line token for this line array
-				if ($char !== "\n" && strpos($char, "\n") !== FALSE)
+		if (str_contains($char, "\n"))
 		{
 			$chars = explode("\n", $char);
 			$current['original'] = [
@ -56,57 +127,57 @@ class PHP {
 			{
 				$nextLine++;

-						if ( ! array_key_exists($nextLine, $tokens))
+				if ( ! array_key_exists($nextLine, $this->tokens))
 				{
 					$tokens[$nextLine] = [];
 				}

-						$tokens[$nextLine][] = [
-							'type' => -1,
-							'typeName' => 'RAW',
-							'char' => $char,
-						];
-					}
-				}
-
-				if ($currentLine !== $lineNum)
+				if ( ! empty($char))
 				{
-					$existing = $tokens[$lineNum] ?? [];
-					$tokens[$lineNum] = array_merge($existing, $line);
-
-					$lineNum = $currentLine;
-					$line = [];
+					$this->processStringToken($char, $nextLine);
+				}
+			}
 		}

-				$line[] = $current;
-			}
-			else if (is_string($t))
+		if ($currentLine !== $this->lineNum)
 		{
+			$this->lineNum = $currentLine;
+		}
+
+		$this->tokens[$this->lineNum][] = $current;
+	}
+
+	/**
+	 * Record a plain-string token as a RAW token on the row it belongs to
+	 *
+	 * @param string $token The token text
+	 * @param int|null $startLine Row to start looking from; the current row when NULL
+	 * @return void
+	 */
+	protected function processStringToken(string $token, ?int $startLine = NULL): void
+	{
+		$char = tabs_to_spaces($token);
+
+		// Look for the text in the raw source near the starting row;
+		// when it is not found, fall back to the starting row itself
+		$startLine ??= $this->lineNum;
+		$lineNumber = $this->findCorrectLine($char, $startLine) ?? $startLine;
+
 		// Simple characters, usually delimiters or single character operators
-				$line[] = [
+		$this->tokens[$lineNumber][] = [
 			'type' => -1,
 			'typeName' => 'RAW',
-					'char' => tabs_to_spaces($t),
+			// Reuse the already converted text instead of converting it again
+			'char' => $char,
 		];
 	}
-		}

-		$tokens[$lineNum] = array_merge($tokens[$lineNum] ?? [], $line);
-
-		ksort($tokens);
-
-		return $tokens;
-	}
-
-	public static function getFileTokens(string $filename): array
+	/**
+	 * Find the first row, in a short window starting at $rowOffset,
+	 * whose raw text contains $search
+	 *
+	 * @param string $search Text to look for
+	 * @param int $rowOffset Row number to begin searching at
+	 * @param int $searchLength Maximum number of rows to inspect
+	 * @return int|null The matching row number, or NULL when no row matched
+	 */
+	private function findCorrectLine(string $search, int $rowOffset, int $searchLength = 5): ?int
 	{
-		$code = file_get_contents($filename);
-
-		if ($code === FALSE)
+		// rawLines is keyed 1..N, so the exclusive upper bound of the
+		// search is N + 1; clamping to N would never look at the last row
+		$limit = count($this->rawLines) + 1;
+		$end = $rowOffset + $searchLength;
+		if ($end > $limit)
 		{
-			return [];
+			$end = $limit;
 		}

-		return self::getTokens($code);
+		for ($i = $rowOffset; $i < $end; $i++)
+		{
+			if (str_contains($this->rawLines[$i], $search))
+			{
+				return $i;
+			}
+		}
+
+		return NULL;
 	}
 }
--- a/src/functions.php
+++ b/src/functions.php
@ -244,6 +244,11 @@ function array_replace_range(array &$array, int $offset, int $length, $value):vo

 function str_contains(string $haystack, string $str, ?int $offset = NULL): bool
 {
+	if (empty($str))
+	{
+		return FALSE;
+	}
+
 	return ($offset !== NULL)
 		? strpos($haystack, $str, $offset) !== FALSE
 		: strpos($haystack, $str) !== FALSE;