<?php namespace BookStack\Search; /** * A custom text tokenizer which records & provides insight needed for our search indexing. * We used to use basic strtok() but this class does the following which that lacked: * - Tracks and provides the current/previous delimiter that we've stopped at. * - Returns empty tokens upon parsing a delimiter. */ class SearchTextTokenizer { protected int $currentIndex = 0; protected int $length; protected string $currentDelimiter = ''; protected string $previousDelimiter = ''; public function __construct( protected string $text, protected string $delimiters = ' ' ) { $this->length = strlen($this->text); } /** * Get the current delimiter to be found. */ public function currentDelimiter(): string { return $this->currentDelimiter; } /** * Get the previous delimiter found. */ public function previousDelimiter(): string { return $this->previousDelimiter; } /** * Get the next token between delimiters. * Returns false if there's no further tokens. */ public function next(): string|false { $token = ''; for ($i = $this->currentIndex; $i < $this->length; $i++) { $char = $this->text[$i]; if (str_contains($this->delimiters, $char)) { $this->previousDelimiter = $this->currentDelimiter; $this->currentDelimiter = $char; $this->currentIndex = $i + 1; return $token; } $token .= $char; } if ($token) { $this->currentIndex = $this->length; $this->previousDelimiter = $this->currentDelimiter; $this->currentDelimiter = ''; return $token; } return false; } }