mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-05-01 06:59:52 +00:00
71 lines
1.8 KiB
PHP
71 lines
1.8 KiB
PHP
![]() |
<?php
|
||
|
|
||
|
namespace BookStack\Search;
|
||
|
|
||
|
/**
 * A custom text tokenizer which records & provides the insight needed for our search indexing.
 * This replaces our earlier use of basic strtok(), adding the following which strtok() lacked:
 * - Tracks and provides the current/previous delimiter that we've stopped at.
 * - Returns empty tokens when a delimiter is reached with no text before it.
 */
|
||
|
class SearchTextTokenizer
{
    /** Byte offset within the text at which the next call to next() resumes scanning. */
    protected int $currentIndex = 0;

    /** Byte length of the text, computed once at construction. */
    protected int $length;

    /** Delimiter that ended the most recently returned token; '' before any token or at end of text. */
    protected string $currentDelimiter = '';

    /** Value that $currentDelimiter held before the most recently returned token. */
    protected string $previousDelimiter = '';

    /**
     * @param string $text       The text to split into tokens.
     * @param string $delimiters Delimiter characters; each byte of the text is checked
     *                           for presence in this string.
     */
    public function __construct(
        protected string $text,
        protected string $delimiters = ' '
    ) {
        $this->length = strlen($this->text);
    }

    /**
     * Get the delimiter which ended the token most recently returned by next().
     * This is an empty string before any token has been returned, and when the
     * last returned token ran to the end of the text instead of a delimiter.
     */
    public function currentDelimiter(): string
    {
        return $this->currentDelimiter;
    }

    /**
     * Get the delimiter which was current before the most recently returned token,
     * which is the delimiter that sits in front of that token.
     * This is an empty string until a second token has been returned.
     */
    public function previousDelimiter(): string
    {
        return $this->previousDelimiter;
    }

    /**
     * Get the next token between delimiters.
     * An empty string is returned when a delimiter is hit with no text before it,
     * so consecutive delimiters produce empty tokens.
     * Returns false if there's no further tokens.
     */
    public function next(): string|false
    {
        $token = '';

        for ($i = $this->currentIndex; $i < $this->length; $i++) {
            $char = $this->text[$i];
            if (str_contains($this->delimiters, $char)) {
                // Shift the delimiter history along and resume after this delimiter next time.
                $this->previousDelimiter = $this->currentDelimiter;
                $this->currentDelimiter = $char;
                $this->currentIndex = $i + 1;

                return $token;
            }

            $token .= $char;
        }

        // Reached the end of the text without hitting a delimiter.
        // Compare against '' instead of relying on truthiness, since the string "0"
        // is falsy in PHP and a trailing "0" token would otherwise be dropped.
        if ($token !== '') {
            $this->currentIndex = $this->length;
            $this->previousDelimiter = $this->currentDelimiter;
            $this->currentDelimiter = '';

            return $token;
        }

        return false;
    }
}
|