0
0
Fork 0
mirror of https://github.com/BookStackApp/BookStack.git synced 2025-05-01 06:59:52 +00:00
BookStackApp_BookStack/app/Search/SearchTextTokenizer.php

71 lines
1.8 KiB
PHP
Raw Normal View History

<?php
namespace BookStack\Search;
/**
* A custom text tokenizer which records & provides insight needed for our search indexing.
* We used to use basic strtok() but this class does the following which that lacked:
* - Tracks and provides the current/previous delimiter that we've stopped at.
* - Returns empty tokens upon parsing a delimiter.
*/
class SearchTextTokenizer
{
protected int $currentIndex = 0;
protected int $length;
protected string $currentDelimiter = '';
protected string $previousDelimiter = '';
public function __construct(
protected string $text,
protected string $delimiters = ' '
) {
$this->length = strlen($this->text);
}
/**
* Get the current delimiter to be found.
*/
public function currentDelimiter(): string
{
return $this->currentDelimiter;
}
/**
* Get the previous delimiter found.
*/
public function previousDelimiter(): string
{
return $this->previousDelimiter;
}
/**
* Get the next token between delimiters.
* Returns false if there's no further tokens.
*/
public function next(): string|false
{
$token = '';
for ($i = $this->currentIndex; $i < $this->length; $i++) {
$char = $this->text[$i];
if (str_contains($this->delimiters, $char)) {
$this->previousDelimiter = $this->currentDelimiter;
$this->currentDelimiter = $char;
$this->currentIndex = $i + 1;
return $token;
}
$token .= $char;
}
if ($token) {
$this->currentIndex = $this->length;
$this->previousDelimiter = $this->currentDelimiter;
$this->currentDelimiter = '';
return $token;
}
return false;
}
}