--- /dev/null
+<?php
+
+namespace Fluidbook\Tools\Search;
+
+use JsonException;
+use stdClass;
+
+class Page
+{
+    /** @var mixed Value inserted into the per-page file names built by getHighlights() and getIndex() */
+    protected $_documentPage;
+
+    /** @var mixed Prefix (presumably a directory - confirm with callers) prepended to those file names */
+    protected $_path;
+
+    /**
+     * Stores the two values later used to build the per-page file names.
+     *
+     * @param mixed $documentPage Value placed between the file-name prefix and its extension
+     * @param mixed $path         Prefix placed in front of "/p<documentPage>.fby" and "/fi<documentPage>.txt"
+     */
+    public function __construct($documentPage, $path)
+    {
+        $this->_path = $path;
+        $this->_documentPage = $documentPage;
+    }
+
+    /**
+     * Reads "<path>/p<documentPage>.fby" and decodes its content as JSON.
+     *
+     * Associative decoding is off, so JSON objects come back as stdClass instances.
+     *
+     * @return array
+     * @throws JsonException when the content cannot be decoded as JSON
+     */
+    public function getHighlights(): array
+    {
+        $file = $this->_path . '/p' . $this->_documentPage . '.fby';
+        $json = file_get_contents($file);
+
+        return json_decode($json, false, 512, JSON_THROW_ON_ERROR);
+    }
+
+    /**
+     * Reads "<path>/fi<documentPage>.txt" and returns its content with
+     * leading and trailing whitespace removed.
+     *
+     * @return string
+     */
+    public function getIndex(): string
+    {
+        $file = $this->_path . '/fi' . $this->_documentPage . '.txt';
+        $content = file_get_contents($file);
+
+        return trim($content);
+    }
+}
namespace Fluidbook\Tools\Search;
+use JsonException;
+use stdClass;
+
class SearchIndex
{
+    /**
+     * Registered pages, keyed by the $page value given to addPage().
+     *
+     * @var Page[]
+     */
protected $_pages = [];
- public function __construct()
+
+    /**
+     * Registers a page under the given key, replacing any page already
+     * stored under that key.
+     *
+     * @param int|string $page         Key used for the page in the compiled results
+     * @param mixed      $documentPage Forwarded unchanged to the Page constructor
+     * @param mixed      $path         Forwarded unchanged to the Page constructor
+     */
+    public function addPage($page, $documentPage, $path)
{
+        $this->_pages[$page] = new Page($documentPage, $path);
+    }
+
+    /**
+     * Merges the index text of every registered page into one structure.
+     *
+     * Each index line is parsed as
+     *     <key>,<total>TAB<word>$<count>TAB<word>$<count>...
+     * Lines without a comma, without a tab after the total, or with an
+     * empty key are ignored.
+     *
+     * Result shape:
+     *     [key => ['t' => int, 'w' => [word => ['t' => int, 'p' => [pageKey => int]]]]]
+     * where the outer 't' sums the line totals, the inner 't' sums the
+     * counts of one word over all pages and 'p' holds its count per page.
+     *
+     * @return array
+     */
+    public function compileIndex()
+    {
+        $index = [];
+        foreach ($this->_pages as $pageNumber => $page) {
+            $lines = explode("\n", $page->getIndex());
+
+            foreach ($lines as $line) {
+                $fields = explode(',', trim($line));
+                if (count($fields) <= 1) {
+                    continue;
+                }
+                list($woa, $worddata) = $fields;
+
+                $e = explode("\t", $worddata, 2);
+                if (count($e) < 2) {
+                    continue;
+                }
+                list($total, $wordslist) = $e;
+                if ($woa === '') {
+                    continue;
+                }
+
+                if (!isset($index[$woa])) {
+                    $index[$woa] = ['t' => 0, 'w' => []];
+                }
+                $index[$woa]['t'] += (int)$total;
+
+                foreach (explode("\t", $wordslist) as $word) {
+                    // Consecutive tabs produce empty tokens; they carry no
+                    // word and must not create a '' entry in the index.
+                    if ($word === '') {
+                        continue;
+                    }
+
+                    // A token without the '$' separator has no count part.
+                    // Count it as 0 explicitly instead of reading a missing
+                    // array offset (which raises a notice/warning).
+                    $parts = explode('$', $word, 2);
+                    $wordwa = $parts[0];
+                    $count = isset($parts[1]) ? (int)$parts[1] : 0;
+
+                    if (!isset($index[$woa]['w'][$wordwa])) {
+                        $index[$woa]['w'][$wordwa] = ['t' => 0, 'p' => []];
+                    }
+                    if (!isset($index[$woa]['w'][$wordwa]['p'][$pageNumber])) {
+                        $index[$woa]['w'][$wordwa]['p'][$pageNumber] = 0;
+                    }
+                    $index[$woa]['w'][$wordwa]['t'] += $count;
+                    $index[$woa]['w'][$wordwa]['p'][$pageNumber] += $count;
+                }
+            }
+        }
+        return $index;
}
- public function addPage($page, $path)
+
+    /**
+     * Groups the highlight entries of every registered page by word.
+     *
+     * For each entry the `word` property is read, stripped of leading and
+     * trailing NUL bytes and removed from the entry. The entry then gets
+     * `page` (the key the page was registered under) and `idx` (its key in
+     * that page's highlight list) and is appended to the list stored under
+     * the word on the returned object. Entries without a word, or whose
+     * word is empty after stripping, are skipped.
+     *
+     * @return stdClass One property per word, each holding a list of entries
+     * @throws JsonException when a page's highlight data cannot be decoded
+     */
+    public function compileHighlights()
{
- $this->_pages[$page] = $path;
+        $res = new stdClass();
+        foreach ($this->_pages as $pageNumber => $page) {
+            foreach ($page->getHighlights() as $i => $w) {
+                // Entries lacking a word used to trigger a warning before
+                // being dropped as empty; skip them up front instead.
+                if (!isset($w->word)) {
+                    continue;
+                }
+                $word = trim($w->word, "\0");
+                if ($word === '') {
+                    continue;
+                }
+                unset($w->word);
+                $w->page = $pageNumber;
+                $w->idx = $i;
+                if (!isset($res->{$word})) {
+                    $res->{$word} = [];
+                }
+                $res->{$word}[] = $w;
+            }
+        }
+        return $res;
}
}