Git - fluidbook_tools.git/commitdiff
wip #4666 @0.5
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 14 Sep 2021 13:59:23 +0000 (15:59 +0200)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 14 Sep 2021 13:59:23 +0000 (15:59 +0200)
src/Search/Page.php [new file with mode: 0644]
src/Search/SearchIndex.php

diff --git a/src/Search/Page.php b/src/Search/Page.php
new file mode 100644 (file)
index 0000000..535d9f0
--- /dev/null
@@ -0,0 +1,35 @@
+<?php
+
+namespace Fluidbook\Tools\Search;
+
+use JsonException;
+use stdClass;
+
+class Page
+{
+    /** Value interpolated into the per-page file names ("p<value>.fby", "fi<value>.txt"). */
+    protected $_documentPage;
+    /** Directory prefix under which the per-page files are looked up. */
+    protected $_path;
+
+    public function __construct($documentPage, $path)
+    {
+        $this->_path = $path;
+        $this->_documentPage = $documentPage;
+    }
+
+    /**
+     * Reads "<path>/p<documentPage>.fby" and decodes its content as JSON.
+     *
+     * Decoding is done in non-associative mode, so JSON objects come back as
+     * stdClass instances.
+     *
+     * @return array the decoded content of the file
+     * @throws JsonException when the content cannot be decoded
+     */
+    public function getHighlights(): array
+    {
+        $highlightsFile = $this->_path . '/p' . $this->_documentPage . '.fby';
+        $json = file_get_contents($highlightsFile);
+
+        return json_decode($json, false, 512, JSON_THROW_ON_ERROR);
+    }
+
+    /**
+     * Reads "<path>/fi<documentPage>.txt" and strips surrounding whitespace.
+     *
+     * @return string the trimmed content of the file
+     */
+    public function getIndex(): string
+    {
+        $indexFile = $this->_path . '/fi' . $this->_documentPage . '.txt';
+        $content = file_get_contents($indexFile);
+
+        return trim($content);
+    }
+}
diff --git a/src/Search/SearchIndex.php b/src/Search/SearchIndex.php
index afe975972e1673795adaf3eabb48d189b7fb5688..b90066214d2b444c8c94a6c81c5db5d9ee59bb82 100644 (file)
@@ -2,17 +2,94 @@
 
 namespace Fluidbook\Tools\Search;
 
+use JsonException;
+use stdClass;
+
 class SearchIndex
 {
+    /**
+     * @var Page[]
+     */
     protected $_pages = [];
 
-    public function __construct()
+    public function addPage($page, $documentPage, $path)
     {
+        $this->_pages[$page] = new Page($documentPage, $path);
+    }
+
+    /**
+     * @return array
+     */
+    public function compileIndex()
+    {
+        $index = [];
+        foreach ($this->_pages as $pageNumber => $page) {
+            $twords = explode("\n", $page->getIndex());
+
+            foreach ($twords as $woadata) {
+                $w1 = explode(',', trim($woadata));
+                if (count($w1) <= 1) {
+                    continue;
+                }
+                list($woa, $worddata) = $w1;
+                $e = explode("\t", $worddata, 2);
+                if (count($e) < 2) {
+                    continue;
+                }
+                list($total, $wordslist) = $e;
 
+                if ($woa === '') {
+                    continue;
+                }
+
+                if (!isset($index[$woa])) {
+                    $index[$woa] = array('t' => 0, 'w' => array());
+                }
+                $index[$woa]['t'] += (int)$total;
+
+                $words = explode("\t", $wordslist);
+
+                foreach ($words as $word) {
+                    list($wordwa, $count) = explode('$', $word, 2);
+                    if (!isset($index[$woa]['w'][$wordwa])) {
+                        $index[$woa]['w'][$wordwa] = array('t' => 0, 'p' => [$pageNumber => 0]);
+                    }
+                    if (!isset($index[$woa]['w'][$wordwa]['p'][$pageNumber])) {
+                        $index[$woa]['w'][$wordwa]['p'][$pageNumber] = 0;
+                    }
+                    $index[$woa]['w'][$wordwa]['t'] += (int)$count;
+                    $index[$woa]['w'][$wordwa]['p'][$pageNumber] += (int)$count;
+                }
+            }
+        }
+        return $index;
     }
 
-    public function addPage($page, $path)
+
+    /**
+     * @return stdClass
+     * @throws JsonException
+     */
+    public function compileHighlights()
     {
-        $this->_pages[$page] = $path;
+        $res = new stdClass();
+        foreach ($this->_pages as $pageNumber => $page) {
+            $words = $page->getHighlights();
+            foreach ($words as $i => $w) {
+                $word = $w->word;
+                $word = trim($word, "\0");
+                if ($word === '') {
+                    continue;
+                }
+                unset($w->word);
+                $w->page = $pageNumber;
+                $w->idx = $i;
+                if (!isset($res->{$word})) {
+                    $res->{$word} = array();
+                }
+                $res->{$word}[] = $w;
+            }
+        }
+        return $res;
     }
 }