From: Vincent Vanwaelscappel Date: Wed, 26 Jan 2022 17:38:13 +0000 (+0100) Subject: wip #5058 @0.5 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=b7e66d0bac3865bbdc8280cbd8a012fb85e7ef5c;p=fluidbook_tools.git wip #5058 @0.5 --- diff --git a/src/Search/SearchIndex.php b/src/Search/SearchIndex.php index b900662..a068e3d 100644 --- a/src/Search/SearchIndex.php +++ b/src/Search/SearchIndex.php @@ -17,54 +17,95 @@ class SearchIndex $this->_pages[$page] = new Page($documentPage, $path); } - /** - * @return array - */ - public function compileIndex() + protected function fillIndexWithWords(&$index, $page, $ipage) { - $index = []; - foreach ($this->_pages as $pageNumber => $page) { - $twords = explode("\n", $page->getIndex()); + $twords = explode("\n", trim($ipage)); - foreach ($twords as $woadata) { - $w1 = explode(',', trim($woadata)); - if (count($w1) <= 1) { - continue; + foreach ($twords as $woadata) { + $w1 = explode(',', trim($woadata)); + if (count($w1) <= 1) { + continue; + } + list($woa, $worddata) = $w1; + $e = explode("\t", $worddata, 2); + if (count($e) < 2) { + continue; + } + list($total, $wordslist) = $e; + + if ($woa == '') { + continue; + } + + if (!isset($index[$woa])) { + $index[$woa] = array('t' => 0, 'w' => array()); + } + $index[$woa]['t'] += $total; + + $words = explode("\t", $wordslist); + + foreach ($words as $word) { + list($wordwa, $count) = explode('$', $word, 2); + if (!isset($index[$woa]['w'][$wordwa])) { + $index[$woa]['w'][$wordwa] = array('t' => 0, 'p' => array()); } - list($woa, $worddata) = $w1; - $e = explode("\t", $worddata, 2); - if (count($e) < 2) { - continue; + if (!isset($index[$woa]['w'][$wordwa]['p'][$page])) { + $index[$woa]['w'][$wordwa]['p'][$page] = 0; } - list($total, $wordslist) = $e; + $index[$woa]['w'][$wordwa]['t'] += $count; + $index[$woa]['w'][$wordwa]['p'][$page] += $count; + } + } + } - if ($woa === '') { - continue; - } - if (!isset($index[$woa])) { - $index[$woa] = array('t' => 0, 'w' => array()); - } - $index[$woa]['t'] += (int)$total; - - $words = explode("\t", $wordslist); - - foreach ($words as $word) { - list($wordwa, $count) = explode('$', $word, 2); - if (!isset($index[$woa]['w'][$wordwa])) { - $index[$woa]['w'][$wordwa] = array('t' => 0, 'p' => [$pageNumber => 0]); - } - if (!isset($index[$woa]['w'][$wordwa]['p'][$pageNumber])) { - $index[$woa]['w'][$wordwa]['p'][$pageNumber] = 0; - } - $index[$woa]['w'][$wordwa]['t'] += (int)$count; - $index[$woa]['w'][$wordwa]['p'][$pageNumber] += (int)$count; + protected function fillIndexWithWordsSimple(&$index, $page, $ipage) + { + $twords = explode("\n", trim($ipage)); + + foreach ($twords as $woadata) { + $w1 = explode(',', trim($woadata)); + if (count($w1) <= 1) { + continue; + } + list($woa, $worddata) = $w1; + $e = explode("\t", $worddata, 2); + if (count($e) < 2) { + continue; + } + list($total, $wordslist) = $e; + + if ($woa == '') { + continue; + } + + if (!isset($index[$woa])) { + $index[$woa] = array('t' => 0, 'p' => array()); + } + $index[$woa]['t'] += $total; + + $words = explode("\t", $wordslist); + foreach ($words as $word) { + list($wordwa, $count) = explode('$', $word, 2); + if (!isset($index[$woa]['p'][$page])) { + $index[$woa]['p'][$page] = 0; } + $index[$woa]['p'][$page] += $count; } } - return $index; } + /** + * @return array + */ + public function compileIndex() + { + $index = []; + foreach ($this->_pages as $pageNumber => $page) { + $this->fillIndexWithWordsSimple($index, $pageNumber, $page->getIndex()); + } + return $index; + } /** * @return stdClass