--- /dev/null
+<?php
+
+namespace App\Fluidbook;
+
+use App\Jobs\FluidbookCompiler;
+use App\Models\FluidbookDocument;
+use Cubist\PDF\CommandLine\FWSTK;
+use Cubist\Util\Gzip;
+
+class SearchIndex
+{
+    /**
+     * Builds the search index and the per-page plain texts of a publication.
+     *
+     * For every page of $compiler->pages, the plain-text file and the index file are passed
+     * through Gzip::compressIfNotCompressed() and read back. The aggregated index (keys sorted
+     * with ksort) and the texts are JSON encoded, written with Gzip::file_put_contents() into
+     * $dir ("sindex.json" when $simple, "index.json" otherwise, plus "textes.json") and handed
+     * back to the caller through $index and $textes.
+     *
+     * @param FluidbookCompiler $compiler Compiler exposing ->pages and ->getFluidbook()
+     * @param string $dir Directory receiving the generated files
+     * @param mixed $index Output: JSON-encoded index
+     * @param mixed $textes Output: JSON-encoded map of page => plain text
+     * @param bool $simple true: per-page counts only, false: counts detailed per word
+     * @param bool $force Not used by this implementation; kept so existing calls stay valid
+     * @return void
+     */
+    public static function makeTextsIndexes($compiler, $dir, &$index, &$textes, bool $simple = false, bool $force = false)
+    {
+        $ifilec = $dir . ($simple ? '/sindex.json' : '/index.json');
+        $tfilec = $dir . '/textes.json';
+
+        $index = [];
+        $textes = [];
+        foreach ($compiler->pages as $page => $infos) {
+            $fluidbook = $compiler->getFluidbook();
+            $tfile = $fluidbook->getTextFile($page, FluidbookDocument::TEXT_PLAIN);
+            $ifile = $fluidbook->getTextFile($page, FluidbookDocument::TEXT_INDEX);
+
+            Gzip::compressIfNotCompressed($tfile);
+            Gzip::compressIfNotCompressed($ifile);
+            $text = Gzip::file_get_contents($tfile);
+            $ipage = Gzip::file_get_contents($ifile);
+
+            if ($simple) {
+                self::fillIndexWithWordsSimple($index, $page, $ipage);
+            } else {
+                self::fillIndexWithWords($index, $page, $ipage);
+            }
+            $textes[$page] = $text;
+        }
+        ksort($index);
+
+        $textes = json_encode($textes);
+        $index = json_encode($index);
+
+        Gzip::file_put_contents($tfilec, $textes);
+        Gzip::file_put_contents($ifilec, $index);
+    }
+
+    /**
+     * Groups the highlight entries of every page by word.
+     *
+     * Each page's highlight file is decoded as JSON. Every entry carrying a non-empty "word"
+     * (after stripping NUL bytes) is appended to the list stored under that word, with its
+     * "word" field replaced by the page and an "idx" field holding its position in the page file.
+     * Entries that are not arrays or have no "word" field are skipped.
+     *
+     * @param FluidbookCompiler $compiler Compiler exposing ->pages and ->getFluidbook()
+     * @return array Map of word => list of highlight entries
+     */
+    public static function makeHighlightIndex($compiler): array
+    {
+        $res = [];
+        foreach ($compiler->pages as $page => $infos) {
+            $fby = $compiler->getFluidbook()->getHightlightFile($page);
+            $words = json_decode(Gzip::file_get_contents($fby), true);
+            if (!is_array($words)) {
+                continue;
+            }
+
+            foreach ($words as $i => $w) {
+                // Malformed entries used to trigger an undefined index error; ignore them instead.
+                if (!is_array($w) || !isset($w['word'])) {
+                    continue;
+                }
+                $word = trim($w['word'], "\0");
+                if ($word === '') {
+                    continue;
+                }
+                // NOTE(review): the page is stored under the "word" key; this looks like it may
+                // have been meant as a "page" key - confirm against the code consuming HIGHLIGHTS.
+                unset($w['word']);
+                $w['word'] = $page;
+                $w['idx'] = $i;
+                $res[$word][] = $w;
+            }
+        }
+        return $res;
+    }
+
+    /**
+     * Removes unwanted sequences from an index string.
+     *
+     * The sequence is the literal six characters \ufffd (single-quoted, so not a PHP escape),
+     * presumably the JSON-escaped Unicode replacement character.
+     *
+     * @param string $str
+     * @return string
+     */
+    protected static function _escapeIndex($str)
+    {
+        $todelete = ['\ufffd'];
+        return str_replace($todelete, '', $str);
+    }
+
+    /**
+     * Parses the index data of one page.
+     *
+     * Expected line format: KEY,TOTAL<TAB>WORD$COUNT<TAB>WORD$COUNT...
+     * Lines without a comma, without a tab after the total or with an empty key are ignored,
+     * and so are word tokens lacking the "$" separator (they used to raise an
+     * "Undefined array key" error and to be indexed with a zero count).
+     *
+     * @param string $ipage Raw index data of the page
+     * @return array List of ['key' => string, 'total' => int, 'words' => list of [word, count]]
+     */
+    protected static function parseIndexPage($ipage): array
+    {
+        $entries = [];
+        foreach (explode("\n", trim((string)$ipage)) as $line) {
+            $parts = explode(',', trim($line));
+            if (count($parts) <= 1 || $parts[0] === '') {
+                continue;
+            }
+            $data = explode("\t", $parts[1], 2);
+            if (count($data) < 2) {
+                continue;
+            }
+
+            $words = [];
+            foreach (explode("\t", $data[1]) as $token) {
+                $pair = explode('$', $token, 2);
+                if (count($pair) < 2) {
+                    continue;
+                }
+                $words[] = [$pair[0], (int)$pair[1]];
+            }
+            $entries[] = ['key' => $parts[0], 'total' => (int)$data[0], 'words' => $words];
+        }
+        return $entries;
+    }
+
+    /**
+     * Adds the words of one page to the simple index: for each key, the overall total ('t')
+     * and the number of occurrences per page ('p').
+     *
+     * @param array $index Index being built, modified in place
+     * @param int|string $page Page the data belongs to
+     * @param string $ipage Raw index data of the page
+     * @return void
+     */
+    protected static function fillIndexWithWordsSimple(&$index, $page, $ipage)
+    {
+        foreach (self::parseIndexPage($ipage) as $entry) {
+            $key = $entry['key'];
+            if (!isset($index[$key])) {
+                $index[$key] = ['t' => 0, 'p' => []];
+            }
+            $index[$key]['t'] += $entry['total'];
+
+            foreach ($entry['words'] as $pair) {
+                if (!isset($index[$key]['p'][$page])) {
+                    $index[$key]['p'][$page] = 0;
+                }
+                $index[$key]['p'][$page] += $pair[1];
+            }
+        }
+    }
+
+    /**
+     * Adds the words of one page to the detailed index: for each key, the overall total ('t')
+     * and, per word ('w'), its own total ('t') and its number of occurrences per page ('p').
+     *
+     * @param array $index Index being built, modified in place
+     * @param int|string $page Page the data belongs to
+     * @param string $ipage Raw index data of the page
+     * @return void
+     */
+    protected static function fillIndexWithWords(&$index, $page, $ipage)
+    {
+        foreach (self::parseIndexPage($ipage) as $entry) {
+            $key = $entry['key'];
+            if (!isset($index[$key])) {
+                $index[$key] = ['t' => 0, 'w' => []];
+            }
+            $index[$key]['t'] += $entry['total'];
+
+            foreach ($entry['words'] as $pair) {
+                $word = $pair[0];
+                $count = $pair[1];
+                if (!isset($index[$key]['w'][$word])) {
+                    $index[$key]['w'][$word] = ['t' => 0, 'p' => []];
+                }
+                if (!isset($index[$key]['w'][$word]['p'][$page])) {
+                    $index[$key]['w'][$word]['p'][$page] = 0;
+                }
+                $index[$key]['w'][$word]['t'] += $count;
+                $index[$key]['w'][$word]['p'][$page] += $count;
+            }
+        }
+    }
+}
namespace App\Jobs;
+use App\Fluidbook\SearchIndex;
use App\Fluidbook\SEO\Document;
use App\Models\FluidbookPublication;
use App\Models\FluidbookTheme;
use App\Util\FluidbookLinks;
use Cubist\Locale\Country;
use Cubist\Locale\Locale;
+use Cubist\PDF\CommandLine\FWSTK;
use Cubist\Util\ArrayUtil;
use Cubist\Util\CommandLine;
use Cubist\Util\Data;
if (!isset($this->config->basketReferences[$ean])) {
continue;
}
-
-
$f = $file->getPathname();
if ($ext === 'mp4') {
public function writeTexts()
{
- $cache = sha1($this->fluidbookSettings->highlightResults . '/--/' . $this->fluidbookSettings->searchWordSelectionAlgorithm . '///' . $this->fluidbookSettings->textExtraction . '|--|' . $this->fluidbookSettings->ignoreSearchSeparators . '|||' . $this->getFluidbook()->composition_update . '()()()' . filemtime(WS_TOOLS . '/fwstk/out/artifacts/fwstk_jar/fwstk.jar'));
- $cacheDir = WS_BOOKS . '/index/' . $this->book_id . '/' . $cache . '/';
- if (!file_exists($cacheDir)) {
- mkdir($cacheDir, 0777, true);
+ $cache = sha1($this->fluidbookSettings->highlightResults . '/--/' . $this->fluidbookSettings->searchWordSelectionAlgorithm . '///' . $this->fluidbookSettings->textExtraction . '|--|' . $this->fluidbookSettings->ignoreSearchSeparators . '|||' . $this->getFluidbook()->composition_update . '()()()' . FWSTK::lastUpdate());
+ $cacheDir = Files::mkdir(protected_path('fluidbookpublication/index/' . $this->book_id . '/' . $cache . '/'));
+ $indexFile = $cacheDir . '/search.index.js';
+ $textFile = $cacheDir . '/search.texts.js';
+ $hightlightsFile = $cacheDir . '/search.highlight.js';
- $this->daoBook->makeTextsIndexes($this->book, $this->pages, $index, $textes, true);
- file_put_contents($cacheDir . '/search.index.js', 'var INDEX=' . $index . ';' . "\r");
- if ($this->fluidbookSettings->highlightResults) {
- file_put_contents($cacheDir . '/search.highlight.js', 'var HIGHLIGHTS=' . json_encode($this->daoBook->makeHighlightIndex($this->book, $this->pages)) . ";\r");
- }
- if ($this->fluidbookSettings->searchWordSelectionAlgorithm == 'expression') {
- file_put_contents($cacheDir . '/search.texts.js', 'var TEXTS=' . $textes . ";\r");
- }
+ if (!file_exists($indexFile) || !file_exists($textFile)) {
+ SearchIndex::makeTextsIndexes($this, $cacheDir, $index, $texts, true);
+ file_put_contents($indexFile, 'var INDEX=' . $index . ';' . "\r");
+ file_put_contents($textFile, 'var TEXTS=' . $texts . ";\r");
+ }
+ if ($this->fluidbookSettings->highlightResults && !file_exists($hightlightsFile)) {
+ file_put_contents($hightlightsFile, 'var HIGHLIGHTS=' . json_encode(SearchIndex::makeHighlightIndex($this)) . ";\r");
}
+
$this->vdir->copy($cacheDir . '/search.index.js', 'data/search.index.js');
if ($this->fluidbookSettings->highlightResults) {
$this->vdir->copy($cacheDir . '/search.highlight.js', 'data/search.highlight.js');
use Cubist\Backpack\Magic\Fields\Textarea;
use Cubist\PDF\CommandLine\FWSTK;
use Cubist\PDF\PDFTools;
+use Cubist\Util\Gzip;
use Illuminate\Support\Facades\Cache;
class FluidbookDocument extends ToolboxModel
+ /**
+ * Returns the path of the text file of a page, producing it first when needed.
+ *
+ * The path comes from _getTextPath(). When _checkTextFile() rejects it, the texts are
+ * extracted again from original.pdf with PDFTools::extractTexts().
+ *
+ * @param mixed $page Page whose text file is requested
+ * @param mixed $type Kind of text file (defaults to self::TEXT_PLAIN)
+ * @param string $extractionMethod Extraction method handed to PDFTools::extractTexts()
+ * @param string $ignoreSeparators Separators setting handed to PDFTools::extractTexts()
+ * @return mixed Path returned by _getTextPath()
+ * @throws \Exception When the file still does not exist after the extraction
+ */
public function getTextFile($page, $type = self::TEXT_PLAIN, $extractionMethod = 'fluidbook', $ignoreSeparators = '')
{
$path = $this->_getTextPath($page, $type, $extractionMethod, $ignoreSeparators);
- if (!file_exists($path)) {
+ // Regenerate when the file is missing or older than FWSTK::lastUpdate() (see _checkTextFile()).
+ if (!$this->_checkTextFile($path)) {
PDFTools::extractTexts($this->path('original.pdf'), $this->path(), $extractionMethod, $ignoreSeparators);
- if (!file_exists($path)) {
+ // After extraction only existence is checked, through Gzip::file_exists().
+ if (!Gzip::file_exists($path)) {
+ throw new \Exception('An error occured while producing file ' . $path);
+ }
+ }
+ return $path;
+ }
+
+ /**
+  * Tells whether a generated file can be reused as is.
+  *
+  * @param mixed $path Path checked through the Gzip helpers
+  * @return bool true when the file exists and its modification time is not older than
+  *              FWSTK::lastUpdate()
+  */
+ protected function _checkTextFile($path)
+ {
+     if (!Gzip::file_exists($path)) {
+         return false;
+     }
+
+     return Gzip::filemtime($path) >= FWSTK::lastUpdate();
+ }
+
+ public function getHightlightFile($page)
+ {
+ $path = $this->_getHightlightFilePath($page);
+ if (!$this->_checkTextFile($path)) {
+ PDFTools::extractHighlightsData($this->path('original.pdf'), $this->path());
+ if (!Gzip::file_exists($path)) {
throw new \Exception('An error occured while producing file ' . $path);
}
}
return $map[$textExtraction];
}
+ /**
+  * Returns the path of the highlight data file (.fby) of a page.
+  *
+  * @param mixed $page Page identifier inserted into the file name
+  * @return mixed Result of $this->path() for "texts/p<page>.fby"
+  */
+ public function _getHightlightFilePath($page)
+ {
+     $relativePath = 'texts/p' . $page . '.fby';
+
+     return $this->path($relativePath);
+ }
+
public function _getTextPath($page, $type = self::TEXT_PLAIN, $extractionMethod = 'fluidbook', $ignoreSeparators = '')
{
$sepFolder = '';
return self::_getDocument($compo[0])->getTextFile($compo[1], $type, $this->textExtraction, $this->ignoreSearchSeparators);
}
+ /**
+  * Returns the highlight file of a page by delegating to the document it comes from.
+  *
+  * The composition entry of the page supplies the argument of _getDocument() (index 0)
+  * and the page handed to that document (index 1).
+  *
+  * @param mixed $page Key of the page in $this->composition
+  * @return mixed Whatever the document's getHightlightFile() returns
+  */
+ public function getHightlightFile($page)
+ {
+     $entry = $this->composition[$page];
+     $document = self::_getDocument($entry[0]);
+
+     return $document->getHightlightFile($entry[1]);
+ }
+
public function getFile($page, $format = 'jpg', $resolution = 150, $withText = true, $withGraphics = true, $version = 'html', $force = false)
{
"source": {
"type": "git",
"url": "git://git.cubedesigners.com/cubist_pdf.git",
- "reference": "3de1229df9751a411d0c5cb5d85fcd4889a85826"
+ "reference": "a3712f58ac4d727c1a4c2e7263d39f6b7dce5b3a"
},
"dist": {
"type": "tar",
- "url": "https://composer.cubedesigners.com/dist/cubist/pdf/cubist-pdf-dev-master-ba9dfa.tar",
- "reference": "3de1229df9751a411d0c5cb5d85fcd4889a85826",
- "shasum": "8ae87c50b61ad69fe6d61390a2280823edccb6b0"
+ "url": "https://composer.cubedesigners.com/dist/cubist/pdf/cubist-pdf-dev-master-b055ea.tar",
+ "reference": "a3712f58ac4d727c1a4c2e7263d39f6b7dce5b3a",
+ "shasum": "67b078a6c6a157a4f32eacedc086cdb7e2bf9f88"
},
"require": {
"cubist/util": "dev-master",
"cubist",
"pdf"
],
- "time": "2022-08-22T20:03:01+00:00"
+ "time": "2022-08-23T07:38:41+00:00"
},
{
"name": "cubist/scorm",
},
{
"name": "spatie/temporary-directory",
- "version": "2.1.0",
+ "version": "2.1.1",
"source": {
"type": "git",
"url": "https://github.com/spatie/temporary-directory.git",
- "reference": "79f138f2b81adae583d04d3727a4538dd394023f"
+ "reference": "e2818d871783d520b319c2d38dc37c10ecdcde20"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/spatie/temporary-directory/zipball/79f138f2b81adae583d04d3727a4538dd394023f",
- "reference": "79f138f2b81adae583d04d3727a4538dd394023f",
+ "url": "https://api.github.com/repos/spatie/temporary-directory/zipball/e2818d871783d520b319c2d38dc37c10ecdcde20",
+ "reference": "e2818d871783d520b319c2d38dc37c10ecdcde20",
"shasum": ""
},
"require": {
],
"support": {
"issues": "https://github.com/spatie/temporary-directory/issues",
- "source": "https://github.com/spatie/temporary-directory/tree/2.1.0"
+ "source": "https://github.com/spatie/temporary-directory/tree/2.1.1"
},
"funding": [
{
"type": "github"
}
],
- "time": "2022-03-11T08:16:01+00:00"
+ "time": "2022-08-23T07:15:15+00:00"
},
{
"name": "swayok/alternative-laravel-cache",