}
/**
 * Loads the cached index/texts of a book when the cache is still fresh;
 * otherwise walks the book pages and reads their per-page text files.
 *
 * NOTE(review): this span is a diff fragment ('-' = removed line, '+' = added
 * line) with parts elided. $dir, $ifilec, $fby and $res are defined in code
 * that is not visible here, so the notes below only cover the visible lines.
 *
 * @param object $book    Read here for parametres->textExtraction and composition_update.
 * @param array  $pages   Iterated as book page => infos; infos is read for
 *                        'document_id' and 'document_page'.
 * @param mixed  $index   By reference; set to the contents of the cached index file on a cache hit.
 * @param mixed  $textes  By reference; set to the contents of the cached texts file on a cache hit.
 * @param bool   $simple  Not used in the visible lines.
 */
public function makeTextsIndexes($book, $pages, &$index, &$textes, $simple = false) {
-
+ global $core;
// File-name prefix: 'p' when the book uses the 'poppler' text extraction, empty otherwise.
$prefix = '';
if ($book->parametres->textExtraction == 'poppler') {
$prefix = 'p';
}
$tfilec = $dir . '/' . $prefix . 'textes.json';
// Cache freshness check: both cache files must exist and the older of the two
// must be at least as recent as the reference timestamp. The removed version
// compared against composition_update only; the added version also takes the
// modification time of this source file and of fwstk.jar into account.
- if (CubeIT_Util_Gzip::file_exists($ifilec) && CubeIT_Util_Gzip::file_exists($tfilec) && (min(CubeIT_Util_Gzip::filemtime($ifilec), CubeIT_Util_Gzip::filemtime($tfilec)) >= $book->composition_update)) {
+ if (CubeIT_Util_Gzip::file_exists($ifilec) && CubeIT_Util_Gzip::file_exists($tfilec) && (min(CubeIT_Util_Gzip::filemtime($ifilec), CubeIT_Util_Gzip::filemtime($tfilec)) >= max($book->composition_update, filemtime(__FILE__), filemtime(WS_TOOLS . '/fwstk/out/artifacts/fwstk_jar/fwstk.jar')))) {
$index = CubeIT_Util_Gzip::file_get_contents($ifilec);
$textes = CubeIT_Util_Gzip::file_get_contents($tfilec);
return;
// NOTE(review): the closing brace of the cache check above is in an elided part of the diff.
foreach ($pages as $book_page => $infos) {
$tfile = wsDocument::getDir($infos['document_id']) . $prefix . 'p' . $infos['document_page'] . '.txt';
$ifile = wsDocument::getDir($infos['document_id']) . $prefix . 'i' . $infos['document_page'] . '.txt';
+
// Added: when either per-page file is missing, load the document through the
// DAO and call getLinksAndTexts() (presumably regenerates the files; confirm).
+ if (!file_exists($tfile) || !file_exists($ifile)) {
+ $daoDoc = new wsDAODocument($core->con);
+ $doc = $daoDoc->selectById($infos['document_id']);
+ $doc->getLinksAndTexts();
+ }
+
// Pass both files to compressIfNotCompressed(), then read the page text through the Gzip helper.
CubeIT_Util_Gzip::compressIfNotCompressed($tfile);
CubeIT_Util_Gzip::compressIfNotCompressed($ifile);
$text = CubeIT_Util_Gzip::file_get_contents($tfile);
}
// NOTE(review): the lines below appear to come from another hunk, possibly a
// different method; $fby and $res are set outside the visible lines.
if (file_exists($fby)) {
- $words = CubeIT_Util_Json::decode(file_get_contents($fby), CubeIT_Util_Json::TYPE_OBJECT);
-
- foreach ($words as $w) {
- $word = $w->word;
- unset($w->word);
- $w->page = $book_page;
- if (!isset($res->{$word})) {
- $res->{$word} = array();
- }
- $res->{$word}[] = $w;
- }
- }
+ $words = CubeIT_Util_Json::decode(file_get_contents($fby), CubeIT_Util_Json::TYPE_OBJECT);
+
+ foreach ($words as $w) {
+ $word = $w->word;
// Added: strip NUL bytes from both ends of the word and skip it when nothing is left.
+ $word = trim($word, "\0");
+ if($word==''){
+ continue;
+ }
// Remove the word from the entry, tag the entry with its page and append it
// to the list stored under that word in $res.
+ unset($w->word);
+ $w->page = $book_page;
+ if (!isset($res->{$word})) {
+ $res->{$word} = array();
+ }
+ $res->{$word}[] = $w;
+ }
+ }
}
return $res;
}