public function getTextContent($page)
{
- $f = $this->compiler->getFluidbook()->getTextFile($page, FluidbookDocument::TEXT_HTML);
+ $f = $this->compiler->getFluidbook()->getTextFile($page, FluidbookDocument::TEXT_HTML, 'standard');
$res = trim(file_get_contents($f));
if (!str_starts_with($res, '<div>')) {
$sepFolder = 'sep_' . md5($ignoreSeparators) . '/';
}
+ if ($mode === 'robust') {
+ $extractionMethod = 'fluidbook';
+ }
+
$prefix = self::_getTextFilePrefix($extractionMethod);
$dir = $mode !== 'standard' ? $mode . '/' : '';
return $this->path('texts/' . $dir . $sepFolder) . $prefix . $type . $page . '.txt';
return self::$_docs[$documentID];
}
/**
 * Resolve the text file for a page by delegating to the document that holds it.
 *
 * The page's composition entry is looked up, then the call is forwarded to the
 * document obtained from its first element, using its second element as the
 * page argument.
 * NOTE(review): $compo[0] looks like a document ID and $compo[1] like the page
 * number inside that document - confirm against getComposition().
 *
 * @param mixed $page Key into the array returned by getComposition().
 * @param mixed $type Text type forwarded unchanged; defaults to FluidbookDocument::TEXT_PLAIN.
 * @param mixed $mode (Only in the "+" signature.) Optional mode override. When null,
 *                    $this->search_mode is used; when the result is falsy, 'standard' is used.
 *
 * @return mixed Whatever the document-level getTextFile() returns (presumably a file path - confirm).
 */
- public function getTextFile($page, $type = FluidbookDocument::TEXT_PLAIN)
+ public function getTextFile($page, $type = FluidbookDocument::TEXT_PLAIN, $mode = null)
{
// Composition entry for this page; indexes 0 and 1 are read below.
$compo = $this->getComposition()[$page];
// Old behavior: the mode always comes from $this->search_mode, falling back to 'standard' when falsy.
- return self::_getDocument($compo[0])->getTextFile($compo[1], $type, $this->search_mode ?: 'standard', $this->textExtraction, $this->ignoreSearchSeparators);
// New behavior: "??" binds tighter than "?:", so the mode expression is evaluated as
// ($mode ?? $this->search_mode) ?: 'standard'. An explicitly passed falsy $mode (e.g. '')
// therefore also ends up as 'standard'.
+ return self::_getDocument($compo[0])->getTextFile($compo[1], $type, $mode ?? $this->search_mode ?: 'standard', $this->textExtraction, $this->ignoreSearchSeparators);
}
public function getHightlightFile($page)
try {
return Farm::getFileFromPDF($thumbpdf, $page, $format, $resolution, $this->JPEGQuality, $withText, $withGraphics, $version, null, $force, 1);
} catch (\Exception $e) {
- throw new \Exception('Unable to generate thumbnails file from custom PDF "' . $this->pdfThumbnails . '" (original message : ' . $e->getMessage().')');
+ throw new \Exception('Unable to generate thumbnails file from custom PDF "' . $this->pdfThumbnails . '" (original message : ' . $e->getMessage() . ')');
}
} else {
}