From dbed767503644e5c434ee41acf96ea5b8bb52c86 Mon Sep 17 00:00:00 2001 From: Vincent Vanwaelscappel Date: Fri, 10 Sep 2021 10:12:31 +0200 Subject: [PATCH] wip #4666 @1 --- src/CommandLine/FWSTK.php | 14 +++++++++++ src/PDF/Document.php | 27 ++++++++++++++++++--- src/PDF/PDFTools.php | 51 ++++++++++++++++++++++++++++++++++++--- src/SVG/SVGTools.php | 17 ++++++------- 4 files changed, 94 insertions(+), 15 deletions(-) create mode 100644 src/CommandLine/FWSTK.php diff --git a/src/CommandLine/FWSTK.php b/src/CommandLine/FWSTK.php new file mode 100644 index 0000000..8ab8189 --- /dev/null +++ b/src/CommandLine/FWSTK.php @@ -0,0 +1,14 @@ +setArg('--input ' . $this->getPDFInput()); $fwstk->setArg('--infos'); $fwstk->execute(); @@ -116,10 +117,30 @@ class Document */ public function processPage($page, $dest, $files, $sync = false) { - start_measure('Process page ' . $page . ' (doc)'); $dispatchFunction = $sync ? 'dispatchSync' : 'dispatch'; ProcessPage::$dispatchFunction($this, $page, $dest, $files); - stop_measure('Process page ' . $page . 
' (doc)'); + } + + public function processLinks($dest) + { + PDFTools::extractLinks($this->getPDFInput(), $dest); + } + + public function processTexts($dest) + { + start_measure('Extract texts'); + PDFTools::extractTexts($this->getPDFInput(), $dest); + stop_measure('Extract texts'); + start_measure('Get highlights data'); + PDFTools::extractHighlightsData($this->getPDFInput(), $dest); + stop_measure('Get highlights data'); + start_measure('Compile search index'); + $this->compileSearchIndex($dest); + stop_measure('Compile search index'); + } + + public function compileSearchIndex($dest){ + } public function getResolutionRatio() diff --git a/src/PDF/PDFTools.php b/src/PDF/PDFTools.php index 0318704..f801339 100644 --- a/src/PDF/PDFTools.php +++ b/src/PDF/PDFTools.php @@ -7,6 +7,7 @@ use Cubist\Util\Files\Files; use DOMDocument; use DOMNode; use DOMXPath; +use Fluidbook\Tools\CommandLine\FWSTK; class PDFTools { @@ -55,10 +56,10 @@ class PDFTools $pdf2swf->setArg('p', $page); $pdf2swf->setArg('T', 10); $pdf2swf->setArg('Q', 120); - $pdf2swf->setArg('set reordertags', '0'); - $pdf2swf->setArg('fonts'); $pdf2swf->setArg('set poly2bitmap'); $pdf2swf->setArg('set storeallcharacters'); + $pdf2swf->setArg('set reordertags', '0'); + $pdf2swf->setArg('fonts'); $pdf2swf->setArg('set subpixels', $resolution / 72); $pdf2swf->setArg('set jpegquality', $quality); $pdf2swf->setArg('set disablelinks'); @@ -205,7 +206,7 @@ class PDFTools public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg') { - $tmp =Files::tempnam(); + $tmp = Files::tempnam(); $antialiasing = $antialiasing ? 'yes' : 'no'; $freetype = $texts ? 'yes' : 'no'; @@ -271,4 +272,48 @@ class PDFTools return $image; } + public static function extractLinks($pdf, $out) + { + $out .= 'links/'; + Files::mkdir($out); + + if (file_exists($out . 
'/p1.csv')) { + return; + } + $fwstk = new FWSTK(); + $fwstk->setArg('--input ' . $pdf); + $fwstk->setArg('--extractLinks ' . $out . 'p%d.csv'); + $fwstk->setArg('--threads 1'); + $fwstk->execute(); + } + + public static function extractTexts($pdf, $out, $ignoreSeparators = '') + { + $out .= 'texts/'; + Files::mkdir($out); + + $fwstk = new FWSTK(); + $fwstk->setArg('--input ' . $pdf); + $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt'); + $fwstk->setArg('--extractTextsMethod fluidbook'); + $fwstk->setArg('--threads 1'); + if ($ignoreSeparators !== '') { + $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators); + } + $fwstk->execute(); + } + + public static function extractHighlightsData($pdf, $out) + { + $out .= 'texts/'; + Files::mkdir($out); + + $fwstk = new FWSTK(); + $fwstk->setArg('--input ' . $pdf); + $fwstk->setArg('--layout ' . $out . 'p%d.fby'); + $fwstk->setArg('--cmaps ' . $out); + $fwstk->setArg('--fonts ' . $out . 'fonts/web/'); + $fwstk->execute(); + } + + } diff --git a/src/SVG/SVGTools.php b/src/SVG/SVGTools.php index de7ff93..4c08a17 100644 --- a/src/SVG/SVGTools.php +++ b/src/SVG/SVGTools.php @@ -3,6 +3,7 @@ namespace Fluidbook\Tools\SVG; use Cubist\Util\Files\Files; +use Fluidbook\Tools\FluidbookTools; class SVGTools { @@ -16,8 +17,8 @@ class SVGTools if (null === $out) { $e = explode('.', $in); $ext = array_pop($e); - array_push($e, 'o'); - array_push($e, $ext); + $e[] = 'o'; + $e[] = $ext; $out = implode('.', $e); } @@ -26,9 +27,9 @@ class SVGTools public static function _optimizeSVG($in, $out) { - $cmd = "timeout -s 1 120 /usr/local/bin/svgcleaner --allow-bigger-file --paths-coordinates-precision 3 --copy-on-error --stdout $in"; - $svg = `$cmd`; - if ($svg == '') { + + $svg = shell_exec('timeout -s 1 120 ' . FluidbookTools::tools_path('svgcleaner/svgcleaner', true) . ' --allow-bigger-file --paths-coordinates-precision 3 --copy-on-error --stdout ' . 
$in); + if (!$svg) { $svg = file_get_contents($in); } $svg = self::_disablePreserveRatio($svg); @@ -39,7 +40,7 @@ class SVGTools protected static function _disablePreserveRatio($in) { $str = 'preserveAspectRatio="none"'; - if (strpos($in, $str) !== false) { + if (str_contains($in, $str)) { return $in; } return str_replace("'; -- 2.39.5