From 9465884f94b7a6303fb48b809761042da2271e3c Mon Sep 17 00:00:00 2001 From: Vincent Vanwaelscappel Date: Thu, 7 Apr 2022 22:03:51 +0200 Subject: [PATCH] wip #5220 @1 --- .../bin/Fluidbook/Tools/CommandLine/FWSTK.php | 14 - .../bin/Fluidbook/Tools/FluidbookTools.php | 44 --- .../bin/Fluidbook/Tools/Jobs/ProcessFile.php | 268 --------------- .../bin/Fluidbook/Tools/Jobs/ProcessPage.php | 146 -------- .../bin/Fluidbook/Tools/PDF/Document.php | 220 ------------ .../bin/Fluidbook/Tools/PDF/PDFTools.php | 321 ------------------ .../bin/Fluidbook/Tools/SVG/SVGTools.php | 128 ------- .../fwstk/bin/Fluidbook/Tools/Search/Page.php | 35 -- .../Fluidbook/Tools/Search/SearchIndex.php | 95 ------ src/PDFTools.php | 11 +- 10 files changed, 10 insertions(+), 1272 deletions(-) delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/CommandLine/FWSTK.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/FluidbookTools.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/Jobs/ProcessFile.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/Jobs/ProcessPage.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/PDF/Document.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/PDF/PDFTools.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/SVG/SVGTools.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/Search/Page.php delete mode 100644 resources/tools/fwstk/bin/Fluidbook/Tools/Search/SearchIndex.php diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/CommandLine/FWSTK.php b/resources/tools/fwstk/bin/Fluidbook/Tools/CommandLine/FWSTK.php deleted file mode 100644 index 8ab8189..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/CommandLine/FWSTK.php +++ /dev/null @@ -1,14 +0,0 @@ -format = $format; - $this->resolution = $resolution; - $this->withGraphics = $withGraphics; - $this->withTexts = $withTexts; - $this->version = $version; - } - - /** - * @param string $format - */ - public function setFormat(string $format) - { - $this->format = $format; - } - - /** - * @return string - */ - public function getFormat(): string - { - if ($this->format === 'jpeg') { - return 'jpg'; - } - return $this->format; - } - - /** - * @param int|string $resolution - */ - public function setResolution(int|string $resolution) - { - $this->resolution = $resolution; - } - - /** - * @return int|string - */ - public function getResolution(): int|string - { - return $this->resolution; - } - - /** - * @return bool - */ - public function isWithGraphics(): bool - { - return $this->withGraphics; - } - - /** - * @param bool $withGraphics - */ - public function setWithGraphics(bool $withGraphics) - { - $this->withGraphics = $withGraphics; - } - - /** - * @return bool - */ - public function isWithTexts(): bool - { - return $this->withTexts; - } - - /** - * @param bool $withTexts - */ - public function setWithTexts(bool $withTexts) - { - $this->withTexts = $withTexts; - } - - /** - * @return int - */ - public function getPage(): int - { - return $this->job->getPage(); - } - - /** - * @return mixed - */ - public function getOut() - { - return $this->job->getOut(); - } - - - /** - * @return string - */ - public function getVersion(): string - { - if ($this->getFormat() === 'svg') { - return 'html'; - } else if ($this->getFormat() === 'swf') { - return ''; - } - - return $this->version; - } - - /** - * @param string $version - */ - public function setVersion(string $version) - { - $this->version = $version; - } - - /** - * @return ProcessPage - */ - public function getJob(): ProcessPage - { - return $this->job; - } - - /** - * @param ProcessPage $job - */ - public function setJob(ProcessPage $job) - { - $this->job = $job; - return $this; - } - - public function getPath($force = false) - { - $dir = rtrim($this->getOut() . $this->getVersion(), '/') . '/'; - $minsize = 1; - $res = ''; - if ($this->getFormat() === 'svg') { - $prefix = $this->isWithGraphics() ? 'f' : 't'; - $res = $dir . $prefix . 'o' . $this->getPage(); - if ($this->isWithGraphics()) { - $res .= '-' . $this->getResolution(); - } - $res .= '.svg'; - $reffile = $this->makeSVGFile(); - $minsize = 100; - } else if (in_array($this->getFormat(), ['png', 'jpg'])) { - $prefix = $this->isWithTexts() ? 't' : 'h'; - if ($this->getResolution() === 'thumb') { - $res = $dir . 'p' . $this->getPage() . '.' . $this->getFormat(); - } else { - $res = $dir . $prefix . $this->getPage() . '-' . $this->getResolution() . '.' . $this->getFormat(); - } - } else if ($this->getFormat() === 'swf') { - $res = $dir . 'p' . $this->getPage() . '.' . $this->getFormat(); - } - - $do = false; - if (!file_exists($res) || filesize($res) < $minsize) { - $do = true; - } else if (isset($reffile) && filemtime($res) < filemtime($reffile)) { - $do = true; - } - - if ($do || $force) { - $this->makeFile($res); - } - return $res; - } - - public function makeFile($file) - { - $lock = $file . '.lock'; - if (file_exists($lock) && filemtime($lock) > time() - 300) { - sleep(10); - return $this->getPath(); - } - Files::mkdir(dirname($lock)); - touch($lock); - if ($this->getFormat() === 'svg') { - if ($this->isWithGraphics()) { - $this->makeOptimizedSVGFile($file); - } else { - $this->makeTextSVGFile($file); - } - } else if (in_array($this->getFormat(), ['png', 'jpg'])) { - if ($this->getResolution() === 'thumb') { - PDFTools::makeMiniShot($this->getSplittedPDFPage(), $file, 1, $this->getFormat()); - } else { - $rr = $this->getVersion() === 'html' ? $this->getJob()->getResolutionRatio() : $this->getJob()->getMobileFirstRatio(); - PDFTools::makeShotPNM($this->getSplittedPDFPage(), $file, 1, '', $this->getResolution() * $rr, 85, 4, $this->isWithTexts(), null, null, $this->getFormat()); - } - } else if ($this->getFormat() === 'swf') { - PDFTools::makeSWF($this->getSplittedPDFPage(), $file, 1, $this->getResolution(), 80); - } - unlink($lock); - if (!file_exists($file)) { - throw new \Exception(sprintf('Failed to generate %s', $file)); - } - } - - public function makeSVGFile($force = false) - { - $svgFile = $this->getOut() . '/html/fp' . $this->getPage() . '.svg'; - if (!$force && file_exists($svgFile) && filesize($svgFile) > 0) { - return $svgFile; - } - PDFTools::makeBaseSVGFile($this->getSplittedPDFPage(), $svgFile, 1); - return $svgFile; - } - - public function makeTextSVGFile($out) - { - $in = $this->makeSVGFile(); - $inter = str_replace('/to', '/tp', $out); - PDFTools::makeTextSVGFile($in, $inter); - SVGTools::optimizeSVG($inter, $out); - } - - public function makeOptimizedSVGFile($out) - { - $in = $this->makeSVGFile(); - SVGTools::optimizeSVGImages($in, $out, $this->getResolution()); - SVGTools::optimizeSVG($out, $out); - } - - protected function getSplittedPDFPage() - { - $res = $this->getOut() . 'pdf/p' . $this->getPage() . '.pdf'; - if (!file_exists($res)) { - $this->getJob()->splitDoc(); - } - return $res; - } -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/Jobs/ProcessPage.php b/resources/tools/fwstk/bin/Fluidbook/Tools/Jobs/ProcessPage.php deleted file mode 100644 index 37d3627..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/Jobs/ProcessPage.php +++ /dev/null @@ -1,146 +0,0 @@ -document = $document; - $this->page = $page; - $this->files = $files; - } - - /** - * @param int $page - */ - public function setPage(int $page): void - { - $this->page = $page; - } - - /** - * @return int - */ - public function getPage(): int - { - return $this->page; - } - - /** - * @return string - */ - public function getOut(): string - { - return $this->document->getConvertPath(); - } - - public function handle() - { - start_measure('Process page ' . $this->page); - foreach ($this->files as $file) { - $this->getFile($this->page, $file, false); - } - stop_measure('Process page ' . $this->page); - } - - - /** - * @param $page - * @param $file ProcessFile - * @param false $force - * @return string - */ - public function getFile($page, $file, $force = false) - { - $this->setPage($page); - $file->setJob($this); - return $file->getPath($force); - } - - public function getResolutionRatio() - { - return $this->document->getResolutionRatio(); - } - - public function getMobileFirstRatio() - { - return $this->document->getMobileFirstRatio(); - } - - public function splitDoc() - { - start_measure('Split PDF'); - - Files::mkdir($this->getOut() . '/pdf'); - $pdftk = new CommandLine('pdftk'); - $pdftk->setArg(null, $this->getPDFInput()); - $pdftk->setArg(null, 'burst'); - $pdftk->setArg(null, 'uncompress'); - $pdftk->setArg(null, 'output'); - $pdftk->setArg(null, $this->getOut() . 'pdf/p%d.pdf'); - $pdftk->execute(); - - - for ($i = 1; $i <= $this->getPagesNumber(); $i++) { - // Remove annotations : https://gist.github.com/stefanschmidt/5248592 - $file = sprintf($this->getOut() . 'pdf/p%d.pdf', $i); - $to = sprintf($this->getOut() . 'pdf/s%d.pdf', $i); - `LANG=C LC_CTYPE=C sed -n '/^\/Annots/!p' $file > $to`; - if (file_exists($to)) { - if (filesize($to) > 0) { - unlink($file); - rename($to, $file); - } else { - unlink($to); - } - } - } - stop_measure('Split PDF'); - } - - /** - * @return int - */ - public function getPagesNumber() - { - return $this->document->getPages(); - } - - public function getPDFInput() - { - return $this->document->getPDFInput(); - } -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/Document.php b/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/Document.php deleted file mode 100644 index f9844b6..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/Document.php +++ /dev/null @@ -1,220 +0,0 @@ -in = $in; - } - - /** - * @return string - */ - public function getHash(): string - { - if (null === $this->hash) { - $this->hash = hash_file('sha256', $this->in, false); - } - return $this->hash; - } - - public function getConvertPath(): string - { - return storage_path('fluidbook/convert/' . $this->getHash() . '/'); - } - - /** - * @throws \Exception - */ - public function getInfos($force = false) - { - if (!$force && $this->pages > 0) { - return; - } - - $fwstk = new FWSTK(); - $fwstk->setArg('--input ' . $this->getPDFInput()); - $fwstk->setArg('--infos'); - $fwstk->execute(); - - $infos = $fwstk->getOutput(); - - - if (preg_match('/Pages:\s*(\d+)/', $infos, $matches)) { - $this->pages = (int)$matches[1]; - if ($this->pages <= 0) { - throw new \Exception('Unable to get pages number'); - } - } - if (preg_match('/Page 1 size:\s*([0-9.]+)[ptsx\s]+([0-9.]+)/', $infos, $matches)) { - $this->width = (float)$matches[1]; - $this->height = (float)$matches[2]; - } - - if (preg_match('/NumberSections:\s*(.*)/', $infos, $matches)) { - $this->pageNumbers = explode(',', $matches[1]); - } else { - $this->pageNumbers = range(1, $this->pages); - } - - if ($this->pages <= 0) { - throw new \Exception(sprintf('Error running FWSTK %s', $infos)); - } - - $lines = Text::explodeNewLines($infos); - $bookmark_id = -1; - - foreach ($lines as $line) { - $line = trim($line); - [$k, $v] = explode(':', $line); - $k = trim($k); - $v = trim($v); - - if ($k === 'BookmarkTitle') { - $bookmark_id++; - $this->chapters[$bookmark_id] = new \stdClass(); - $this->chapters[$bookmark_id]->label = html_entity_decode($v); - } elseif ($k === 'BookmarkLevel') { - $this->chapters[$bookmark_id]->level = (int)$v - 1; - } elseif ($k === 'BookmarkPage') { - $this->chapters[$bookmark_id]->page = $v; - } - } - } - - /** - * @return int - */ - public function getPages() - { - $this->getInfos(); - return $this->pages; - } - - /** - * @return float - */ - public function getHeight() - { - $this->getInfos(); - return $this->height; - } - - /** - * @return float - */ - public function getWidth() - { - $this->getInfos(); - return $this->width; - } - - /** - * @param ProcessFile[] $files - * @param array $pages - * @param bool $sync - */ - public function processPages($files, $pages, $sync = false) - { - start_measure('Process pages (doc)'); - foreach ($pages as $i) { - $this->processPage($i, $files, $sync); - } - stop_measure('Process pages (doc)'); - } - - /** - * @param $page int - * @param $dest string - * @param $files ProcessFile[] - * @param false $sync - */ - public function processPage($page, $files, $sync = false) - { - $dispatchFunction = $sync ? 'dispatchSync' : 'dispatch'; - ProcessPage::$dispatchFunction($this, $page, $files); - } - - public function processLinks() - { - PDFTools::extractLinks($this->getPDFInput(), $this->getConvertPath()); - } - - public function processTexts() - { - start_measure('Extract texts'); - PDFTools::extractTexts($this->getPDFInput(), $this->getConvertPath()); - stop_measure('Extract texts'); - start_measure('Get highlights data'); - PDFTools::extractHighlightsData($this->getPDFInput(), $this->getConvertPath()); - stop_measure('Get highlights data'); - } - - public function getPageNumbers() - { - $this->getInfos(); - return $this->pageNumbers; - } - - public function getChapters() - { - $this->getInfos(); - return $this->chapters; - } - - public function getResolutionRatio() - { - $a4surface = 500990; // en pt² - $docSurface = $this->getWidth() * $this->getHeight(); // en pt² - // to have the same surface resulting in px, we have to sqrt the ratio between the two surfaces defined above - return sqrt($a4surface / $docSurface); - } - - public function getMobileFirstRatio() - { - return 620 / $this->getWidth(); - } - - public function getPDFInput() - { - return $this->in; - } -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/PDFTools.php b/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/PDFTools.php deleted file mode 100644 index 13f126d..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/PDF/PDFTools.php +++ /dev/null @@ -1,321 +0,0 @@ -setArg('p', $page); - $pdf2swf->setArg('T', 10); - $pdf2swf->setArg('Q', 30); - $pdf2swf->setArg('set reordertags', '0'); - $pdf2swf->setArg('fonts'); - $pdf2swf->setArg('set storeallcharacters'); - $pdf2swf->setArg('set subpixels', $resolution / 72); - $pdf2swf->setArg('set jpegquality', $quality); - $pdf2swf->setArg('set disablelinks'); - $pdf2swf->setArg('set dots'); - $pdf2swf->setArg(null, $in); - $pdf2swf->setArg('output', $out); - $pdf2swf->execute(); - $pdf2swf->debug(); - - if (file_exists($out)) { - return; - } - $pdf2swf = new CommandLine('pdf2swf', null, true); - $pdf2swf->setArg('p', $page); - $pdf2swf->setArg('T', 10); - $pdf2swf->setArg('Q', 120); - $pdf2swf->setArg('set poly2bitmap'); - $pdf2swf->setArg('set storeallcharacters'); - $pdf2swf->setArg('set reordertags', '0'); - $pdf2swf->setArg('fonts'); - $pdf2swf->setArg('set subpixels', $resolution / 72); - $pdf2swf->setArg('set jpegquality', $quality); - $pdf2swf->setArg('set disablelinks'); - $pdf2swf->setArg('set dots'); - $pdf2swf->setArg(null, $in); - $pdf2swf->setArg('output', $out); - $pdf2swf->execute(); - $pdf2swf->debug(); - if (file_exists($out)) { - return; - } - $pdf2swf = new CommandLine('pdf2swf', null, true); - $pdf2swf->setArg('p', $page); - $pdf2swf->setArg('T', 10); - $pdf2swf->setArg('set reordertags', '0'); - $pdf2swf->setArg('fonts'); - $pdf2swf->setArg('set bitmap'); - $pdf2swf->setArg('set storeallcharacters'); - $pdf2swf->setArg('set subpixels', $resolution / 72); - $pdf2swf->setArg('set jpegquality', $quality); - $pdf2swf->setArg('set disablelinks'); - $pdf2swf->setArg('set dots'); - $pdf2swf->setArg(null, $in); - $pdf2swf->setArg('output', $out); - $pdf2swf->execute(); - $pdf2swf->debug(); - } - - - public static function makeBaseSVGFile($in, $out, $page) - { - $pdftocairo = new CommandLine('pdftocairo'); - $pdftocairo->setArg('f', $page); - $pdftocairo->setArg('l', $page); - $pdftocairo->setArg('r', 300); - $pdftocairo->setArg(null, '-expand'); - $pdftocairo->setArg(null, '-svg'); - $pdftocairo->setArg(null, $in); - $pdftocairo->setArg(null, $out); - $pdftocairo->execute(); - } - - public static function makeTextSVGFile($in, $out) - { - $svg = new DOMDocument(); - $svg->preserveWhiteSpace = false; - $svg->load($in, LIBXML_PARSEHUGE); - - // Operations to delete - $xpath = new DOMXPath($svg); - $xpath->registerNamespace('svg', 'http://www.w3.org/2000/svg'); - $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink'); - $xpath->registerNamespace("php", "http://php.net/xpath"); - $toDelete = array( -// '//svg:defs/svg:clipPath', -// '//svg:defs/svg:image', -// '//svg:defs/svg:path', -// '//svg:defs/svg:pattern', - '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:path', - '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:rect', - '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:use[starts-with(@xlink:href, "#image")]', - '/svg:svg/svg:g[@id="surface1"]//svg:path', - '/svg:svg/svg:g[@id="surface1"]//svg:rect', - '/svg:svg/svg:g[@id="surface1"]//svg:filter', - '/svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]', - '//svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]', - //'//svg:image', - ); - - //global $svglog; - //$svglog = array('XPATH : ' . print_r($xpath, true)); - foreach ($toDelete as $q) { - $list = $xpath->query($q); - // $svglog[] = "Evaluate xpath query " . $q; - // $svglog[] = 'Give ' . $list->length . ' results'; - // $svglog[] = 'Deleting Nodes in ' . print_r($list, true); - if (count($list)) { - foreach ($list as $node) { - /* @var $node DOMNode */ - $parent = $node->parentNode; - $parent->removeChild($node); - } - } - } - file_put_contents($out, $svg->saveXML()); - } - - public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg') - { - $error = false; - if ($method === 'GS') { - self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format); - } elseif ($method === 'PNM') { - self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format); - } - // Test the result by checking all files - if (!file_exists($out)) { - $error = true; - } - // If error, we try to make thumbs with other method - if ($error) { - if ($method === 'GS') { - self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format); - } elseif ($method === 'PNM') { - self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format); - } - } - } - - protected static function makeShotGS($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $width = null, $height = null, $format = 'jpg') - { - // Fabrication des thumbnails avec ghostscript - $gs = new CommandLine('gs', null, true); - $gs->setArg('-dBATCH'); - $gs->setArg('-dNOPAUSE'); - $gs->setArg('-dNOPROMPT'); - // Antialias - $gs->setArg('-dDOINTERPOLATE'); - $gs->setArg('-dTextAlphaBits=' . $antialiasing); - $gs->setArg('-dGraphicsAlphaBits=' . $antialiasing); - // Device - $device = $format === 'jpg' ? 'jpeg' : 'png16m'; - $gs->setArg('-sDEVICE=' . $device); - // Dispotion & colors - // $gs->setArg('-dUseCIEColor'); - $gs->setArg('-dAutoRotatePages=/None'); - $gs->setArg('-dUseCropBox'); - // Resolution & Quality - $gs->setArg('-r' . round($resolution)); - if ($format === 'jpg') { - $gs->setArg('-dJPEGQ=' . $quality); - } - // Performances - $gs->setArg('-dNumRenderingThreads=4'); - // Page range - $gs->setArg('-dFirstPage=' . $page); - $gs->setArg('-dLastPage=' . $page); - // Files - $gs->setArg('-sOutputFile=' . $out); - - $gs->setArg(null, $in); - $gs->execute(); - } - - public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg') - { - $tmp = Files::tempnam(); - - $antialiasing = $antialiasing ? 'yes' : 'no'; - $freetype = $texts ? 'yes' : 'no'; - // Exporte les fichiers - $pdftoppm = new CommandLine('pdftoppm', null, true); - $pdftoppm->setArg('f', $page); - $pdftoppm->setArg('l', $page); - $pdftoppm->setArg('-cropbox'); - $pdftoppm->setArg('-freetype ' . $freetype); - $pdftoppm->setArg('-singlefile'); - $pdftoppm->setArg('-aa ' . $antialiasing); - $pdftoppm->setArg('-aaVector ' . $antialiasing); - if (null !== $resolution) { - $pdftoppm->setArg('r', $resolution); - } - if (null !== $width) { - $pdftoppm->setArg('-scale-to-x ' . $width); - } - if (null !== $height) { - $pdftoppm->setArg('-scale-to-y ' . $height); - } - $pdftoppm->setArg(null, $in); - $pdftoppm->setArg(null, $tmp); - $pdftoppm->execute(); - $tmp .= '.ppm'; - - - if (file_exists($tmp)) { - if ($format === 'jpg') { - $cjpeg = new CommandLine('cjpeg', null, true); - $cjpeg->setArg('-quality ' . ($quality + 6)); - $cjpeg->setArg('-outfile ' . $out); - $cjpeg->setArg(null, $tmp); - $cjpeg->execute(); - } else if ($format === 'png') { - $pnmtopng = new CommandLine('pnmtopng', $out, false); - $pnmtopng->setArg('-background white'); - $pnmtopng->setArg(null, $tmp); - $pnmtopng->execute(); - } - unlink($tmp); - } else { - $pdftoppm->debug(); - } - } - - public static function getThumbFromPDF($pdf, $page, $format = 'jpg') - { - if (!file_exists($pdf)) { - return false; - } - $dir = WS_CACHE . '/thumbs/' . sha1($pdf) . '/'; - if (!file_exists($dir)) { - mkdir($dir, 0777, true); - } - $image = $dir . '/p' . $page . '.' . $format; - $mtime = filemtime($image); - - if (!file_exists($image) || $mtime < filemtime(__FILE__) || $mtime < filemtime($pdf)) { - self::makeMiniShot($pdf, $image, $page, $format); - } - - return $image; - } - - public static function extractLinks($pdf, $out) - { - $out .= 'links/'; - Files::mkdir($out); - - if (file_exists($out . '/p1.csv')) { - return; - } - $fwstk = new FWSTK(); - $fwstk->setArg('--input ' . $pdf); - $fwstk->setArg('--extractLinks ' . $out . 'p%d.csv'); - $fwstk->setArg('--threads 1'); - $fwstk->execute(); - } - - public static function extractTexts($pdf, $out, $ignoreSeparators = '') - { - $out .= 'texts/'; - Files::mkdir($out); - - $fwstk = new FWSTK(); - $fwstk->setArg('--input ' . $pdf); - $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt'); - $fwstk->setArg('--extractTextsMethod fluidbook'); - $fwstk->setArg('--threads 1'); - if ($ignoreSeparators !== '') { - $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators); - } - $fwstk->execute(); - } - - public static function extractHighlightsData($pdf, $out) - { - $out .= 'texts/'; - Files::mkdir($out); - - $fwstk = new FWSTK(); - $fwstk->setArg('--input ' . $pdf); - $fwstk->setArg('--layout ' . $out . 'p%d.fby'); - $fwstk->setArg('--cmaps ' . $out); - $fwstk->setArg('--fonts' . $out . 'fonts/web/'); - $fwstk->execute(); - $fwstk->dd(); - - } - -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/SVG/SVGTools.php b/resources/tools/fwstk/bin/Fluidbook/Tools/SVG/SVGTools.php deleted file mode 100644 index 4c08a17..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/SVG/SVGTools.php +++ /dev/null @@ -1,128 +0,0 @@ -/', '', $svg); - $svg = self::_disablePreserveRatio($svg); - - $osvg = preg_replace_callback('|\]*)\>|', function ($matches) use ($resolution) { - return self::optimizeRaster($matches, $resolution); - }, $svg); - $osvg = preg_replace('/^]*>/', '$0', $osvg); - file_put_contents($out, $osvg); - } - - protected static function _svg($c, $p) - { - self::$_i = 0; - self::$_e = 0; - self::$_u = 0; - - $c = str_replace(array('id="', 'url(#', 'xlink:href="#'), array('id="p' . $p . '-', 'url(#p' . $p . '-', 'xlink:href="#p' . $p . '-'), $c); - $c = preg_replace_callback('/\]*)\>/m', function ($matches) use ($p) { - self::$_i++; - return ''; - }, $c); - $c = str_replace('', '', $c); - - $c = preg_replace_callback('/ $key) { - $attrs[$key] = $m[2][$i]; - } - $e = explode(',', $attrs['xlink:href'], 2); - $image = imagecreatefromstring(base64_decode($e[1])); - $iw = imagesx($image); - $ih = imagesy($image); - $scale = 1; - if (isset($attrs['transform']) && strpos($attrs['transform'], 'matrix(') === 0) { - preg_match_all('/([\d\-\.]+)/', $attrs['transform'], $ma); - $values = $ma[0]; - $scale = max($values[0], $values[1]); - } - - $resolutionScale = ($iw / $attrs['width']) * $scale * ($resolution / 72); - - $dw = round($resolutionScale * $iw); - $dh = round($resolutionScale * $ih); - - $im = imagecreatetruecolor($dw, $dh); - imagecopyresampled($im, $image, 0, 0, 0, 0, $dw, $dh, $iw, $ih); - imagedestroy($image); - - $tmp = Files::tempnam(); - imagejpeg($im, $tmp, 85); - imagedestroy($im); - - $attrs['xlink:href'] = 'data:image/jpeg;base64,' . base64_encode(file_get_contents($tmp)); - unlink($tmp); - - $res = ' $v) { - $res .= $k . '="' . $v . '" '; - } - $res .= '/>'; - return $res; - } -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/Search/Page.php b/resources/tools/fwstk/bin/Fluidbook/Tools/Search/Page.php deleted file mode 100644 index 535d9f0..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/Search/Page.php +++ /dev/null @@ -1,35 +0,0 @@ -_documentPage = $documentPage; - $this->_path = $path; - } - - /** - * @return array - * @throws JsonException - */ - public function getHighlights(): array - { - return json_decode(file_get_contents($this->_path . '/p' . $this->_documentPage . '.fby'), false, 512, JSON_THROW_ON_ERROR); - } - - /** - * @return string - */ - public function getIndex(): string - { - return trim(file_get_contents($this->_path . '/fi' . $this->_documentPage . '.txt')); - } -} diff --git a/resources/tools/fwstk/bin/Fluidbook/Tools/Search/SearchIndex.php b/resources/tools/fwstk/bin/Fluidbook/Tools/Search/SearchIndex.php deleted file mode 100644 index b900662..0000000 --- a/resources/tools/fwstk/bin/Fluidbook/Tools/Search/SearchIndex.php +++ /dev/null @@ -1,95 +0,0 @@ -_pages[$page] = new Page($documentPage, $path); - } - - /** - * @return array - */ - public function compileIndex() - { - $index = []; - foreach ($this->_pages as $pageNumber => $page) { - $twords = explode("\n", $page->getIndex()); - - foreach ($twords as $woadata) { - $w1 = explode(',', trim($woadata)); - if (count($w1) <= 1) { - continue; - } - list($woa, $worddata) = $w1; - $e = explode("\t", $worddata, 2); - if (count($e) < 2) { - continue; - } - list($total, $wordslist) = $e; - - if ($woa === '') { - continue; - } - - if (!isset($index[$woa])) { - $index[$woa] = array('t' => 0, 'w' => array()); - } - $index[$woa]['t'] += (int)$total; - - $words = explode("\t", $wordslist); - - foreach ($words as $word) { - list($wordwa, $count) = explode('$', $word, 2); - if (!isset($index[$woa]['w'][$wordwa])) { - $index[$woa]['w'][$wordwa] = array('t' => 0, 'p' => [$pageNumber => 0]); - } - if (!isset($index[$woa]['w'][$wordwa]['p'][$pageNumber])) { - $index[$woa]['w'][$wordwa]['p'][$pageNumber] = 0; - } - $index[$woa]['w'][$wordwa]['t'] += (int)$count; - $index[$woa]['w'][$wordwa]['p'][$pageNumber] += (int)$count; - } - } - } - return $index; - } - - - /** - * @return stdClass - * @throws JsonException - */ - public function compileHighlights() - { - $res = new stdClass(); - foreach ($this->_pages as $pageNumber => $page) { - $words = $page->getHighlights(); - foreach ($words as $i => $w) { - $word = $w->word; - $word = trim($word, "\0"); - if ($word === '') { - continue; - } - unset($w->word); - $w->page = $pageNumber; - $w->idx = $i; - if (!isset($res->{$word})) { - $res->{$word} = array(); - } - $res->{$word}[] = $w; - } - } - return $res; - } -} diff --git a/src/PDFTools.php b/src/PDFTools.php index 4c88e3a..9bf4bb2 100644 --- a/src/PDFTools.php +++ b/src/PDFTools.php @@ -183,7 +183,16 @@ class PDFTools } } } - file_put_contents($out, $svg->saveXML()); + $res = $svg->saveXML(); + $res = preg_replace('//', '', $res); + while (true) { + $res = preg_replace('/<\/g>/', '', $res, -1, $count); + if (!$count) { + break; + } + } + + file_put_contents($out, $res); } public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg') -- 2.39.5