use DOMXPath;
use Cubist\PDF\CommandLine\FWSTK;
-class PDFTools
-{
- /**
- * @param $path string
- * @return string
- */
-
- public static function resource_path($path)
- {
- return __DIR__ . '/../resources/' . self::_cleanPath($path);
- }
-
- /**
- * @param $path string
- * @return string
- */
- public static function tools_path($path, $chmod = false)
- {
- $res = self::resource_path('tools/' . self::_cleanPath($path));
- if ($chmod) {
- self::chmodExec($res);
- }
- return $res;
- }
-
- public static function chmodExec($path)
- {
- if (is_file($path)) {
- @chmod($path, 0755);
- }
- }
-
- protected static function parseInfos($data)
- {
- $res = [];
-
- // This function get general infos (pages sizes, boxes, number sections and
- // bookmarks
- // Init arrays
- $res['infos'] = [];
- $res['infos']['size'] = [0, 0];
- $res['bookmarks'] = [];
- $res['numberSections'] = '';
- $bookmark_id = 0;
-
- $res['size'] = array(0, 0);
- $lines = explode("\n", $data);
- foreach ($lines as $line) {
- $line = trim(Text::condenseWhite($line));
- $e = explode(':', $line, 2);
- $k = trim($e[0]);
- if (count($e) < 2) {
- continue;
- }
- $v = trim($e[1]);
- if ($k == 'Pages' || $k == 'NumberOfPages') {
- $res['pages'] = $res['infos']['pages'] = $v;
- $res['infos']['page'] = [];
- for ($i = 1; $i <= $res['pages']; $i++) {
- $res['infos']['page'][$i] = [];
- }
- } elseif (preg_match('|Page\s+([0-9]+)\s+(.*)Box:\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)|iu', $line, $m)) {
- $res['infos']['page'][$m[1]][strtolower($m[2])] = [$m[3], $m[4], $m[5], $m[6]];
- } elseif (preg_match('|Page\s+([0-9]+)\s+size:\s+([0-9.]*)[pts[:space:]]+x\s+([0-9.]*)\s+pts|iu', $line, $m)) {
- $res['infos']['page'][$m[1]]['size'] = array($m[2], $m[3]);
- if ($m[1] == 1) {
- $res['infos']['size'][0] = $m[2];
- $res['infos']['size'][1] = $m[3];
- }
- } elseif ($k == 'BookmarkTitle') {
- $res['bookmarks'][$bookmark_id] = array('titre' => str_replace(' ', '', trim($v)));
- } elseif ($k == 'BookmarkLevel') {
- $res['bookmarks'][$bookmark_id]['level'] = $v;
- } elseif ($k == 'BookmarkPage') {
- $res['bookmarks'][$bookmark_id]['page'] = $v;
- $bookmark_id++;
- } elseif ($k == 'NumberSections') {
- $res['numberSections'] = $v;
- $res['infos']['pagenumbers'] = $v;
- }
- }
- return $res;
- }
-
- public static function infos($pdf)
- {
- $fwstk = new FWSTK();
- $fwstk->setArg('--input ' . $pdf);
- $fwstk->setArg('--infos');
- $fwstk->execute();
- $out = $fwstk->getOutput();
-
- $pdfinfo = new CommandLine('pdfinfo');
- $pdfinfo->setArg('-box');
- $pdfinfo->setArg('f', 1);
- $pdfinfo->setArg('l', 100000);
- $pdfinfo->setArg(null, $pdf);
- $pdfinfo->execute();
- $out .= "\n";
- $out .= $pdfinfo->getOutput();
-
- return self::parseInfos($out);
- }
-
- /**
- * @param $path string
- * @return string
- */
- protected static function _cleanPath($path)
- {
- return trim($path, '/');
- }
-
- public static function makeMiniShot($in, $out, $page, $format = 'jpg')
- {
- self::makeShotFixedWidth($in, $out, $page, 'p', 500, 65, 4, 'PNM', $format);
- }
-
- public static function makeShotFixedWidth($in, $out, $page, $prefix = '', $w = 100, $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg')
- {
- // Make thumbs of $w width
- self::makeShot($in, $out, $page, $prefix, null, $quality, $antialiasing, $method, $w, -1, $format);
- }
-
- public static function makeShotFixedHeight($in, $out, $page, $prefix = '', $h = '', $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg')
- {
- // Make thumbs of $h height
- self::makeShot($in, $out, $page, $prefix, null, $quality, $antialiasing, $method, -1, $h, $format);
- }
-
- public static function makeSWF($in, $out, $page, $resolution = 100, $quality = 90)
- {
- if (file_exists($out)) {
- unlink($out);
- }
- $pdf2swf = new CommandLine('pdf2swf', null, true);
- $pdf2swf->setArg('p', $page);
- $pdf2swf->setArg('T', 10);
- $pdf2swf->setArg('Q', 30);
- $pdf2swf->setArg('set reordertags', '0');
- $pdf2swf->setArg('fonts');
- $pdf2swf->setArg('set storeallcharacters');
- $pdf2swf->setArg('set subpixels', $resolution / 72);
- $pdf2swf->setArg('set jpegquality', $quality);
- $pdf2swf->setArg('set disablelinks');
- $pdf2swf->setArg('set dots');
- $pdf2swf->setArg(null, $in);
- $pdf2swf->setArg('output', $out);
- $pdf2swf->execute();
- $pdf2swf->debug();
-
- if (file_exists($out)) {
- return;
- }
- $pdf2swf = new CommandLine('pdf2swf', null, true);
- $pdf2swf->setArg('p', $page);
- $pdf2swf->setArg('T', 10);
- $pdf2swf->setArg('Q', 120);
- $pdf2swf->setArg('set poly2bitmap');
- $pdf2swf->setArg('set storeallcharacters');
- $pdf2swf->setArg('set reordertags', '0');
- $pdf2swf->setArg('fonts');
- $pdf2swf->setArg('set subpixels', $resolution / 72);
- $pdf2swf->setArg('set jpegquality', $quality);
- $pdf2swf->setArg('set disablelinks');
- $pdf2swf->setArg('set dots');
- $pdf2swf->setArg(null, $in);
- $pdf2swf->setArg('output', $out);
- $pdf2swf->execute();
- $pdf2swf->debug();
- if (file_exists($out)) {
- return;
- }
- $pdf2swf = new CommandLine('pdf2swf', null, true);
- $pdf2swf->setArg('p', $page);
- $pdf2swf->setArg('T', 10);
- $pdf2swf->setArg('set reordertags', '0');
- $pdf2swf->setArg('fonts');
- $pdf2swf->setArg('set bitmap');
- $pdf2swf->setArg('set storeallcharacters');
- $pdf2swf->setArg('set subpixels', $resolution / 72);
- $pdf2swf->setArg('set jpegquality', $quality);
- $pdf2swf->setArg('set disablelinks');
- $pdf2swf->setArg('set dots');
- $pdf2swf->setArg(null, $in);
- $pdf2swf->setArg('output', $out);
- $pdf2swf->execute();
- $pdf2swf->debug();
- }
-
-
- public static function makeBaseSVGFile($in, $out, $page)
- {
- $pdftocairo = new CommandLine('pdftocairo');
- $pdftocairo->setArg('f', $page);
- $pdftocairo->setArg('l', $page);
- $pdftocairo->setArg('r', 300);
- $pdftocairo->setArg(null, '-expand');
- $pdftocairo->setArg(null, '-svg');
- $pdftocairo->setArg(null, $in);
- $pdftocairo->setArg(null, $out);
- $pdftocairo->execute();
- }
-
- public static function makeTextSVGFile($in, $out)
- {
- $svg = new DOMDocument();
- $svg->preserveWhiteSpace = false;
- $svg->load($in, LIBXML_PARSEHUGE);
-
- // Operations to delete
- $xpath = new DOMXPath($svg);
- $xpath->registerNamespace('svg', 'http://www.w3.org/2000/svg');
- $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
- $xpath->registerNamespace("php", "http://php.net/xpath");
- $toDelete = [
- '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:path',
- '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:rect',
- '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:use[starts-with(@xlink:href, "#image")]',
- '/svg:svg/svg:g[@id="surface1"]//svg:path',
- '/svg:svg/svg:g[@id="surface1"]//svg:rect',
- '/svg:svg/svg:g[@id="surface1"]//svg:filter',
- '/svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
- '//svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
- ];
- $toDeleteIfOrphan = [
- '//svg:image',
- ];
-
- foreach ($toDelete as $q) {
- $list = $xpath->query($q);
- if (count($list)) {
- foreach ($list as $node) {
- /* @var $node DOMNode */
- $parent = $node->parentNode;
- $parent->removeChild($node);
- }
- }
- }
-
- foreach ($toDeleteIfOrphan as $q) {
- $list = $xpath->query($q);
- if (count($list)) {
- foreach ($list as $node) {
- /* @var $node DOMElement */
- $id = $node->getAttribute('id');
- if ($xpath->query('//*[@id="' . $id . '"]')->count() > 0) {
- $parent = $node->parentNode;
- $parent->removeChild($node);
- }
- }
- }
- }
- $res = $svg->saveXML();
- $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"\/>/', '', $res);
- while (true) {
- $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"><\/g>/', '', $res, -1, $count);
- if (!$count) {
- break;
- }
- }
-
- file_put_contents($out, $res);
- }
-
- public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg')
- {
- $error = false;
- if ($method === 'GS') {
- self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
- } elseif ($method === 'PNM') {
- self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
- }
- // Test the result by checking all files
- if (!file_exists($out)) {
- $error = true;
- }
- // If error, we try to make thumbs with other method
- if ($error) {
- if ($method === 'GS') {
- self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
- } elseif ($method === 'PNM') {
- self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
- }
- }
- }
-
- protected static function makeShotGS($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $width = null, $height = null, $format = 'jpg')
- {
- // Fabrication des thumbnails avec ghostscript
- $gs = new CommandLine('gs', null, true);
- $gs->setArg('-dBATCH');
- $gs->setArg('-dNOPAUSE');
- $gs->setArg('-dNOPROMPT');
- // Antialias
- $gs->setArg('-dDOINTERPOLATE');
- $gs->setArg('-dTextAlphaBits=' . $antialiasing);
- $gs->setArg('-dGraphicsAlphaBits=' . $antialiasing);
- // Device
- $device = $format === 'jpg' ? 'jpeg' : 'png16m';
- $gs->setArg('-sDEVICE=' . $device);
- // Dispotion & colors
- // $gs->setArg('-dUseCIEColor');
- $gs->setArg('-dAutoRotatePages=/None');
- $gs->setArg('-dUseCropBox');
- // Resolution & Quality
- $gs->setArg('-r' . round($resolution));
- if ($format === 'jpg') {
- $gs->setArg('-dJPEGQ=' . $quality);
- }
- // Performances
- $gs->setArg('-dNumRenderingThreads=4');
- // Page range
- $gs->setArg('-dFirstPage=' . $page);
- $gs->setArg('-dLastPage=' . $page);
- // Files
- $gs->setArg('-sOutputFile=' . $out);
-
- $gs->setArg(null, $in);
- $gs->execute();
- }
-
- public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg')
- {
- $tmp = Files::tempnam();
-
- $antialiasing = $antialiasing ? 'yes' : 'no';
- $freetype = $texts ? 'yes' : 'no';
- // Exporte les fichiers
- $pdftoppm = new CommandLine('pdftoppm', null, true);
- $pdftoppm->setArg('f', $page);
- $pdftoppm->setArg('l', $page);
- $pdftoppm->setArg('-cropbox');
- $pdftoppm->setArg('-freetype ' . $freetype);
- $pdftoppm->setArg('-singlefile');
- $pdftoppm->setArg('-aa ' . $antialiasing);
- $pdftoppm->setArg('-aaVector ' . $antialiasing);
- if (null !== $resolution) {
- $pdftoppm->setArg('r', $resolution);
- }
- if (null !== $width) {
- $pdftoppm->setArg('-scale-to-x ' . $width);
- }
- if (null !== $height) {
- $pdftoppm->setArg('-scale-to-y ' . $height);
- }
- $pdftoppm->setArg(null, $in);
- $pdftoppm->setArg(null, $tmp);
- $pdftoppm->execute();
- $tmp .= '.ppm';
-
-
- if (file_exists($tmp)) {
- if ($format === 'jpg') {
- $cjpeg = new CommandLine('cjpeg', null, true);
- $cjpeg->setArg('-quality ' . ($quality + 6));
- $cjpeg->setArg('-outfile ' . $out);
- $cjpeg->setArg(null, $tmp);
- $cjpeg->execute();
- } else if ($format === 'png') {
- $pnmtopng = new CommandLine('pnmtopng', $out, false);
- $pnmtopng->setArg('-background white');
- $pnmtopng->setArg(null, $tmp);
- $pnmtopng->execute();
- }
- unlink($tmp);
- } else {
- $pdftoppm->debug();
- }
- }
-
- public static function getThumbFromPDF($pdf, $page, $format = 'jpg')
- {
- if (!file_exists($pdf)) {
- return false;
- }
- $dir = WS_CACHE . '/thumbs/' . sha1($pdf) . '/';
- if (!file_exists($dir)) {
- mkdir($dir, 0777, true);
- }
- $image = $dir . '/p' . $page . '.' . $format;
- $mtime = filemtime($image);
-
- if (!file_exists($image) || $mtime < filemtime(__FILE__) || $mtime < filemtime($pdf)) {
- self::makeMiniShot($pdf, $image, $page, $format);
- }
-
- return $image;
- }
-
- public static function extractLinks($pdf, $out)
- {
- $out .= 'links/';
- Files::mkdir($out);
-
- if (file_exists($out . '/p1.csv')) {
- return;
- }
- $fwstk = new FWSTK();
- $fwstk->setArg('--input ' . $pdf);
- $fwstk->setArg('--extractLinks ' . $out . 'p%d.csv');
- $fwstk->setArg('--threads 1');
- $fwstk->execute();
- }
-
- public static function extractTexts($pdf, $out, $textExtraction = 'fluidbook', $ignoreSeparators = '')
- {
- $out .= 'texts';
- if ($ignoreSeparators) {
- $out .= '/sep_' . md5($ignoreSeparators);
- }
- $out = Files::mkdir($out);
-
- $fwstk = new FWSTK();
- $fwstk->setArg('--input ' . $pdf);
- $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
- $fwstk->setArg('--extractTextsMethod ' . $textExtraction);
- $fwstk->setArg('--threads 1');
- if ($ignoreSeparators) {
- $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators);
- }
- $fwstk->execute();
- }
-
-
- public static function extractHighlightsData($pdf, $out)
- {
- $out .= 'texts/';
- Files::mkdir($out);
-
- $fwstk = new FWSTK();
- $fwstk->setArg('--input ' . $pdf);
- $fwstk->setArg('--layout ' . $out . 'p%d.fby');
- $fwstk->setArg('--cmaps ' . $out);
- $fwstk->setArg('--fonts' . $out . 'fonts/web/');
- $fwstk->execute();
- }
-
- public static function fixPDF($in, $out)
- {
- if (file_exists($out)) {
- unlink($out);
- }
-
- $pdftk = new CommandLine('pdftk');
- $pdftk->setArg(null, $in);
- $pdftk->setArg(null, 'output');
- $pdftk->setArg(null, $out);
- $pdftk->execute();
-
- if (!file_exists($out)) {
- $pdftocairo = new CommandLine('pdftocairo');
- $pdftocairo->setPath(CONVERTER_PATH);
- $pdftocairo->setArg(null, '-pdf');
- $pdftocairo->setArg(null, $in);
- $pdftocairo->setArg(null, $out);
- $pdftocairo->execute();
- }
- }
-
- public static function split($pdf, $out)
- {
-
- $lock = $pdf . '.split.lock';
-
- $returnAfterSleep = false;
-
- usleep(rand(100000, 2000000));
-
- while (file_exists($lock)) {
- if (filemtime($lock) < time() - 300) {
- unlink($lock);
- }
- $returnAfterSleep = true;
- sleep(5);
- }
- if ($returnAfterSleep) {
- return;
- }
-
- touch($lock);
-
- try {
- Files::mkdir($out);
- $pdftk = new CommandLine('pdftk');
- $pdftk->setArg(null, $pdf);
- $pdftk->setArg(null, 'burst');
- $pdftk->setArg(null, 'uncompress');
- $pdftk->setArg(null, 'output');
- $pdftk->setArg(null, $out . '/p%d.pdf');
- $pdftk->execute();
-
-
- for ($i = 1; true; $i++) {
- // Remove annotations : https://gist.github.com/stefanschmidt/5248592
- $file = sprintf($out . '/p%d.pdf', $i);
- if (!file_exists($file)) {
- break;
- }
- $to = sprintf($out . '/s%d.pdf', $i);
- `LANG=C LC_CTYPE=C sed -n '/^\/Annots/!p' $file > $to`;
- if (file_exists($to)) {
- if (filesize($to) > 0) {
- unlink($file);
- rename($to, $file);
- } else {
- unlink($to);
- }
- }
- }
- } catch (\Exception $e) {
-
- }
- unlink($lock);
- }
-
- public static function compressPDF($source, $dest, $resolution = 72)
- {
- $gs = new CommandLine('gs');
- $gs->setArg('-dBATCH');
- $gs->setArg('-dNOPAUSE');
- $gs->setArg('-dNOPROMPT');
- $gs->setArg('-sOutputFile=' . $dest);
- $gs->setArg('-sDEVICE=pdfwrite');
- $gs->setArg('-dPDFSETTINGS=/ebook');
- $gs->setArg('-dColorImageResolution=' . $resolution);
- $gs->setArg('-dAutoRotatePages=/None');
- $gs->setArg('-dColorConversionStrategy=/LeaveColorUnchanged');
- $gs->setArg(null, $source);
- $gs->execute();
- }
+class PDFTools {
+    /**
+     * Resolve a path relative to the bundled "resources" directory.
+     *
+     * Leading and trailing slashes are stripped from $path (see _cleanPath)
+     * before it is appended to "<directory of this file>/../resources/".
+     *
+     * @param string $path Path relative to the resources directory
+     * @return string
+     */
+    public static function resource_path($path) {
+        $relative = self::_cleanPath($path);
+
+        return __DIR__ . '/../resources/' . $relative;
+    }
+
+    /**
+     * Resolve a path inside the "tools" sub-directory of the resources directory.
+     *
+     * @param string $path  Path relative to resources/tools
+     * @param bool   $chmod When truthy, the resolved path is handed to chmodExec()
+     * @return string
+     */
+    public static function tools_path($path, $chmod = false) {
+        $toolPath = self::resource_path('tools/' . self::_cleanPath($path));
+        if (!$chmod) {
+            return $toolPath;
+        }
+
+        self::chmodExec($toolPath);
+        return $toolPath;
+    }
+
+    /**
+     * Set mode 0755 on $path when it is a regular file.
+     *
+     * Nothing happens for anything that is not a regular file, and a chmod
+     * failure is suppressed (the call is made with the @ operator).
+     *
+     * @param string $path
+     * @return void
+     */
+    public static function chmodExec($path) {
+        if (!is_file($path)) {
+            return;
+        }
+        @chmod($path, 0755);
+    }
+
+    /**
+     * Parse a textual "key: value" report about a PDF into a structured array.
+     *
+     * Keys of the returned array:
+     *  - raw:            the unparsed input
+     *  - pages:          value of a "Pages" / "NumberOfPages" line (only set when such a line exists)
+     *  - infos:          'pages', 'size' (size of page 1), 'page' (per-page boxes and size), 'pagenumbers'
+     *  - bookmarks:      list of ['titre' => ..., 'level' => ..., 'page' => ...]
+     *  - numberSections: value of the "NumberSections" line ('' when absent)
+     *  - size:           initialised to [0, 0] and never modified here
+     *
+     * @param string $data Raw report, one entry per line
+     * @return array
+     */
+    protected static function parseInfos($data) {
+        $res = [
+            'raw' => $data,
+            'infos' => ['size' => [0, 0]],
+            'bookmarks' => [],
+            'numberSections' => '',
+            'size' => [0, 0],
+        ];
+        $bookmarkIndex = 0;
+
+        foreach (explode("\n", $data) as $rawLine) {
+            $line = trim(Text::condenseWhite($rawLine));
+            $parts = explode(':', $line, 2);
+            if (count($parts) < 2) {
+                // Not a "key: value" line
+                continue;
+            }
+            $key = trim($parts[0]);
+            $value = trim($parts[1]);
+
+            if ($key == 'Pages' || $key == 'NumberOfPages') {
+                $res['pages'] = $res['infos']['pages'] = $value;
+                // Pre-create one empty entry per page, keyed from 1
+                $res['infos']['page'] = [];
+                for ($pageNumber = 1; $pageNumber <= $res['pages']; $pageNumber++) {
+                    $res['infos']['page'][$pageNumber] = [];
+                }
+                continue;
+            }
+
+            // The two "Page N ..." patterns are tested against the whole line and
+            // must be tried before the remaining keys (they are not anchored).
+            if (preg_match('|Page\s+([0-9]+)\s+(.*)Box:\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)|iu', $line, $m)) {
+                // e.g. "Page 1 MediaBox: ..." is stored under the lower-cased box name
+                $res['infos']['page'][$m[1]][strtolower($m[2])] = [$m[3], $m[4], $m[5], $m[6]];
+                continue;
+            }
+
+            if (preg_match('|Page\s+([0-9]+)\s+size:\s+([0-9.]*)[pts[:space:]]+x\s+([0-9.]*)\s+pts|iu', $line, $m)) {
+                $res['infos']['page'][$m[1]]['size'] = [$m[2], $m[3]];
+                if ($m[1] == 1) {
+                    // The first page also gives the document-level size
+                    $res['infos']['size'][0] = $m[2];
+                    $res['infos']['size'][1] = $m[3];
+                }
+                continue;
+            }
+
+            if ($key == 'BookmarkTitle') {
+                // A title line starts a new bookmark entry
+                $res['bookmarks'][$bookmarkIndex] = ['titre' => str_replace(' ', '', trim($value))];
+                continue;
+            }
+
+            if ($key == 'BookmarkLevel') {
+                $res['bookmarks'][$bookmarkIndex]['level'] = $value;
+                continue;
+            }
+
+            if ($key == 'BookmarkPage') {
+                // The page line closes the current bookmark entry
+                $res['bookmarks'][$bookmarkIndex]['page'] = $value;
+                $bookmarkIndex++;
+                continue;
+            }
+
+            if ($key == 'NumberSections') {
+                $res['numberSections'] = $value;
+                $res['infos']['pagenumbers'] = $value;
+            }
+        }
+
+        return $res;
+    }
+
+    /**
+     * Collect information about a PDF.
+     *
+     * Runs the FWSTK command with "--infos" and then "pdfinfo -box" on pages
+     * 1 to 100000, joins both outputs with a newline and parses the result
+     * with parseInfos().
+     *
+     * @param string $pdf Path of the PDF file
+     * @return array Result of parseInfos()
+     * @throws \Exception When $pdf does not exist
+     */
+    public static function infos($pdf) {
+        $exists = file_exists($pdf);
+        if (!$exists) {
+            throw new \Exception('Unable to parse infos of ' . $pdf . ' : file not found');
+        }
+
+        $toolkit = new FWSTK();
+        $toolkit->setArg('--input ' . $pdf);
+        $toolkit->setArg('--infos');
+        $toolkit->execute();
+        $report = $toolkit->getOutput();
+
+        $boxes = new CommandLine('pdfinfo');
+        $boxes->setArg('-box');
+        $boxes->setArg('f', 1);
+        $boxes->setArg('l', 100000);
+        $boxes->setArg(null, $pdf);
+        $boxes->execute();
+
+        $report .= "\n";
+        $report .= $boxes->getOutput();
+
+        return self::parseInfos($report);
+    }
+
+    /**
+     * Strip every leading and trailing "/" from a path.
+     *
+     * @param string $path
+     * @return string
+     */
+    protected static function _cleanPath($path) {
+        $withoutTrailing = rtrim($path, '/');
+
+        return ltrim($withoutTrailing, '/');
+    }
+
+    /**
+     * Render one page as a 500-wide image using the PNM method, quality 65,
+     * antialiasing 4 and prefix 'p' (delegates to makeShotFixedWidth()).
+     *
+     * @param string $in     Source PDF
+     * @param string $out    Destination image
+     * @param int    $page   Page number
+     * @param string $format Output format, 'jpg' by default
+     * @return void
+     */
+    public static function makeMiniShot($in, $out, $page, $format = 'jpg') {
+        $prefix = 'p';
+        $width = 500;
+        $quality = 65;
+        $antialiasing = 4;
+
+        self::makeShotFixedWidth($in, $out, $page, $prefix, $width, $quality, $antialiasing, 'PNM', $format);
+    }
+
+    /**
+     * Render one page with a fixed width of $w.
+     *
+     * Delegates to makeShot() with a null resolution, width $w and height -1.
+     *
+     * @return void
+     */
+    public static function makeShotFixedWidth($in, $out, $page, $prefix = '', $w = 100, $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg') {
+        $resolution = null;
+        $height = -1;
+
+        self::makeShot($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $method, $w, $height, $format);
+    }
+
+    /**
+     * Render one page with a fixed height of $h.
+     *
+     * Delegates to makeShot() with a null resolution, width -1 and height $h.
+     *
+     * @return void
+     */
+    public static function makeShotFixedHeight($in, $out, $page, $prefix = '', $h = '', $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg') {
+        $resolution = null;
+        $width = -1;
+
+        self::makeShot($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $method, $width, $h, $format);
+    }
+
+    /**
+     * Convert one PDF page to SWF with pdf2swf.
+     *
+     * Any existing $out is deleted first. Up to three pdf2swf invocations are
+     * then tried, each with a different option set, and the method returns as
+     * soon as $out exists. debug() is called on the command after every attempt.
+     *
+     * @param string $in         Source PDF
+     * @param string $out        Destination SWF
+     * @param int    $page       Page number
+     * @param int    $resolution Divided by 72 to give the "subpixels" setting
+     * @param int    $quality    Passed as the "jpegquality" setting
+     * @return void
+     */
+    public static function makeSWF($in, $out, $page, $resolution = 100, $quality = 90) {
+        if (file_exists($out)) {
+            unlink($out);
+        }
+
+        // Options specific to each attempt, in the order they are passed to the
+        // command. Each entry is the argument list of one setArg() call.
+        $attempts = [
+            [
+                ['Q', 30],
+                ['set reordertags', '0'],
+                ['fonts'],
+                ['set storeallcharacters'],
+            ],
+            [
+                ['Q', 120],
+                ['set poly2bitmap'],
+                ['set storeallcharacters'],
+                ['set reordertags', '0'],
+                ['fonts'],
+            ],
+            [
+                ['set reordertags', '0'],
+                ['fonts'],
+                ['set bitmap'],
+                ['set storeallcharacters'],
+            ],
+        ];
+
+        foreach ($attempts as $specificArgs) {
+            $converter = new CommandLine('pdf2swf', null, true);
+            $converter->setArg('p', $page);
+            $converter->setArg('T', 10);
+            foreach ($specificArgs as $callArgs) {
+                $converter->setArg(...$callArgs);
+            }
+            // Options shared by every attempt
+            $converter->setArg('set subpixels', $resolution / 72);
+            $converter->setArg('set jpegquality', $quality);
+            $converter->setArg('set disablelinks');
+            $converter->setArg('set dots');
+            $converter->setArg(null, $in);
+            $converter->setArg('output', $out);
+            $converter->execute();
+            $converter->debug();
+
+            if (file_exists($out)) {
+                return;
+            }
+        }
+    }
+
+
+    /**
+     * Export a single PDF page to SVG with pdftocairo (options f/l = $page,
+     * r = 300, plus "-expand" and "-svg").
+     *
+     * @param string $in   Source PDF
+     * @param string $out  Destination SVG
+     * @param int    $page Page number
+     * @return void
+     */
+    public static function makeBaseSVGFile($in, $out, $page) {
+        $exporter = new CommandLine('pdftocairo');
+        $exporter->setArg('f', $page);
+        $exporter->setArg('l', $page);
+        $exporter->setArg('r', 300);
+
+        // Positional arguments, in the order they are given to the command
+        foreach (['-expand', '-svg', $in, $out] as $positional) {
+            $exporter->setArg(null, $positional);
+        }
+
+        $exporter->execute();
+    }
+
+    /**
+     * Derive a stripped-down SVG from $in and write it to $out.
+     *
+     * Paths, rects, filters and <use> elements pointing at "#image..." are
+     * removed from the "surface" groups, <image> elements are removed (see the
+     * note below), and empty clip groups are removed from the serialised XML.
+     *
+     * @param string $in  Source SVG file
+     * @param string $out Destination SVG file
+     * @return void
+     */
+    public static function makeTextSVGFile($in, $out) {
+        $document = new DOMDocument();
+        $document->preserveWhiteSpace = false;
+        $document->load($in, LIBXML_PARSEHUGE);
+
+        $finder = new DOMXPath($document);
+        $finder->registerNamespace('svg', 'http://www.w3.org/2000/svg');
+        $finder->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
+        $finder->registerNamespace("php", "http://php.net/xpath");
+
+        // Nodes matched by these queries are always removed
+        $alwaysRemoved = [
+            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:path',
+            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:rect',
+            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:use[starts-with(@xlink:href, "#image")]',
+            '/svg:svg/svg:g[@id="surface1"]//svg:path',
+            '/svg:svg/svg:g[@id="surface1"]//svg:rect',
+            '/svg:svg/svg:g[@id="surface1"]//svg:filter',
+            '/svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
+            '//svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
+        ];
+        // Nodes matched by these queries are removed subject to the id lookup below
+        $removedIfOrphan = [
+            '//svg:image',
+        ];
+
+        foreach ($alwaysRemoved as $query) {
+            $matches = $finder->query($query);
+            if (!count($matches)) {
+                continue;
+            }
+            foreach ($matches as $node) {
+                /* @var $node DOMNode */
+                $node->parentNode->removeChild($node);
+            }
+        }
+
+        foreach ($removedIfOrphan as $query) {
+            $matches = $finder->query($query);
+            if (!count($matches)) {
+                continue;
+            }
+            foreach ($matches as $node) {
+                /* @var $node DOMElement */
+                $id = $node->getAttribute('id');
+                // NOTE(review): this lookup searches for elements carrying the same
+                // id, which appears to include the image itself, so an image with an
+                // id would always be removed. Confirm whether a check for remaining
+                // references (xlink:href="#id") was intended instead.
+                $sameId = $finder->query('//*[@id="' . $id . '"]');
+                if ($sameId->count() > 0) {
+                    $node->parentNode->removeChild($node);
+                }
+            }
+        }
+
+        $xml = $document->saveXML();
+        // Self-closed empty clip groups
+        $xml = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"\/>/', '', $xml);
+        // Open/close empty clip groups; repeated because removing an inner group
+        // can leave its parent empty
+        do {
+            $xml = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"><\/g>/', '', $xml, -1, $removed);
+        } while ($removed);
+
+        file_put_contents($out, $xml);
+    }
+
+    /**
+     * Render one PDF page to an image, falling back to the other renderer
+     * when the requested one did not produce $out.
+     *
+     * $method is 'GS' (makeShotGS) or 'PNM' (makeShotPNM); any other value
+     * renders nothing.
+     *
+     * @return void
+     */
+    public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg') {
+        $wantsGS = $method === 'GS';
+        $wantsPNM = $method === 'PNM';
+
+        // First try with the requested method
+        if ($wantsGS) {
+            self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
+        } elseif ($wantsPNM) {
+            self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
+        }
+
+        // Success is judged only by the presence of the output file
+        if (file_exists($out)) {
+            return;
+        }
+
+        // Second try with the other method
+        if ($wantsGS) {
+            self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
+        } elseif ($wantsPNM) {
+            self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
+        }
+    }
+
+    /**
+     * Render one PDF page to an image with Ghostscript.
+     *
+     * $prefix, $width and $height are accepted for signature parity with
+     * makeShotPNM() but are not used by this renderer.
+     *
+     * @param string   $in           Source PDF
+     * @param string   $out          Destination image
+     * @param int      $page         Page number (used as both first and last page)
+     * @param string   $prefix       Unused
+     * @param int|null $resolution   Rendering resolution; when null no -r option
+     *                               is passed and Ghostscript uses its own default
+     * @param int      $quality      JPEG quality (only applied for the 'jpg' format)
+     * @param int      $antialiasing Value for TextAlphaBits / GraphicsAlphaBits
+     * @param int|null $width        Unused
+     * @param int|null $height       Unused
+     * @param string   $format       'jpg' selects the jpeg device, anything else png16m
+     * @return void
+     */
+    protected static function makeShotGS($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $width = null, $height = null, $format = 'jpg') {
+        // Build the thumbnail with ghostscript
+        $gs = new CommandLine('gs', null, true);
+        $gs->setArg('-dBATCH');
+        $gs->setArg('-dNOPAUSE');
+        $gs->setArg('-dNOPROMPT');
+        // Antialias
+        $gs->setArg('-dDOINTERPOLATE');
+        $gs->setArg('-dTextAlphaBits=' . $antialiasing);
+        $gs->setArg('-dGraphicsAlphaBits=' . $antialiasing);
+        // Device
+        $device = $format === 'jpg' ? 'jpeg' : 'png16m';
+        $gs->setArg('-sDEVICE=' . $device);
+        // Layout & colors
+        // $gs->setArg('-dUseCIEColor');
+        $gs->setArg('-dAutoRotatePages=/None');
+        $gs->setArg('-dUseCropBox');
+        // Resolution & Quality
+        // makeShotFixedWidth()/makeShotFixedHeight() pass a null resolution; the
+        // previous code turned that into "-r0". Skip the option in that case.
+        if (null !== $resolution) {
+            $gs->setArg('-r' . round($resolution));
+        }
+        if ($format === 'jpg') {
+            $gs->setArg('-dJPEGQ=' . $quality);
+        }
+        // Performances
+        $gs->setArg('-dNumRenderingThreads=4');
+        // Page range
+        $gs->setArg('-dFirstPage=' . $page);
+        $gs->setArg('-dLastPage=' . $page);
+        // Files
+        $gs->setArg('-sOutputFile=' . $out);
+
+        $gs->setArg(null, $in);
+        $gs->execute();
+    }
+
+    /**
+     * Render one PDF page to an image with pdftoppm, then convert the PPM
+     * result to JPEG (cjpeg) or PNG (pnmtopng).
+     *
+     * $prefix is accepted but not used here. When pdftoppm produced no file,
+     * debug() is called on the pdftoppm command and nothing is written to $out.
+     *
+     * @param string   $in           Source PDF
+     * @param string   $out          Destination image
+     * @param int      $page         Page number (used as both first and last page)
+     * @param string   $prefix       Unused
+     * @param int|null $resolution   Passed as "r" unless null
+     * @param int      $quality      JPEG quality; 6 is added, capped at 100
+     * @param mixed    $antialiasing Truthy enables -aa / -aaVector
+     * @param bool     $texts        Truthy enables -freetype
+     * @param int|null $width        Passed as -scale-to-x unless null
+     * @param int|null $height       Passed as -scale-to-y unless null
+     * @param string   $format       'jpg' or 'png'; other values produce no output
+     * @return void
+     */
+    public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg') {
+        // NOTE(review): if Files::tempnam() creates the file it names, that file is
+        // never removed here (only the ".ppm" sibling is) - confirm.
+        $tmp = Files::tempnam();
+
+        $antialiasing = $antialiasing ? 'yes' : 'no';
+        $freetype = $texts ? 'yes' : 'no';
+        // Export the page as PPM
+        $pdftoppm = new CommandLine('pdftoppm', null, true);
+        $pdftoppm->setArg('f', $page);
+        $pdftoppm->setArg('l', $page);
+        $pdftoppm->setArg('-cropbox');
+        $pdftoppm->setArg('-freetype ' . $freetype);
+        $pdftoppm->setArg('-singlefile');
+        $pdftoppm->setArg('-aa ' . $antialiasing);
+        $pdftoppm->setArg('-aaVector ' . $antialiasing);
+        if (null !== $resolution) {
+            $pdftoppm->setArg('r', $resolution);
+        }
+        if (null !== $width) {
+            $pdftoppm->setArg('-scale-to-x ' . $width);
+        }
+        if (null !== $height) {
+            $pdftoppm->setArg('-scale-to-y ' . $height);
+        }
+        $pdftoppm->setArg(null, $in);
+        $pdftoppm->setArg(null, $tmp);
+        $pdftoppm->execute();
+        // The ".ppm" extension is appended to the output root given above
+        $tmp .= '.ppm';
+
+        if (file_exists($tmp)) {
+            if ($format === 'jpg') {
+                // cjpeg documents a quality range of 0..100, so the +6 boost must
+                // not push the value past 100.
+                $jpegQuality = min(100, $quality + 6);
+                $cjpeg = new CommandLine('cjpeg', null, true);
+                $cjpeg->setArg('-quality ' . $jpegQuality);
+                $cjpeg->setArg('-outfile ' . $out);
+                $cjpeg->setArg(null, $tmp);
+                $cjpeg->execute();
+            } elseif ($format === 'png') {
+                $pnmtopng = new CommandLine('pnmtopng', $out, false);
+                $pnmtopng->setArg('-background white');
+                $pnmtopng->setArg(null, $tmp);
+                $pnmtopng->execute();
+            }
+            unlink($tmp);
+        } else {
+            $pdftoppm->debug();
+        }
+    }
+
+    /**
+     * Return the path of a cached thumbnail of one PDF page, rendering it
+     * with makeMiniShot() when needed.
+     *
+     * The thumbnail is (re)built when it does not exist yet, or when it is
+     * older than this source file or older than the PDF.
+     *
+     * @param string $pdf    Path of the PDF file
+     * @param int    $page   Page number
+     * @param string $format Image format / extension, 'jpg' by default
+     * @return string|false Thumbnail path, or false when $pdf does not exist
+     */
+    public static function getThumbFromPDF($pdf, $page, $format = 'jpg') {
+        if (!file_exists($pdf)) {
+            return false;
+        }
+        // One cache directory per PDF path
+        $dir = WS_CACHE . '/thumbs/' . sha1($pdf) . '/';
+        if (!file_exists($dir)) {
+            mkdir($dir, 0777, true);
+        }
+        $image = $dir . '/p' . $page . '.' . $format;
+
+        // Only read the thumbnail mtime once we know it exists: calling
+        // filemtime() on a missing file raises a warning.
+        $stale = !file_exists($image);
+        if (!$stale) {
+            $mtime = filemtime($image);
+            $stale = $mtime < filemtime(__FILE__) || $mtime < filemtime($pdf);
+        }
+
+        if ($stale) {
+            self::makeMiniShot($pdf, $image, $page, $format);
+        }
+
+        return $image;
+    }
+
+    /**
+     * Extract the links of a PDF into "<out>links/p%d.csv" with FWSTK.
+     *
+     * Nothing is run when "p1.csv" already exists in the links directory.
+     *
+     * @param string $pdf Source PDF
+     * @param string $out Base output path; 'links/' is appended as-is
+     * @return void
+     */
+    public static function extractLinks($pdf, $out) {
+        $linksDir = $out . 'links/';
+        Files::mkdir($linksDir);
+
+        $alreadyExtracted = file_exists($linksDir . '/p1.csv');
+        if (!$alreadyExtracted) {
+            $extractor = new FWSTK();
+            $extractor->setArg('--input ' . $pdf);
+            $extractor->setArg('--extractLinks ' . $linksDir . 'p%d.csv');
+            $extractor->setArg('--threads 1');
+            $extractor->execute();
+        }
+    }
+
+    /**
+     * Extract the texts of a PDF with FWSTK.
+     *
+     * Output goes to "<out>texts", or to "<out>texts/sep_<md5>" when
+     * $ignoreSeparators is set; the file name pattern appended to the value
+     * returned by Files::mkdir() is "%s%d.txt".
+     *
+     * @param string $pdf              Source PDF
+     * @param string $out              Base output path; 'texts' is appended as-is
+     * @param string $textExtraction   Value of --extractTextsMethod
+     * @param string $ignoreSeparators Value of --ignoreSeparators (omitted when empty)
+     * @return void
+     */
+    public static function extractTexts($pdf, $out, $textExtraction = 'fluidbook', $ignoreSeparators = '') {
+        $separatorSuffix = $ignoreSeparators ? '/sep_' . md5($ignoreSeparators) : '';
+        $textsDir = Files::mkdir($out . 'texts' . $separatorSuffix);
+
+        $extractor = new FWSTK();
+        $extractor->setArg('--input ' . $pdf);
+        $extractor->setArg('--extractTexts ' . $textsDir . '%s%d.txt');
+        $extractor->setArg('--extractTextsMethod ' . $textExtraction);
+        $extractor->setArg('--threads 1');
+        if ($ignoreSeparators) {
+            $extractor->setArg('--ignoreSeparators ' . $ignoreSeparators);
+        }
+        $extractor->execute();
+    }
+
+
+    /**
+     * Run FWSTK on a PDF with the --layout, --cmaps and --fonts options,
+     * writing under "<out>texts/".
+     *
+     * @param string $pdf Source PDF
+     * @param string $out Base output path; 'texts/' is appended as-is
+     * @return void
+     */
+    public static function extractHighlightsData($pdf, $out) {
+        $out .= 'texts/';
+        Files::mkdir($out);
+
+        $fwstk = new FWSTK();
+        $fwstk->setArg('--input ' . $pdf);
+        $fwstk->setArg('--layout ' . $out . 'p%d.fby');
+        $fwstk->setArg('--cmaps ' . $out);
+        // The option name and its value must be separated by a space, like every
+        // other option above (it was previously glued to the path).
+        $fwstk->setArg('--fonts ' . $out . 'fonts/web/');
+        $fwstk->execute();
+    }
+
+    /**
+     * Rewrite a PDF into $out, first with pdftk and, when that produced no
+     * file, with pdftocairo (-pdf) located via CONVERTER_PATH.
+     *
+     * Any existing $out is deleted before starting.
+     *
+     * @param string $in  Source PDF
+     * @param string $out Destination PDF
+     * @return void
+     */
+    public static function fixPDF($in, $out) {
+        if (file_exists($out)) {
+            unlink($out);
+        }
+
+        $rewriter = new CommandLine('pdftk');
+        foreach ([$in, 'output', $out] as $positional) {
+            $rewriter->setArg(null, $positional);
+        }
+        $rewriter->execute();
+
+        if (file_exists($out)) {
+            return;
+        }
+
+        // pdftk did not produce the file: fall back to pdftocairo
+        $fallback = new CommandLine('pdftocairo');
+        $fallback->setPath(CONVERTER_PATH);
+        foreach (['-pdf', $in, $out] as $positional) {
+            $fallback->setArg(null, $positional);
+        }
+        $fallback->execute();
+    }
+
+    /**
+     * Split a PDF into one file per page ("<out>/p<N>.pdf") with pdftk burst,
+     * then strip the lines starting with "/Annots" from every page file.
+     *
+     * A "<pdf>.split.lock" file guards against concurrent runs: when the lock
+     * exists, the call waits until it disappears and then returns without
+     * splitting. A lock older than 300 seconds is deleted.
+     *
+     * @param string $pdf Source PDF
+     * @param string $out Output directory
+     * @return void
+     */
+    public static function split($pdf, $out) {
+        $lock = $pdf . '.split.lock';
+
+        $returnAfterSleep = false;
+
+        // Random delay between 0.1s and 2s before looking at the lock
+        usleep(rand(100000, 2000000));
+
+        while (file_exists($lock)) {
+            if (filemtime($lock) < time() - 300) {
+                // Lock older than 5 minutes: remove it
+                unlink($lock);
+            }
+            // NOTE(review): after waiting on (or removing) a lock this call returns
+            // without splitting, so a stale lock is cleared but the work is not
+            // redone by this caller - confirm this is intended.
+            $returnAfterSleep = true;
+            sleep(5);
+        }
+        if ($returnAfterSleep) {
+            return;
+        }
+
+        touch($lock);
+
+        try {
+            Files::mkdir($out);
+            $pdftk = new CommandLine('pdftk');
+            $pdftk->setArg(null, $pdf);
+            $pdftk->setArg(null, 'burst');
+            $pdftk->setArg(null, 'uncompress');
+            $pdftk->setArg(null, 'output');
+            $pdftk->setArg(null, $out . '/p%d.pdf');
+            $pdftk->execute();
+
+            for ($i = 1; true; $i++) {
+                // Remove annotations : https://gist.github.com/stefanschmidt/5248592
+                // Paths are concatenated rather than run through sprintf() so a "%"
+                // in $out cannot be interpreted as a format specifier.
+                $file = $out . '/p' . $i . '.pdf';
+                if (!file_exists($file)) {
+                    break;
+                }
+                $to = $out . '/s' . $i . '.pdf';
+                // Both paths are shell-escaped: they may contain spaces or shell
+                // metacharacters.
+                $command = sprintf(
+                    "LANG=C LC_CTYPE=C sed -n '/^\\/Annots/!p' %s > %s",
+                    escapeshellarg($file),
+                    escapeshellarg($to)
+                );
+                shell_exec($command);
+                if (file_exists($to)) {
+                    if (filesize($to) > 0) {
+                        // Replace the page file with its filtered copy
+                        unlink($file);
+                        rename($to, $file);
+                    } else {
+                        // sed produced nothing: keep the unfiltered page
+                        unlink($to);
+                    }
+                }
+            }
+        } catch (\Exception $e) {
+            // Splitting stays best-effort for callers, but the failure is recorded
+            error_log('PDFTools::split failed for ' . $pdf . ': ' . $e->getMessage());
+        } finally {
+            // Always release the lock, even when a non-Exception error propagates
+            if (file_exists($lock)) {
+                unlink($lock);
+            }
+        }
+    }
+
+    /**
+     * Re-write a PDF with Ghostscript's pdfwrite device using the /ebook
+     * preset and the given colour image resolution.
+     *
+     * @param string $source     Source PDF
+     * @param string $dest       Destination PDF
+     * @param int    $resolution Value of -dColorImageResolution
+     * @return void
+     */
+    public static function compressPDF($source, $dest, $resolution = 72) {
+        $compressor = new CommandLine('gs');
+
+        // Options in the order they are handed to the command
+        $options = [
+            '-dBATCH',
+            '-dNOPAUSE',
+            '-dNOPROMPT',
+            '-sOutputFile=' . $dest,
+            '-sDEVICE=pdfwrite',
+            '-dPDFSETTINGS=/ebook',
+            '-dColorImageResolution=' . $resolution,
+            '-dAutoRotatePages=/None',
+            '-dColorConversionStrategy=/LeaveColorUnchanged',
+        ];
+        foreach ($options as $option) {
+            $compressor->setArg($option);
+        }
+
+        $compressor->setArg(null, $source);
+        $compressor->execute();
+    }
}