Git - cubist_pdf.git/commitdiff
wip #5770 @0.25
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Thu, 2 Mar 2023 16:43:11 +0000 (17:43 +0100)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Thu, 2 Mar 2023 16:43:11 +0000 (17:43 +0100)
src/PDFTools.php

index aa97a4bcfdb2678fc9b742d29a98db8aca0c1d0b..f4a2f4a13ed64e09bacfd8e48ef083d652474b87 100644 (file)
@@ -12,536 +12,520 @@ use DOMNode;
 use DOMXPath;
 use Cubist\PDF\CommandLine\FWSTK;
 
-class PDFTools
-{
-    /**
-     * @param $path string
-     * @return string
-     */
-
-    public static function resource_path($path)
-    {
-        return __DIR__ . '/../resources/' . self::_cleanPath($path);
-    }
-
-    /**
-     * @param $path string
-     * @return string
-     */
-    public static function tools_path($path, $chmod = false)
-    {
-        $res = self::resource_path('tools/' . self::_cleanPath($path));
-        if ($chmod) {
-            self::chmodExec($res);
-        }
-        return $res;
-    }
-
-    public static function chmodExec($path)
-    {
-        if (is_file($path)) {
-            @chmod($path, 0755);
-        }
-    }
-
-    protected static function parseInfos($data)
-    {
-        $res = [];
-
-        // This function get general infos (pages sizes, boxes, number sections and
-        // bookmarks
-        // Init arrays
-        $res['infos'] = [];
-        $res['infos']['size'] = [0, 0];
-        $res['bookmarks'] = [];
-        $res['numberSections'] = '';
-        $bookmark_id = 0;
-
-        $res['size'] = array(0, 0);
-        $lines = explode("\n", $data);
-        foreach ($lines as $line) {
-            $line = trim(Text::condenseWhite($line));
-            $e = explode(':', $line, 2);
-            $k = trim($e[0]);
-            if (count($e) < 2) {
-                continue;
-            }
-            $v = trim($e[1]);
-            if ($k == 'Pages' || $k == 'NumberOfPages') {
-                $res['pages'] = $res['infos']['pages'] = $v;
-                $res['infos']['page'] = [];
-                for ($i = 1; $i <= $res['pages']; $i++) {
-                    $res['infos']['page'][$i] = [];
-                }
-            } elseif (preg_match('|Page\s+([0-9]+)\s+(.*)Box:\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)|iu', $line, $m)) {
-                $res['infos']['page'][$m[1]][strtolower($m[2])] = [$m[3], $m[4], $m[5], $m[6]];
-            } elseif (preg_match('|Page\s+([0-9]+)\s+size:\s+([0-9.]*)[pts[:space:]]+x\s+([0-9.]*)\s+pts|iu', $line, $m)) {
-                $res['infos']['page'][$m[1]]['size'] = array($m[2], $m[3]);
-                if ($m[1] == 1) {
-                    $res['infos']['size'][0] = $m[2];
-                    $res['infos']['size'][1] = $m[3];
-                }
-            } elseif ($k == 'BookmarkTitle') {
-                $res['bookmarks'][$bookmark_id] = array('titre' => str_replace('&#13;', '', trim($v)));
-            } elseif ($k == 'BookmarkLevel') {
-                $res['bookmarks'][$bookmark_id]['level'] = $v;
-            } elseif ($k == 'BookmarkPage') {
-                $res['bookmarks'][$bookmark_id]['page'] = $v;
-                $bookmark_id++;
-            } elseif ($k == 'NumberSections') {
-                $res['numberSections'] = $v;
-                $res['infos']['pagenumbers'] = $v;
-            }
-        }
-        return $res;
-    }
-
-    public static function infos($pdf)
-    {
-        $fwstk = new FWSTK();
-        $fwstk->setArg('--input ' . $pdf);
-        $fwstk->setArg('--infos');
-        $fwstk->execute();
-        $out = $fwstk->getOutput();
-
-        $pdfinfo = new CommandLine('pdfinfo');
-        $pdfinfo->setArg('-box');
-        $pdfinfo->setArg('f', 1);
-        $pdfinfo->setArg('l', 100000);
-        $pdfinfo->setArg(null, $pdf);
-        $pdfinfo->execute();
-        $out .= "\n";
-        $out .= $pdfinfo->getOutput();
-
-        return self::parseInfos($out);
-    }
-
-    /**
-     * @param $path string
-     * @return string
-     */
-    protected static function _cleanPath($path)
-    {
-        return trim($path, '/');
-    }
-
-    public static function makeMiniShot($in, $out, $page, $format = 'jpg')
-    {
-        self::makeShotFixedWidth($in, $out, $page, 'p', 500, 65, 4, 'PNM', $format);
-    }
-
-    public static function makeShotFixedWidth($in, $out, $page, $prefix = '', $w = 100, $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg')
-    {
-        // Make thumbs of $w width
-        self::makeShot($in, $out, $page, $prefix, null, $quality, $antialiasing, $method, $w, -1, $format);
-    }
-
-    public static function makeShotFixedHeight($in, $out, $page, $prefix = '', $h = '', $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg')
-    {
-        // Make thumbs of $h height
-        self::makeShot($in, $out, $page, $prefix, null, $quality, $antialiasing, $method, -1, $h, $format);
-    }
-
-    public static function makeSWF($in, $out, $page, $resolution = 100, $quality = 90)
-    {
-        if (file_exists($out)) {
-            unlink($out);
-        }
-        $pdf2swf = new CommandLine('pdf2swf', null, true);
-        $pdf2swf->setArg('p', $page);
-        $pdf2swf->setArg('T', 10);
-        $pdf2swf->setArg('Q', 30);
-        $pdf2swf->setArg('set reordertags', '0');
-        $pdf2swf->setArg('fonts');
-        $pdf2swf->setArg('set storeallcharacters');
-        $pdf2swf->setArg('set subpixels', $resolution / 72);
-        $pdf2swf->setArg('set jpegquality', $quality);
-        $pdf2swf->setArg('set disablelinks');
-        $pdf2swf->setArg('set dots');
-        $pdf2swf->setArg(null, $in);
-        $pdf2swf->setArg('output', $out);
-        $pdf2swf->execute();
-        $pdf2swf->debug();
-
-        if (file_exists($out)) {
-            return;
-        }
-        $pdf2swf = new CommandLine('pdf2swf', null, true);
-        $pdf2swf->setArg('p', $page);
-        $pdf2swf->setArg('T', 10);
-        $pdf2swf->setArg('Q', 120);
-        $pdf2swf->setArg('set poly2bitmap');
-        $pdf2swf->setArg('set storeallcharacters');
-        $pdf2swf->setArg('set reordertags', '0');
-        $pdf2swf->setArg('fonts');
-        $pdf2swf->setArg('set subpixels', $resolution / 72);
-        $pdf2swf->setArg('set jpegquality', $quality);
-        $pdf2swf->setArg('set disablelinks');
-        $pdf2swf->setArg('set dots');
-        $pdf2swf->setArg(null, $in);
-        $pdf2swf->setArg('output', $out);
-        $pdf2swf->execute();
-        $pdf2swf->debug();
-        if (file_exists($out)) {
-            return;
-        }
-        $pdf2swf = new CommandLine('pdf2swf', null, true);
-        $pdf2swf->setArg('p', $page);
-        $pdf2swf->setArg('T', 10);
-        $pdf2swf->setArg('set reordertags', '0');
-        $pdf2swf->setArg('fonts');
-        $pdf2swf->setArg('set bitmap');
-        $pdf2swf->setArg('set storeallcharacters');
-        $pdf2swf->setArg('set subpixels', $resolution / 72);
-        $pdf2swf->setArg('set jpegquality', $quality);
-        $pdf2swf->setArg('set disablelinks');
-        $pdf2swf->setArg('set dots');
-        $pdf2swf->setArg(null, $in);
-        $pdf2swf->setArg('output', $out);
-        $pdf2swf->execute();
-        $pdf2swf->debug();
-    }
-
-
-    public static function makeBaseSVGFile($in, $out, $page)
-    {
-        $pdftocairo = new CommandLine('pdftocairo');
-        $pdftocairo->setArg('f', $page);
-        $pdftocairo->setArg('l', $page);
-        $pdftocairo->setArg('r', 300);
-        $pdftocairo->setArg(null, '-expand');
-        $pdftocairo->setArg(null, '-svg');
-        $pdftocairo->setArg(null, $in);
-        $pdftocairo->setArg(null, $out);
-        $pdftocairo->execute();
-    }
-
-    public static function makeTextSVGFile($in, $out)
-    {
-        $svg = new DOMDocument();
-        $svg->preserveWhiteSpace = false;
-        $svg->load($in, LIBXML_PARSEHUGE);
-
-        // Operations to delete
-        $xpath = new DOMXPath($svg);
-        $xpath->registerNamespace('svg', 'http://www.w3.org/2000/svg');
-        $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
-        $xpath->registerNamespace("php", "http://php.net/xpath");
-        $toDelete = [
-            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:path',
-            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:rect',
-            '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:use[starts-with(@xlink:href, "#image")]',
-            '/svg:svg/svg:g[@id="surface1"]//svg:path',
-            '/svg:svg/svg:g[@id="surface1"]//svg:rect',
-            '/svg:svg/svg:g[@id="surface1"]//svg:filter',
-            '/svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
-            '//svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
-        ];
-        $toDeleteIfOrphan = [
-            '//svg:image',
-        ];
-
-        foreach ($toDelete as $q) {
-            $list = $xpath->query($q);
-            if (count($list)) {
-                foreach ($list as $node) {
-                    /* @var $node DOMNode */
-                    $parent = $node->parentNode;
-                    $parent->removeChild($node);
-                }
-            }
-        }
-
-        foreach ($toDeleteIfOrphan as $q) {
-            $list = $xpath->query($q);
-            if (count($list)) {
-                foreach ($list as $node) {
-                    /* @var $node DOMElement */
-                    $id = $node->getAttribute('id');
-                    if ($xpath->query('//*[@id="' . $id . '"]')->count() > 0) {
-                        $parent = $node->parentNode;
-                        $parent->removeChild($node);
-                    }
-                }
-            }
-        }
-        $res = $svg->saveXML();
-        $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"\/>/', '', $res);
-        while (true) {
-            $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"><\/g>/', '', $res, -1, $count);
-            if (!$count) {
-                break;
-            }
-        }
-
-        file_put_contents($out, $res);
-    }
-
-    public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg')
-    {
-        $error = false;
-        if ($method === 'GS') {
-            self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
-        } elseif ($method === 'PNM') {
-            self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
-        }
-        // Test the result by checking all files
-        if (!file_exists($out)) {
-            $error = true;
-        }
-        // If error, we try to make thumbs with other method
-        if ($error) {
-            if ($method === 'GS') {
-                self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
-            } elseif ($method === 'PNM') {
-                self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
-            }
-        }
-    }
-
-    protected static function makeShotGS($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $width = null, $height = null, $format = 'jpg')
-    {
-        // Fabrication des thumbnails avec ghostscript
-        $gs = new CommandLine('gs', null, true);
-        $gs->setArg('-dBATCH');
-        $gs->setArg('-dNOPAUSE');
-        $gs->setArg('-dNOPROMPT');
-        // Antialias
-        $gs->setArg('-dDOINTERPOLATE');
-        $gs->setArg('-dTextAlphaBits=' . $antialiasing);
-        $gs->setArg('-dGraphicsAlphaBits=' . $antialiasing);
-        // Device
-        $device = $format === 'jpg' ? 'jpeg' : 'png16m';
-        $gs->setArg('-sDEVICE=' . $device);
-        // Dispotion & colors
-        // $gs->setArg('-dUseCIEColor');
-        $gs->setArg('-dAutoRotatePages=/None');
-        $gs->setArg('-dUseCropBox');
-        // Resolution & Quality
-        $gs->setArg('-r' . round($resolution));
-        if ($format === 'jpg') {
-            $gs->setArg('-dJPEGQ=' . $quality);
-        }
-        // Performances
-        $gs->setArg('-dNumRenderingThreads=4');
-        // Page range
-        $gs->setArg('-dFirstPage=' . $page);
-        $gs->setArg('-dLastPage=' . $page);
-        // Files
-        $gs->setArg('-sOutputFile=' . $out);
-
-        $gs->setArg(null, $in);
-        $gs->execute();
-    }
-
-    public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg')
-    {
-        $tmp = Files::tempnam();
-
-        $antialiasing = $antialiasing ? 'yes' : 'no';
-        $freetype = $texts ? 'yes' : 'no';
-        // Exporte les fichiers
-        $pdftoppm = new CommandLine('pdftoppm', null, true);
-        $pdftoppm->setArg('f', $page);
-        $pdftoppm->setArg('l', $page);
-        $pdftoppm->setArg('-cropbox');
-        $pdftoppm->setArg('-freetype ' . $freetype);
-        $pdftoppm->setArg('-singlefile');
-        $pdftoppm->setArg('-aa ' . $antialiasing);
-        $pdftoppm->setArg('-aaVector ' . $antialiasing);
-        if (null !== $resolution) {
-            $pdftoppm->setArg('r', $resolution);
-        }
-        if (null !== $width) {
-            $pdftoppm->setArg('-scale-to-x ' . $width);
-        }
-        if (null !== $height) {
-            $pdftoppm->setArg('-scale-to-y ' . $height);
-        }
-        $pdftoppm->setArg(null, $in);
-        $pdftoppm->setArg(null, $tmp);
-        $pdftoppm->execute();
-        $tmp .= '.ppm';
-
-
-        if (file_exists($tmp)) {
-            if ($format === 'jpg') {
-                $cjpeg = new CommandLine('cjpeg', null, true);
-                $cjpeg->setArg('-quality ' . ($quality + 6));
-                $cjpeg->setArg('-outfile ' . $out);
-                $cjpeg->setArg(null, $tmp);
-                $cjpeg->execute();
-            } else if ($format === 'png') {
-                $pnmtopng = new CommandLine('pnmtopng', $out, false);
-                $pnmtopng->setArg('-background white');
-                $pnmtopng->setArg(null, $tmp);
-                $pnmtopng->execute();
-            }
-            unlink($tmp);
-        } else {
-            $pdftoppm->debug();
-        }
-    }
-
-    public static function getThumbFromPDF($pdf, $page, $format = 'jpg')
-    {
-        if (!file_exists($pdf)) {
-            return false;
-        }
-        $dir = WS_CACHE . '/thumbs/' . sha1($pdf) . '/';
-        if (!file_exists($dir)) {
-            mkdir($dir, 0777, true);
-        }
-        $image = $dir . '/p' . $page . '.' . $format;
-        $mtime = filemtime($image);
-
-        if (!file_exists($image) || $mtime < filemtime(__FILE__) || $mtime < filemtime($pdf)) {
-            self::makeMiniShot($pdf, $image, $page, $format);
-        }
-
-        return $image;
-    }
-
-    public static function extractLinks($pdf, $out)
-    {
-        $out .= 'links/';
-        Files::mkdir($out);
-
-        if (file_exists($out . '/p1.csv')) {
-            return;
-        }
-        $fwstk = new FWSTK();
-        $fwstk->setArg('--input ' . $pdf);
-        $fwstk->setArg('--extractLinks ' . $out . 'p%d.csv');
-        $fwstk->setArg('--threads 1');
-        $fwstk->execute();
-    }
-
-    public static function extractTexts($pdf, $out, $textExtraction = 'fluidbook', $ignoreSeparators = '')
-    {
-        $out .= 'texts';
-        if ($ignoreSeparators) {
-            $out .= '/sep_' . md5($ignoreSeparators);
-        }
-        $out = Files::mkdir($out);
-
-        $fwstk = new FWSTK();
-        $fwstk->setArg('--input ' . $pdf);
-        $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
-        $fwstk->setArg('--extractTextsMethod ' . $textExtraction);
-        $fwstk->setArg('--threads 1');
-        if ($ignoreSeparators) {
-            $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators);
-        }
-        $fwstk->execute();
-    }
-
-
-    public static function extractHighlightsData($pdf, $out)
-    {
-        $out .= 'texts/';
-        Files::mkdir($out);
-
-        $fwstk = new FWSTK();
-        $fwstk->setArg('--input ' . $pdf);
-        $fwstk->setArg('--layout ' . $out . 'p%d.fby');
-        $fwstk->setArg('--cmaps ' . $out);
-        $fwstk->setArg('--fonts' . $out . 'fonts/web/');
-        $fwstk->execute();
-    }
-
-    public static function fixPDF($in, $out)
-    {
-        if (file_exists($out)) {
-            unlink($out);
-        }
-
-        $pdftk = new CommandLine('pdftk');
-        $pdftk->setArg(null, $in);
-        $pdftk->setArg(null, 'output');
-        $pdftk->setArg(null, $out);
-        $pdftk->execute();
-
-        if (!file_exists($out)) {
-            $pdftocairo = new CommandLine('pdftocairo');
-            $pdftocairo->setPath(CONVERTER_PATH);
-            $pdftocairo->setArg(null, '-pdf');
-            $pdftocairo->setArg(null, $in);
-            $pdftocairo->setArg(null, $out);
-            $pdftocairo->execute();
-        }
-    }
-
-    public static function split($pdf, $out)
-    {
-
-        $lock = $pdf . '.split.lock';
-
-        $returnAfterSleep = false;
-
-        usleep(rand(100000, 2000000));
-
-        while (file_exists($lock)) {
-            if (filemtime($lock) < time() - 300) {
-                unlink($lock);
-            }
-            $returnAfterSleep = true;
-            sleep(5);
-        }
-        if ($returnAfterSleep) {
-            return;
-        }
-
-        touch($lock);
-
-        try {
-            Files::mkdir($out);
-            $pdftk = new CommandLine('pdftk');
-            $pdftk->setArg(null, $pdf);
-            $pdftk->setArg(null, 'burst');
-            $pdftk->setArg(null, 'uncompress');
-            $pdftk->setArg(null, 'output');
-            $pdftk->setArg(null, $out . '/p%d.pdf');
-            $pdftk->execute();
-
-
-            for ($i = 1; true; $i++) {
-                // Remove annotations : https://gist.github.com/stefanschmidt/5248592
-                $file = sprintf($out . '/p%d.pdf', $i);
-                if (!file_exists($file)) {
-                    break;
-                }
-                $to = sprintf($out . '/s%d.pdf', $i);
-                `LANG=C LC_CTYPE=C sed -n '/^\/Annots/!p' $file > $to`;
-                if (file_exists($to)) {
-                    if (filesize($to) > 0) {
-                        unlink($file);
-                        rename($to, $file);
-                    } else {
-                        unlink($to);
-                    }
-                }
-            }
-        } catch (\Exception $e) {
-
-        }
-        unlink($lock);
-    }
-
-    public static function compressPDF($source, $dest, $resolution = 72)
-    {
-        $gs = new CommandLine('gs');
-        $gs->setArg('-dBATCH');
-        $gs->setArg('-dNOPAUSE');
-        $gs->setArg('-dNOPROMPT');
-        $gs->setArg('-sOutputFile=' . $dest);
-        $gs->setArg('-sDEVICE=pdfwrite');
-        $gs->setArg('-dPDFSETTINGS=/ebook');
-        $gs->setArg('-dColorImageResolution=' . $resolution);
-        $gs->setArg('-dAutoRotatePages=/None');
-        $gs->setArg('-dColorConversionStrategy=/LeaveColorUnchanged');
-        $gs->setArg(null, $source);
-        $gs->execute();
-    }
+class PDFTools {
+       /**
+        * Build the path of a file located in the package "resources" directory.
+        *
+        * @param string $path Path relative to the resources directory; leading and trailing slashes are ignored
+        * @return string
+        */
+       public static function resource_path($path) {
+               $relative = self::_cleanPath($path);
+
+               return __DIR__ . '/../resources/' . $relative;
+       }
+
+       /**
+        * Build the path of a file located in the "tools" subdirectory of the package resources.
+        *
+        * @param string $path Path relative to the tools directory; leading and trailing slashes are ignored
+        * @param bool $chmod When truthy, the resolved path is passed to chmodExec() before being returned
+        * @return string
+        */
+       public static function tools_path($path, $chmod = false) {
+               $toolPath = self::resource_path('tools/' . self::_cleanPath($path));
+               if (!$chmod) {
+                       return $toolPath;
+               }
+
+               self::chmodExec($toolPath);
+               return $toolPath;
+       }
+
+       /**
+        * Set the permissions of a regular file to 0755; anything that is not a regular file is left untouched.
+        *
+        * @param string $path
+        * @return void
+        */
+       public static function chmodExec($path) {
+               if (!is_file($path)) {
+                       return;
+               }
+               // Warnings raised by chmod are silenced by the @ operator
+               @chmod($path, 0755);
+       }
+
+       /**
+        * Parse "key: value" lines describing a PDF into a structured array.
+        *
+        * Every line is whitespace-condensed and split on its first ':'; lines without ':' are ignored.
+        * Recognised lines provide the page count, the per-page boxes and sizes, the bookmarks and the
+        * number sections. Unrecognised lines are skipped.
+        *
+        * @param string $data Raw text, one entry per line
+        * @return array Always contains 'raw', 'infos' (with at least 'size'), 'bookmarks', 'numberSections'
+        *               and 'size'; 'pages', 'infos.pages', 'infos.page' and 'infos.pagenumbers' are only set
+        *               when the matching lines are present
+        */
+       protected static function parseInfos($data) {
+               $res = [];
+
+               // Default values returned even when no line is recognised
+               $res['raw'] = $data;
+               $res['infos'] = [];
+               $res['infos']['size'] = [0, 0];
+               $res['bookmarks'] = [];
+               $res['numberSections'] = '';
+               $bookmark_id = 0;
+
+               $res['size'] = array(0, 0);
+               $lines = explode("\n", $data);
+               foreach ($lines as $line) {
+                       $line = trim(Text::condenseWhite($line));
+                       $e = explode(':', $line, 2);
+                       $k = trim($e[0]);
+                       if (count($e) < 2) {
+                               continue;
+                       }
+                       $v = trim($e[1]);
+                       if ($k == 'Pages' || $k == 'NumberOfPages') {
+                               // The value is stored exactly as read
+                               $res['pages'] = $res['infos']['pages'] = $v;
+                               $res['infos']['page'] = [];
+                               // The loop bound is cast to int: comparing an int with a non-numeric or
+                               // leading-numeric string falls back to a string comparison on PHP 8, which
+                               // would create the wrong number of page slots or never terminate
+                               $pageCount = (int)$v;
+                               for ($i = 1; $i <= $pageCount; $i++) {
+                                       $res['infos']['page'][$i] = [];
+                               }
+                       } elseif (preg_match('|Page\s+([0-9]+)\s+(.*)Box:\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)\s+([0-9.]*)|iu', $line, $m)) {
+                               // "Page <n> <name>Box: a b c d" => four values stored under the lowercased <name>
+                               $res['infos']['page'][$m[1]][strtolower($m[2])] = [$m[3], $m[4], $m[5], $m[6]];
+                       } elseif (preg_match('|Page\s+([0-9]+)\s+size:\s+([0-9.]*)[pts[:space:]]+x\s+([0-9.]*)\s+pts|iu', $line, $m)) {
+                               // "Page <n> size: <w> x <h> pts"
+                               $res['infos']['page'][$m[1]]['size'] = array($m[2], $m[3]);
+                               if ($m[1] == 1) {
+                                       // The size of page 1 is also exposed as the size of the document
+                                       $res['infos']['size'][0] = $m[2];
+                                       $res['infos']['size'][1] = $m[3];
+                               }
+                       } elseif ($k == 'BookmarkTitle') {
+                               // A title line starts a new bookmark entry
+                               $res['bookmarks'][$bookmark_id] = array('titre' => str_replace('&#13;', '', trim($v)));
+                       } elseif ($k == 'BookmarkLevel') {
+                               $res['bookmarks'][$bookmark_id]['level'] = $v;
+                       } elseif ($k == 'BookmarkPage') {
+                               // A page line closes the current bookmark entry
+                               $res['bookmarks'][$bookmark_id]['page'] = $v;
+                               $bookmark_id++;
+                       } elseif ($k == 'NumberSections') {
+                               $res['numberSections'] = $v;
+                               $res['infos']['pagenumbers'] = $v;
+                       }
+               }
+               return $res;
+       }
+
+       /**
+        * Collect the information reported by FWSTK and pdfinfo for a PDF and parse it.
+        *
+        * @param string $pdf Path of the PDF file
+        * @return array Result of parseInfos() applied to both outputs joined by a newline
+        * @throws \Exception When $pdf does not exist
+        */
+       public static function infos($pdf) {
+               if (!file_exists($pdf)) {
+                       throw new \Exception('Unable to parse infos of ' . $pdf . ' : file not found');
+               }
+
+               // First source: FWSTK run with --infos
+               $toolkit = new FWSTK();
+               $toolkit->setArg('--input ' . $pdf);
+               $toolkit->setArg('--infos');
+               $toolkit->execute();
+               $toolkitOutput = $toolkit->getOutput();
+
+               // Second source: pdfinfo run with -box, f = 1 and l = 100000
+               $boxInfo = new CommandLine('pdfinfo');
+               $boxInfo->setArg('-box');
+               $boxInfo->setArg('f', 1);
+               $boxInfo->setArg('l', 100000);
+               $boxInfo->setArg(null, $pdf);
+               $boxInfo->execute();
+
+               return self::parseInfos($toolkitOutput . "\n" . $boxInfo->getOutput());
+       }
+
+       /**
+        * Strip leading and trailing slashes from a path fragment.
+        *
+        * @param string $path
+        * @return string
+        */
+       protected static function _cleanPath($path) {
+               $withoutEdgeSlashes = trim($path, '/');
+               return $withoutEdgeSlashes;
+       }
+
+       /**
+        * Render one page as a preview image with a fixed width of 500.
+        *
+        * Delegates to makeShotFixedWidth() with prefix 'p', quality 65, antialiasing 4 and method 'PNM'.
+        *
+        * @param string $in Source PDF
+        * @param string $out Destination image
+        * @param int $page Page to render
+        * @param string $format Output format, 'jpg' by default
+        */
+       public static function makeMiniShot($in, $out, $page, $format = 'jpg') {
+               $prefix = 'p';
+               $width = 500;
+               $quality = 65;
+               $antialiasing = 4;
+               $method = 'PNM';
+
+               self::makeShotFixedWidth($in, $out, $page, $prefix, $width, $quality, $antialiasing, $method, $format);
+       }
+
+       /**
+        * Render one page as an image of width $w.
+        *
+        * Delegates to makeShot() with a null resolution and a height of -1.
+        */
+       public static function makeShotFixedWidth($in, $out, $page, $prefix = '', $w = 100, $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg') {
+               $resolution = null;
+               $height = -1;
+
+               self::makeShot($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $method, $w, $height, $format);
+       }
+
+       /**
+        * Render one page as an image of height $h.
+        *
+        * Delegates to makeShot() with a null resolution and a width of -1.
+        */
+       public static function makeShotFixedHeight($in, $out, $page, $prefix = '', $h = '', $quality = 90, $antialiasing = 4, $method = 'PNM', $format = 'jpg') {
+               $resolution = null;
+               $width = -1;
+
+               self::makeShot($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $method, $width, $h, $format);
+       }
+
+       /**
+        * Convert one page of a PDF to SWF with pdf2swf, trying up to three option sets.
+        *
+        * An existing $out is removed first. Each attempt runs pdf2swf and then calls debug() on the command;
+        * the method returns as soon as $out exists, otherwise the next option set is tried.
+        *
+        * @param string $in Source PDF
+        * @param string $out Destination SWF
+        * @param int $page Page to convert
+        * @param int|float $resolution Divided by 72 to obtain the "subpixels" setting
+        * @param int $quality Value of the "jpegquality" setting
+        */
+       public static function makeSWF($in, $out, $page, $resolution = 100, $quality = 90) {
+               if (file_exists($out)) {
+                       unlink($out);
+               }
+
+               // Arguments shared by every attempt, placed before and after the attempt-specific ones
+               $leading = [
+                       ['p', $page],
+                       ['T', 10],
+               ];
+               $trailing = [
+                       ['set subpixels', $resolution / 72],
+                       ['set jpegquality', $quality],
+                       ['set disablelinks'],
+                       ['set dots'],
+                       [null, $in],
+                       ['output', $out],
+               ];
+               // Attempt-specific arguments, in the order they are handed to the command
+               $attempts = [
+                       [['Q', 30], ['set reordertags', '0'], ['fonts'], ['set storeallcharacters']],
+                       [['Q', 120], ['set poly2bitmap'], ['set storeallcharacters'], ['set reordertags', '0'], ['fonts']],
+                       [['set reordertags', '0'], ['fonts'], ['set bitmap'], ['set storeallcharacters']],
+               ];
+
+               foreach ($attempts as $specific) {
+                       $pdf2swf = new CommandLine('pdf2swf', null, true);
+                       foreach (array_merge($leading, $specific, $trailing) as $argument) {
+                               $pdf2swf->setArg(...$argument);
+                       }
+                       $pdf2swf->execute();
+                       $pdf2swf->debug();
+
+                       if (file_exists($out)) {
+                               return;
+                       }
+               }
+       }
+
+
+       /**
+        * Convert one page of a PDF to SVG with pdftocairo.
+        *
+        * @param string $in Source PDF
+        * @param string $out Destination SVG
+        * @param int $page Page to convert (passed as both the f and l options)
+        */
+       public static function makeBaseSVGFile($in, $out, $page) {
+               $converter = new CommandLine('pdftocairo');
+
+               // Named options first, then the positional arguments
+               foreach (['f' => $page, 'l' => $page, 'r' => 300] as $option => $value) {
+                       $converter->setArg($option, $value);
+               }
+               foreach (['-expand', '-svg', $in, $out] as $positional) {
+                       $converter->setArg(null, $positional);
+               }
+
+               $converter->execute();
+       }
+
+       /**
+        * Strip paths, rectangles, filters and image references from an SVG file and write the result to $out.
+        *
+        * Steps:
+        *  1. remove every node matched by a fixed list of XPath queries (paths, rects, filters and
+        *     <use> elements pointing to "#image…" inside the "surface" groups);
+        *  2. remove every <image> that carries an id no remaining element references through xlink:href;
+        *  3. remove the empty clip groups left behind, repeating until none remains.
+        *
+        * @param string $in Source SVG file
+        * @param string $out Destination SVG file
+        */
+       public static function makeTextSVGFile($in, $out) {
+               $svg = new DOMDocument();
+               $svg->preserveWhiteSpace = false;
+               $svg->load($in, LIBXML_PARSEHUGE);
+
+               $xpath = new DOMXPath($svg);
+               $xpath->registerNamespace('svg', 'http://www.w3.org/2000/svg');
+               $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
+               $xpath->registerNamespace("php", "http://php.net/xpath");
+
+               // Nodes removed unconditionally
+               $toDelete = [
+                       '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:path',
+                       '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:rect',
+                       '//svg:defs/svg:g[starts-with(@id, "surface")]//svg:use[starts-with(@xlink:href, "#image")]',
+                       '/svg:svg/svg:g[@id="surface1"]//svg:path',
+                       '/svg:svg/svg:g[@id="surface1"]//svg:rect',
+                       '/svg:svg/svg:g[@id="surface1"]//svg:filter',
+                       '/svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
+                       '//svg:svg/svg:g[@id="surface1"]//svg:use[starts-with(@xlink:href, "#image")]',
+               ];
+               // Nodes removed only when nothing references them any more
+               $toDeleteIfOrphan = [
+                       '//svg:image',
+               ];
+
+               foreach ($toDelete as $q) {
+                       foreach ($xpath->query($q) as $node) {
+                               /* @var $node DOMNode */
+                               $node->parentNode->removeChild($node);
+                       }
+               }
+
+               foreach ($toDeleteIfOrphan as $q) {
+                       foreach ($xpath->query($q) as $node) {
+                               /* @var $node DOMElement */
+                               $id = $node->getAttribute('id');
+                               if ($id === '') {
+                                       // Without an id the element cannot be referenced; it is left in place
+                                       continue;
+                               }
+                               // Orphan = no element points to "#<id>". Looking up the id itself would always
+                               // find the image being tested and therefore could never detect an orphan.
+                               $references = $xpath->query('//*[@xlink:href="#' . $id . '"]');
+                               if ($references->count() === 0) {
+                                       $node->parentNode->removeChild($node);
+                               }
+                       }
+               }
+
+               $res = $svg->saveXML();
+               // Self-closed empty clip groups
+               $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"\/>/', '', $res);
+               // Removing an empty group may leave its parent empty in turn, hence the repetition
+               do {
+                       $res = preg_replace('/<g clip-path="url\(#clip\d+\)" clip-rule="nonzero"><\/g>/', '', $res, -1, $count);
+               } while ($count);
+
+               file_put_contents($out, $res);
+       }
+
+       /**
+        * Render one page of a PDF as an image, falling back to the other renderer when no file is produced.
+        *
+        * $method selects the first renderer ('GS' or 'PNM'). When $out does not exist afterwards, the other
+        * renderer is run with the same settings. Any other $method value runs no renderer at all.
+        *
+        * @param string $in Source PDF
+        * @param string $out Destination image
+        * @param int $page Page to render
+        */
+       public static function makeShot($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $method = 'PNM', $width = null, $height = null, $format = 'jpg') {
+               $fallback = null;
+               if ($method === 'GS') {
+                       self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
+                       $fallback = 'PNM';
+               } elseif ($method === 'PNM') {
+                       self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
+                       $fallback = 'GS';
+               }
+
+               // The presence of the output file is the only success criterion
+               if (file_exists($out)) {
+                       return;
+               }
+
+               if ($fallback === 'PNM') {
+                       self::makeShotPNM($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, true, $width, $height, $format);
+               } elseif ($fallback === 'GS') {
+                       self::makeShotGS($in, $out, $page, $prefix, $resolution, $quality, $antialiasing, $width, $height, $format);
+               }
+       }
+
+       /**
+        * Renders a single page of a PDF to an image with the `gs` command.
+        *
+        * The 'jpg' format selects the `jpeg` device (with -dJPEGQ); every other format selects
+        * `png16m`. $prefix, $width and $height are accepted but not used by this renderer.
+        *
+        * @param string $in Path of the source PDF
+        * @param string $out Path passed as -sOutputFile
+        * @param int $page Page used for both -dFirstPage and -dLastPage
+        * @param string $prefix Unused
+        * @param int|float $resolution Rounded and passed as -r
+        * @param int $quality Passed as -dJPEGQ when $format is 'jpg'
+        * @param int $antialiasing Passed as -dTextAlphaBits and -dGraphicsAlphaBits
+        * @param int|null $width Unused
+        * @param int|null $height Unused
+        * @param string $format 'jpg' or anything else (rendered as PNG)
+        */
+       protected static function makeShotGS($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $width = null, $height = null, $format = 'jpg') {
+               $isJpeg = $format === 'jpg';
+
+               $flags = [
+                       // Non-interactive run
+                       '-dBATCH',
+                       '-dNOPAUSE',
+                       '-dNOPROMPT',
+                       // Antialiasing
+                       '-dDOINTERPOLATE',
+                       '-dTextAlphaBits=' . $antialiasing,
+                       '-dGraphicsAlphaBits=' . $antialiasing,
+                       // Output device
+                       '-sDEVICE=' . ($isJpeg ? 'jpeg' : 'png16m'),
+                       // Layout & colors (-dUseCIEColor is left disabled)
+                       '-dAutoRotatePages=/None',
+                       '-dUseCropBox',
+                       // Resolution
+                       '-r' . round($resolution),
+               ];
+               if ($isJpeg) {
+                       $flags[] = '-dJPEGQ=' . $quality;
+               }
+               // Performance
+               $flags[] = '-dNumRenderingThreads=4';
+               // Restrict rendering to the requested page
+               $flags[] = '-dFirstPage=' . $page;
+               $flags[] = '-dLastPage=' . $page;
+               // Destination file
+               $flags[] = '-sOutputFile=' . $out;
+
+               $ghostscript = new CommandLine('gs', null, true);
+               foreach ($flags as $flag) {
+                       $ghostscript->setArg($flag);
+               }
+               // The input file goes last
+               $ghostscript->setArg(null, $in);
+               $ghostscript->execute();
+       }
+
+       /**
+        * Renders a single page of a PDF with `pdftoppm`, then converts the resulting .ppm
+        * to the requested format (`cjpeg` for 'jpg', `pnmtopng` for 'png').
+        *
+        * When pdftoppm leaves no .ppm file, its debug() output is triggered and nothing is
+        * converted. For any other $format the .ppm is simply deleted.
+        *
+        * @param string $in Path of the source PDF
+        * @param string $out Path of the image to produce
+        * @param int $page Page used for both the first and last page options
+        * @param string $prefix Unused
+        * @param int|float|null $resolution Passed as the `r` option unless null
+        * @param int $quality cjpeg receives this value plus 6
+        * @param mixed $antialiasing Truthy enables -aa and -aaVector
+        * @param bool $texts Truthy enables -freetype
+        * @param int|null $width Passed as -scale-to-x unless null
+        * @param int|null $height Passed as -scale-to-y unless null
+        * @param string $format 'jpg' or 'png'
+        */
+       public static function makeShotPNM($in, $out, $page, $prefix = '', $resolution = 72, $quality = 90, $antialiasing = 4, $texts = true, $width = null, $height = null, $format = 'jpg') {
+               $base = Files::tempnam();
+
+               $aaSwitch = $antialiasing ? 'yes' : 'no';
+               $freetypeSwitch = $texts ? 'yes' : 'no';
+
+               // Export the page as a .ppm file
+               $pdftoppm = new CommandLine('pdftoppm', null, true);
+               $pdftoppm->setArg('f', $page);
+               $pdftoppm->setArg('l', $page);
+               $pdftoppm->setArg('-cropbox');
+               $pdftoppm->setArg('-freetype ' . $freetypeSwitch);
+               $pdftoppm->setArg('-singlefile');
+               $pdftoppm->setArg('-aa ' . $aaSwitch);
+               $pdftoppm->setArg('-aaVector ' . $aaSwitch);
+               if ($resolution !== null) {
+                       $pdftoppm->setArg('r', $resolution);
+               }
+               if ($width !== null) {
+                       $pdftoppm->setArg('-scale-to-x ' . $width);
+               }
+               if ($height !== null) {
+                       $pdftoppm->setArg('-scale-to-y ' . $height);
+               }
+               $pdftoppm->setArg(null, $in);
+               $pdftoppm->setArg(null, $base);
+               $pdftoppm->execute();
+
+               // The rendered page is looked up at the temporary name plus the .ppm extension
+               $ppm = $base . '.ppm';
+               if (!file_exists($ppm)) {
+                       $pdftoppm->debug();
+                       return;
+               }
+
+               if ($format === 'jpg') {
+                       $cjpeg = new CommandLine('cjpeg', null, true);
+                       $cjpeg->setArg('-quality ' . ($quality + 6));
+                       $cjpeg->setArg('-outfile ' . $out);
+                       $cjpeg->setArg(null, $ppm);
+                       $cjpeg->execute();
+               } elseif ($format === 'png') {
+                       $pnmtopng = new CommandLine('pnmtopng', $out, false);
+                       $pnmtopng->setArg('-background white');
+                       $pnmtopng->setArg(null, $ppm);
+                       $pnmtopng->execute();
+               }
+               unlink($ppm);
+       }
+
+       /**
+        * Returns the path of a cached thumbnail for one page of a PDF, (re)generating it
+        * with makeMiniShot() when it is missing or out of date.
+        *
+        * The thumbnail is considered out of date when it is older than this source file or
+        * older than the PDF itself.
+        *
+        * @param string $pdf Path of the source PDF
+        * @param int $page Page number
+        * @param string $format Extension of the thumbnail file
+        * @return string|false Path of the thumbnail, or false when $pdf does not exist
+        */
+       public static function getThumbFromPDF($pdf, $page, $format = 'jpg') {
+               if (!file_exists($pdf)) {
+                       return false;
+               }
+               $dir = WS_CACHE . '/thumbs/' . sha1($pdf) . '/';
+               if (!file_exists($dir)) {
+                       mkdir($dir, 0777, true);
+               }
+               $image = $dir . '/p' . $page . '.' . $format;
+
+               // Only stat the thumbnail once it is known to exist: filemtime() raises a
+               // warning when called on a missing file.
+               $outdated = true;
+               if (file_exists($image)) {
+                       $mtime = filemtime($image);
+                       $outdated = $mtime < filemtime(__FILE__) || $mtime < filemtime($pdf);
+               }
+
+               if ($outdated) {
+                       self::makeMiniShot($pdf, $image, $page, $format);
+               }
+
+               return $image;
+       }
+
+       /**
+        * Extracts the links of a PDF into per-page CSV files (p%d.csv) inside the `links/`
+        * subdirectory of $out, using FWSTK.
+        *
+        * Nothing is run when the CSV of the first page is already present.
+        *
+        * @param string $pdf Path of the source PDF
+        * @param string $out Output base path; 'links/' is appended to it as is
+        */
+       public static function extractLinks($pdf, $out) {
+               $linksDir = $out . 'links/';
+               Files::mkdir($linksDir);
+
+               // The first page's CSV serves as the "already extracted" marker
+               if (!file_exists($linksDir . '/p1.csv')) {
+                       $options = [
+                               '--input ' . $pdf,
+                               '--extractLinks ' . $linksDir . 'p%d.csv',
+                               '--threads 1',
+                       ];
+
+                       $fwstk = new FWSTK();
+                       foreach ($options as $option) {
+                               $fwstk->setArg($option);
+                       }
+                       $fwstk->execute();
+               }
+       }
+
+       /**
+        * Extracts the texts of a PDF into `%s%d.txt` files with FWSTK.
+        *
+        * Files go to the `texts` directory under $out, or to `texts/sep_<md5>` when
+        * $ignoreSeparators is set. The output pattern is built from the value returned by
+        * Files::mkdir().
+        *
+        * @param string $pdf Path of the source PDF
+        * @param string $out Output base path; 'texts' is appended to it as is
+        * @param string $textExtraction Value passed as --extractTextsMethod
+        * @param string $ignoreSeparators Value passed as --ignoreSeparators when not empty
+        */
+       public static function extractTexts($pdf, $out, $textExtraction = 'fluidbook', $ignoreSeparators = '') {
+               $target = $out . 'texts';
+               if ($ignoreSeparators) {
+                       // One directory per distinct separators setting
+                       $target .= '/sep_' . md5($ignoreSeparators);
+               }
+               $target = Files::mkdir($target);
+
+               $options = [
+                       '--input ' . $pdf,
+                       '--extractTexts ' . $target . '%s%d.txt',
+                       '--extractTextsMethod ' . $textExtraction,
+                       '--threads 1',
+               ];
+               if ($ignoreSeparators) {
+                       $options[] = '--ignoreSeparators ' . $ignoreSeparators;
+               }
+
+               $fwstk = new FWSTK();
+               foreach ($options as $option) {
+                       $fwstk->setArg($option);
+               }
+               $fwstk->execute();
+       }
+
+
+       /**
+        * Runs FWSTK on a PDF with the --layout, --cmaps and --fonts options, all writing
+        * into the `texts/` subdirectory of $out (layout files are named p%d.fby).
+        *
+        * @param string $pdf Path of the source PDF
+        * @param string $out Output base path; 'texts/' is appended to it as is
+        */
+       public static function extractHighlightsData($pdf, $out) {
+               $out .= 'texts/';
+               Files::mkdir($out);
+
+               $fwstk = new FWSTK();
+               $fwstk->setArg('--input ' . $pdf);
+               $fwstk->setArg('--layout ' . $out . 'p%d.fby');
+               $fwstk->setArg('--cmaps ' . $out);
+               // The option name must be separated from its value by a space, like the
+               // other options; without it the name and the path are glued together.
+               $fwstk->setArg('--fonts ' . $out . 'fonts/web/');
+               $fwstk->execute();
+       }
+
+       /**
+        * Rewrites a PDF through `pdftk`, falling back to `pdftocairo -pdf` when pdftk
+        * leaves no output file.
+        *
+        * Any existing file at $out is deleted first.
+        *
+        * @param string $in Path of the source PDF
+        * @param string $out Path of the rewritten PDF
+        */
+       public static function fixPDF($in, $out) {
+               if (file_exists($out)) {
+                       unlink($out);
+               }
+
+               // First attempt: pdftk <in> output <out>
+               $pdftk = new CommandLine('pdftk');
+               foreach ([$in, 'output', $out] as $operand) {
+                       $pdftk->setArg(null, $operand);
+               }
+               $pdftk->execute();
+
+               if (file_exists($out)) {
+                       return;
+               }
+
+               // Second attempt: pdftocairo -pdf <in> <out>
+               $pdftocairo = new CommandLine('pdftocairo');
+               $pdftocairo->setPath(CONVERTER_PATH);
+               foreach (['-pdf', $in, $out] as $operand) {
+                       $pdftocairo->setArg(null, $operand);
+               }
+               $pdftocairo->execute();
+       }
+
+       /**
+        * Bursts a PDF into one uncompressed file per page ($out/p%d.pdf) with `pdftk`, then
+        * strips the lines starting with /Annots from each page file.
+        *
+        * A lock file ($pdf . '.split.lock') guards the operation:
+        *  - when a recent lock is present, the call waits for it to disappear and returns
+        *    without splitting;
+        *  - when the lock is older than 300 seconds it is treated as stale, removed, and the
+        *    split is performed by this call.
+        *
+        * Exceptions raised while splitting are swallowed (best effort); the lock is always
+        * released.
+        *
+        * @param string $pdf Path of the source PDF
+        * @param string $out Directory receiving the page files
+        */
+       public static function split($pdf, $out) {
+
+               $lock = $pdf . '.split.lock';
+
+               // Random delay before looking at the lock
+               usleep(rand(100000, 2000000));
+
+               $waitedForOtherProcess = false;
+               while (true) {
+                       // Stat results are cached by PHP; drop them so that a lock removed by
+                       // another process is noticed and its mtime is read fresh.
+                       clearstatcache(true, $lock);
+                       if (!file_exists($lock)) {
+                               break;
+                       }
+                       if (filemtime($lock) < time() - 300) {
+                               // Stale lock: remove it and do the split ourselves instead of
+                               // returning without any output.
+                               unlink($lock);
+                               $waitedForOtherProcess = false;
+                               break;
+                       }
+                       $waitedForOtherProcess = true;
+                       sleep(5);
+               }
+               if ($waitedForOtherProcess) {
+                       return;
+               }
+
+               touch($lock);
+
+               try {
+                       Files::mkdir($out);
+                       $pdftk = new CommandLine('pdftk');
+                       $pdftk->setArg(null, $pdf);
+                       $pdftk->setArg(null, 'burst');
+                       $pdftk->setArg(null, 'uncompress');
+                       $pdftk->setArg(null, 'output');
+                       $pdftk->setArg(null, $out . '/p%d.pdf');
+                       $pdftk->execute();
+
+                       for ($i = 1; true; $i++) {
+                               // Remove annotations : https://gist.github.com/stefanschmidt/5248592
+                               $file = $out . '/p' . $i . '.pdf';
+                               if (!file_exists($file)) {
+                                       break;
+                               }
+                               $to = $out . '/s' . $i . '.pdf';
+                               // Paths are escaped so that spaces or shell metacharacters cannot
+                               // break (or alter) the command.
+                               shell_exec(sprintf(
+                                       "LANG=C LC_CTYPE=C sed -n '/^\\/Annots/!p' %s > %s",
+                                       escapeshellarg($file),
+                                       escapeshellarg($to)
+                               ));
+                               if (file_exists($to)) {
+                                       if (filesize($to) > 0) {
+                                               unlink($file);
+                                               rename($to, $file);
+                                       } else {
+                                               // sed produced nothing: keep the original page file
+                                               unlink($to);
+                                       }
+                               }
+                       }
+               } catch (\Exception $e) {
+                       // Best effort: a failure while splitting is deliberately ignored
+               } finally {
+                       // Release the lock whatever happened above
+                       clearstatcache(true, $lock);
+                       if (file_exists($lock)) {
+                               unlink($lock);
+                       }
+               }
+       }
+
+       /**
+        * Rewrites a PDF with the `gs` pdfwrite device using the /ebook settings.
+        *
+        * @param string $source Path of the source PDF
+        * @param string $dest Path passed as -sOutputFile
+        * @param int $resolution Value passed as -dColorImageResolution
+        */
+       public static function compressPDF($source, $dest, $resolution = 72) {
+               $flags = [
+                       '-dBATCH',
+                       '-dNOPAUSE',
+                       '-dNOPROMPT',
+                       '-sOutputFile=' . $dest,
+                       '-sDEVICE=pdfwrite',
+                       '-dPDFSETTINGS=/ebook',
+                       '-dColorImageResolution=' . $resolution,
+                       '-dAutoRotatePages=/None',
+                       '-dColorConversionStrategy=/LeaveColorUnchanged',
+               ];
+
+               $ghostscript = new CommandLine('gs');
+               foreach ($flags as $flag) {
+                       $ghostscript->setArg($flag);
+               }
+               // The input file goes last
+               $ghostscript->setArg(null, $source);
+               $ghostscript->execute();
+       }
 
 }