&& apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 4F4EA0AAE5267A6C \
&& apt-get update \
&& apt-get -y --no-install-recommends install x11-common pdftk poppler-utils ghostscript netpbm sshfs mupdf-tools php8.2-cli php8.2-fpm php8.2-curl php8.2-exif php8.2-xml php8.2-gd php8.2-ssh php8.2-zip libjpeg-turbo-progs zip unzip \
- less nano wget bash lynx
+ less nano wget bash lynx rsync
COPY --from=composer:2 /usr/bin/composer /usr/bin/composer
COPY --chmod=755 startup /usr/bin/startup
<?php
+use Fluidbook\Farmer\PDFOperations;
use Fluidbook\Farmer\ProcessFile;
use Fluidbook\Farmer\ProcessToolboxDocumentFile;
use Fluidbook\Farmer\ProcessToolboxPDFFile;
require_once __DIR__ . "/vendor/autoload.php";
+// Command-line entry point: fold "key=value" arguments into $_POST so both
+// invocation styles share the code below. Limited to the CLI SAPI because a
+// web SAPI may also populate $argv (register_argc_argv), and a query string
+// must never overwrite the posted parameters.
+if (PHP_SAPI === 'cli' && isset($argv)) {
+    parse_str(implode('&', array_slice($argv, 1)), $_POST);
+}
+
if (!isset($_POST['quality'])) {
$_POST['quality'] = 85;
}
+// Requests without an explicit operation fall through to page processing.
+if (!isset($_POST['operation'])) {
+    $_POST['operation'] = 'unknown';
+}
+
try {
- if (!isset($_POST['pdf'])) {
- if (isset($_POST['toolbox'])) {
- $p = new ProcessToolboxDocumentFile($_POST['out'], $_POST['page'], $_POST['resolutionRatio'], $_POST['mobileRatio'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
+    // Whole-document PDF operations are dispatched on the "operation" parameter.
+    if ($_POST['operation'] === 'fixpdf') {
+        $res = PDFOperations::fixPDF($_POST['pdf'], $_POST['out']);
+    } else if ($_POST['operation'] === 'cutpdf') {
+        $res = PDFOperations::cutPDF($_POST['pdf'], $_POST['mode'], $_POST['out']);
+    } else if ($_POST['operation'] === 'trimpdf') {
+        $res = PDFOperations::trimPDF($_POST['pdf'], $_POST['out']);
+    } else if ($_POST['operation'] === 'splitpdf') {
+        $res = PDFOperations::splitPDF($_POST['pdf'], $_POST['out']);
+    } else if ($_POST['operation'] === 'extractlinks') {
+        $res = PDFOperations::extractLinks($_POST['pdf'], $_POST['out']);
+    } else if ($_POST['operation'] === 'extracttexts') {
+        $res = PDFOperations::extractTexts($_POST['pdf'], $_POST['out'], $_POST['mode'] ?? 'standard', $_POST['method'] ?? 'fluidbook', $_POST['ignoreseparators'] ?? '');
+    } else if (in_array($_POST['operation'], ['extracthighlightsdata', 'extracthightlightsdata'], true)) {
+        // 'extracthightlightsdata' is the original (misspelled) name and stays
+        // accepted; optional parameters use the same fallbacks as 'extracttexts'.
+        $res = PDFOperations::extractHighlightsData($_POST['pdf'], $_POST['out'], $_POST['mode'] ?? 'standard', $_POST['ignoreseparators'] ?? '');
+    } else {
+        // Any other operation: process a single page, picking the processor
+        // from the presence of the "pdf" and "toolbox" parameters.
+        if (!isset($_POST['pdf'])) {
+            if (isset($_POST['toolbox'])) {
+                $p = new ProcessToolboxDocumentFile($_POST['out'], $_POST['page'], $_POST['resolutionRatio'], $_POST['mobileRatio'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
+            } else {
+                $p = new ProcessFile($_POST['out'], $_POST['page'], $_POST['resolutionRatio'], $_POST['mobileRatio'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
+            }
} else {
- $p = new ProcessFile($_POST['out'], $_POST['page'], $_POST['resolutionRatio'], $_POST['mobileRatio'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
+            $p = new ProcessToolboxPDFFile($_POST['pdf'], $_POST['out'], $_POST['page'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
}
- } else {
- $p = new ProcessToolboxPDFFile($_POST['pdf'], $_POST['out'], $_POST['page'], $_POST['format'], $_POST['resolution'], (int)$_POST['quality'], $_POST['withGraphics'], $_POST['withText'], $_POST['version'], $_POST['force']);
+        $res = $p->process();
}
- $res = $p->process();
- ob_end_clean();
+
+    // Drop buffered output, if a buffer is active, so only the result is emitted.
+    if (ob_get_level() > 0) {
+        ob_end_clean();
+    }
die($res);
} catch (Exception $e) {
- ob_end_clean();
- die('!!'.$e->getMessage());
+    // Failures are reported as the exception message prefixed with '!!'.
+    if (ob_get_level() > 0) {
+        ob_end_clean();
+    }
+    die('!!' . $e->getMessage());
}
\ No newline at end of file
--- /dev/null
+<?php
+
+namespace Fluidbook\Farmer;
+
+use Cubist\Util\CommandLine\Rsync;
+use Cubist\Util\Files\Files;
+
+/**
+ * Maps requested input/output paths to scratch locations under /tmp and
+ * pushes the registered output directories to their destinations with rsync.
+ */
+class Buffer
+{
+    /**
+     * Registered outputs, keyed by the path passed to getOut(). Each entry is
+     * ['local' => scratch directory, 'dir' => destination directory].
+     */
+    protected static $_out = [];
+
+    /**
+     * Register $path as an output and return its scratch counterpart.
+     *
+     * The scratch directory is /tmp/out/<sha1 of $path>; the file name found in
+     * $path (if any) is appended directly to the value returned by
+     * Files::mkdir() (presumably a directory path ending with a separator —
+     * TODO confirm against Cubist\Util\Files\Files).
+     *
+     * @param string $path Destination file or directory path.
+     * @return string Scratch path to write to.
+     */
+    public static function getOut($path)
+    {
+        self::getDirAndFile($path, $dir, $file);
+        if (!isset(self::$_out[$path])) {
+            self::$_out[$path] = ['local' => Files::mkdir('/tmp/out/' . sha1($path)), 'dir' => $dir];
+        }
+        return self::$_out[$path]['local'] . $file;
+    }
+
+    /**
+     * Return a scratch path for $path under /tmp/in/<sha1 of $path>/ and copy
+     * the source file there when it exists.
+     *
+     * The scratch file is named after the hash and keeps the source extension.
+     * When $path has no file component the name is the bare hash; the previous
+     * code read an undefined $ext in that case.
+     *
+     * @param string $path  Source file path.
+     * @param bool   $force Currently unused; kept for interface compatibility.
+     * @return string Scratch path (returned even when nothing was copied).
+     */
+    public static function getIn($path, $force = false)
+    {
+        self::getDirAndFile($path, $dir, $file);
+        $hash = sha1($path);
+        // Only a detected file name contributes a ".ext" suffix.
+        $suffix = $file !== '' ? '.' . pathinfo($file, PATHINFO_EXTENSION) : '';
+        $in = Files::mkdir('/tmp/in/' . $hash) . $hash . $suffix;
+        if (file_exists($path)) {
+            copy($path, $in);
+        }
+        return $in;
+    }
+
+    /**
+     * Run one Rsync from every registered scratch directory to its destination
+     * directory. Entries stay registered afterwards, so a later call syncs
+     * them again.
+     */
+    public static function syncOut()
+    {
+        foreach (self::$_out as $o) {
+            $rsync = new Rsync($o['local'], $o['dir']);
+            $rsync->execute();
+        }
+    }
+
+    /**
+     * Split $path into a directory and a file name.
+     *
+     * The last segment is treated as a file name only when it contains a dot;
+     * otherwise the whole path is the directory and $file is ''.
+     *
+     * @param string $path Path to split (trailing slashes are ignored).
+     * @param string $dir  Receives the directory part, without trailing slash.
+     * @param string $file Receives the file name, or '' when none was detected.
+     */
+    protected static function getDirAndFile($path, &$dir, &$file)
+    {
+        $segments = explode('/', rtrim($path, '/'));
+        $file = '';
+        if (str_contains($segments[count($segments) - 1], '.')) {
+            $file = array_pop($segments);
+        }
+        $dir = implode('/', $segments);
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+namespace Fluidbook\Farmer;
+
+use Cubist\PDF\CommandLine\FWSTK;
+use Cubist\PDF\PDFTools;
+
+/**
+ * Static entry points for whole-document PDF operations.
+ *
+ * Each method runs its tool, calls Buffer::syncOut() and returns the $out
+ * value it received.
+ */
+class PDFOperations
+{
+    /**
+     * Run PDFTools::fixPDF() from the buffered copy of $pdf to the buffered
+     * location of $out.
+     */
+    public static function fixPDF($pdf, $out)
+    {
+        $localIn = Buffer::getIn($pdf);
+        $localOut = Buffer::getOut($out);
+        PDFTools::fixPDF($localIn, $localOut);
+        Buffer::syncOut();
+
+        return $out;
+    }
+
+    /**
+     * Run FWSTK with "--cut $mode" on the buffered copy of $pdf.
+     */
+    public static function cutPDF($pdf, $mode, $out)
+    {
+        // NOTE(review): $mode is concatenated into the argument as-is; confirm
+        // that FWSTK::setArg() escapes it before it reaches a shell.
+        return self::runFwstk($pdf, '--cut ' . $mode, $out);
+    }
+
+    /**
+     * Run FWSTK with "--trim" on the buffered copy of $pdf.
+     */
+    public static function trimPDF($pdf, $out)
+    {
+        return self::runFwstk($pdf, '--trim', $out);
+    }
+
+    /**
+     * Run PDFTools::split() from the buffered copy of $pdf to the buffered
+     * location of $out.
+     */
+    public static function splitPDF($pdf, $out)
+    {
+        $localIn = Buffer::getIn($pdf);
+        $localOut = Buffer::getOut($out);
+        PDFTools::split($localIn, $localOut);
+        Buffer::syncOut();
+
+        return $out;
+    }
+
+    /**
+     * Run PDFTools::extractTexts() on the buffered copy of $pdf.
+     *
+     * NOTE(review): $out is handed to PDFTools directly rather than through
+     * Buffer::getOut(), unlike fixPDF()/splitPDF() — confirm this is intended.
+     *
+     * @throws \Exception
+     */
+    public static function extractTexts($pdf, $out, $mode, $method, $ignoreSeparators)
+    {
+        $localIn = Buffer::getIn($pdf);
+        PDFTools::extractTexts($localIn, $out, $mode, $method, $ignoreSeparators);
+        Buffer::syncOut();
+
+        return $out;
+    }
+
+    /**
+     * Run PDFTools::extractHighlightsData() on the buffered copy of $pdf;
+     * $out is passed through unchanged.
+     */
+    public static function extractHighlightsData($pdf, $out, $mode, $ignoreSeparators)
+    {
+        $localIn = Buffer::getIn($pdf);
+        PDFTools::extractHighlightsData($localIn, $out, $mode, $ignoreSeparators);
+        Buffer::syncOut();
+
+        return $out;
+    }
+
+    /**
+     * Run PDFTools::extractLinks() on the buffered copy of $pdf; $out is
+     * passed through unchanged.
+     */
+    public static function extractLinks($pdf, $out)
+    {
+        $localIn = Buffer::getIn($pdf);
+        PDFTools::extractLinks($localIn, $out);
+        Buffer::syncOut();
+
+        return $out;
+    }
+
+    /**
+     * Shared FWSTK invocation: "--input <buffered pdf>", the operation
+     * argument, then "--output <buffered out>", followed by a sync.
+     */
+    private static function runFwstk($pdf, $operationArg, $out)
+    {
+        $tool = new FWSTK();
+        $arguments = [
+            '--input ' . Buffer::getIn($pdf),
+            $operationArg,
+            '--output ' . Buffer::getOut($out),
+        ];
+        foreach ($arguments as $argument) {
+            $tool->setArg($argument);
+        }
+        $tool->execute();
+
+        Buffer::syncOut();
+
+        return $out;
+    }
+}
\ No newline at end of file
public function getOut()
{
if ($this->getRegion() === 'UE') {
- return str_replace(self::DISTANT_ROOT, self::LOCAL_ROOT, $this->getDistantOut());
+ $res = str_replace(self::DISTANT_ROOT, self::LOCAL_ROOT, $this->getDistantOut()); // region 'UE': swap DISTANT_ROOT for LOCAL_ROOT in the distant output path
} else {
- return str_replace(self::US_DISTANT_ROOT, self::US_LOCAL_ROOT, $this->getDistantOut());
+ $res = str_replace(self::US_DISTANT_ROOT, self::US_LOCAL_ROOT, $this->getDistantOut()); // any other region: same swap using the US_* roots
}
+
+ return Buffer::getOut($res); // callers receive the path Buffer::getOut() returns for $res, not $res itself
}
/**
public function process()
{
if ($this->getRegion() === 'UE') {
- return str_replace(self::LOCAL_ROOT, self::DISTANT_ROOT, $this->getPath($this->isForce()));
+ $res = str_replace(self::LOCAL_ROOT, self::DISTANT_ROOT, $this->getPath($this->isForce())); // region 'UE': express the produced path with DISTANT_ROOT in place of LOCAL_ROOT
} else {
- return str_replace(self::US_LOCAL_ROOT, self::US_DISTANT_ROOT, $this->getPath($this->isForce()));
+ $res = str_replace(self::US_LOCAL_ROOT, self::US_DISTANT_ROOT, $this->getPath($this->isForce())); // any other region: same replacement using the US_* roots
}
+ Buffer::syncOut(); // run the sync for every output registered in Buffer before the path is handed back
+ return $res;
}
public function getPdf(): string|null
if ($this->getPage() > $this->getPDFPagesNumber()) {
throw new \Exception('PDF document has ' . $this->getPDFPagesNumber() . ' pages and you are requesting the page ' . $this->getPage());
}
- PDFTools::split($this->getPdf(), $this->getOut() . '/pdf');
+ PDFOperations::splitPDF($this->getPdf(), $this->getOut() . '/pdf');
}
- return $res;
+ return Buffer::getIn($res);
}
+
+
}
\ No newline at end of file