From: Vincent Vanwaelscappel Date: Thu, 21 Dec 2023 18:42:09 +0000 (+0100) Subject: wip #6598 @2 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=29b603e8654084d424a2e584593775a4573c20f7;p=fluidbook_processfarm.git wip #6598 @2 --- diff --git a/Dockerfile b/Dockerfile index 333570a..dbdc28f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends gnupg \ && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 4F4EA0AAE5267A6C \ && apt-get update \ && apt-get -y --no-install-recommends install x11-common pdftk poppler-utils ghostscript netpbm sshfs mupdf-tools php8.2-cli php8.2-fpm php8.2-curl php8.2-exif php8.2-xml php8.2-gd php8.2-ssh php8.2-zip libjpeg-turbo-progs zip unzip \ - less nano wget bash lynx + less nano wget bash lynx rsync COPY --from=composer:2 /usr/bin/composer /usr/bin/composer COPY --chmod=755 startup /usr/bin/startup diff --git a/app/process.php b/app/process.php index 8db4271..87eb1ab 100644 --- a/app/process.php +++ b/app/process.php @@ -1,29 +1,55 @@ process(); } - $res = $p->process(); - ob_end_clean(); + + @ob_end_clean(); die($res); } catch (Exception $e) { - ob_end_clean(); - die('!!'.$e->getMessage()); + @ob_end_clean(); + die('!!' . $e->getMessage()); } \ No newline at end of file diff --git a/app/src/Buffer.php b/app/src/Buffer.php new file mode 100644 index 0000000..a660109 --- /dev/null +++ b/app/src/Buffer.php @@ -0,0 +1,54 @@ + Files::mkdir('/tmp/out/' . sha1($path)), 'dir' => $dir]; + } + return self::$_out[$path]['local'] . $file; + } + + public static function getIn($path, $force = false) + { + self::getDirAndFile($path, $dir, $file); + if ($file) { + $e = explode('.', $file); + $ext = array_pop($e); + } + $in = Files::mkdir('/tmp/in/' . sha1($path)) . sha1($path) . '.' . $ext; + if (file_exists($path)) { + copy($path, $in); + } + return $in; + } + + public static function syncOut() + { + foreach (self::$_out as $o) { + $rsync = new Rsync($o['local'], $o['dir']); + $rsync->execute(); + } + } + + protected static function getDirAndFile($path, &$dir, &$file) + { + $path = rtrim($path, '/'); + $e = explode('/', $path); + $last = $e[count($e) - 1]; + $file = ''; + if (stristr($last, '.')) { + $file = array_pop($e); + } + $dir = implode('/', $e); + } +} \ No newline at end of file diff --git a/app/src/PDFOperations.php b/app/src/PDFOperations.php new file mode 100644 index 0000000..567f23c --- /dev/null +++ b/app/src/PDFOperations.php @@ -0,0 +1,75 @@ +setArg('--input ' . Buffer::getIn($pdf)); + $fwstk->setArg('--cut ' . $mode); + $fwstk->setArg('--output ' . Buffer::getOut($out)); + $fwstk->execute(); + + Buffer::syncOut(); + + return $out; + } + + public static function trimPDF($pdf, $out) + { + $fwstk = new FWSTK(); + $fwstk->setArg('--input ' . Buffer::getIn($pdf)); + $fwstk->setArg('--trim'); + $fwstk->setArg('--output ' . Buffer::getOut($out)); + $fwstk->execute(); + + Buffer::syncOut(); + + return $out; + } + + public static function splitPDF($pdf, $out) + { + PDFTools::split(Buffer::getIn($pdf), Buffer::getOut($out)); + Buffer::syncOut(); + return $out; + } + + /** + * @throws \Exception + */ + public static function extractTexts($pdf, $out, $mode, $method, $ignoreSeparators) + { + PDFTools::extractTexts(Buffer::getIn($pdf), $out, $mode, $method, $ignoreSeparators); + Buffer::syncOut(); + return $out; + } + + + public static function extractHighlightsData($pdf, $out, $mode, $ignoreSeparators) + { + PDFTools::extractHighlightsData(Buffer::getIn($pdf), $out, $mode, $ignoreSeparators); + Buffer::syncOut(); + return $out; + } + + public static function extractLinks($pdf, $out) + { + PDFTools::extractLinks(Buffer::getIn($pdf), $out); + Buffer::syncOut(); + return $out; + } + +} \ No newline at end of file diff --git a/app/src/ProcessFile.php b/app/src/ProcessFile.php index ca89658..6a252bd 100644 --- a/app/src/ProcessFile.php +++ b/app/src/ProcessFile.php @@ -113,10 +113,12 @@ class ProcessFile extends \Fluidbook\Tools\Jobs\ProcessFile public function getOut() { if ($this->getRegion() === 'UE') { - return str_replace(self::DISTANT_ROOT, self::LOCAL_ROOT, $this->getDistantOut()); + $res = str_replace(self::DISTANT_ROOT, self::LOCAL_ROOT, $this->getDistantOut()); } else { - return str_replace(self::US_DISTANT_ROOT, self::US_LOCAL_ROOT, $this->getDistantOut()); + $res = str_replace(self::US_DISTANT_ROOT, self::US_LOCAL_ROOT, $this->getDistantOut()); } + + return Buffer::getOut($res); } /** @@ -146,10 +148,12 @@ class ProcessFile extends \Fluidbook\Tools\Jobs\ProcessFile public function process() { if ($this->getRegion() === 'UE') { - return str_replace(self::LOCAL_ROOT, self::DISTANT_ROOT, $this->getPath($this->isForce())); + $res = str_replace(self::LOCAL_ROOT, self::DISTANT_ROOT, $this->getPath($this->isForce())); } else { - return str_replace(self::US_LOCAL_ROOT, self::US_DISTANT_ROOT, $this->getPath($this->isForce())); + $res = str_replace(self::US_LOCAL_ROOT, self::US_DISTANT_ROOT, $this->getPath($this->isForce())); } + Buffer::syncOut(); + return $res; } public function getPdf(): string|null @@ -185,8 +189,10 @@ class ProcessFile extends \Fluidbook\Tools\Jobs\ProcessFile if ($this->getPage() > $this->getPDFPagesNumber()) { throw new \Exception('PDF document has ' . $this->getPDFPagesNumber() . ' pages and you are requesting the page ' . $this->getPage()); } - PDFTools::split($this->getPdf(), $this->getOut() . '/pdf'); + PDFOperations::splitPDF($this->getPdf(), $this->getOut() . '/pdf'); } - return $res; + return Buffer::getIn($res); } + + } \ No newline at end of file