use Cubist\PDF\CommandLine\FWSTK;
use Cubist\PDF\PDFTools;
use Cubist\Util\Gzip;
+use Cubist\Util\Math;
use Illuminate\Support\Facades\Cache;
class FluidbookDocument extends ToolboxModel
'plural' => 'documents'];
protected static $_permissionBase = 'fluidbook-document';
protected $casts = ['bookmarks' => 'array', 'pdf_data' => 'array', 'file_data' => 'array'];
+
+ protected $manualcut = false; // false, or a cut-mode string ('L8', 'L4', 'L3', '14-23') assigned by detectSpreads()
+ protected $autocut = false; // false, or '1-23-4' / '1-23' assigned by detectSpreads(); cropAndCut() passes it to cutDocument()
+ protected $manualcrop = false; // set to true by detectPageDifferences() when the rounded trim box sizes are not identical on every page
+ protected $autocrop = false; // false, or 'trim' assigned by detectPageDifferences(); makes cropAndCut() call trimDocument()
+
+ protected $processSync = true; // overwritten with $sync by processUpload(); echoStatus() prints only when this is truthy
+
const TEXT_PLAIN = 'p';
const TEXT_INDEX = 'i';
const TEXT_HTML = 'h';
public function processUpload($uploadID, $sync = false)
{
- FluidbookDocumentUpload::updateProgression($uploadID, __('Analyse du document'), 1.1);
- $this->checkInfos();
- FluidbookDocumentUpload::updateProgression($uploadID, __('Nettoyage du document'), 1.2);
+ $this->processSync = $sync; // echoStatus() prints progress lines only while this is truthy
+ $this->updateProgression($uploadID, __('Nettoyage du document'), 1.3);
$this->fixPDF();
- FluidbookDocumentUpload::updateProgression($uploadID, __('Découpe du document'), 1.3);
+ $this->updateProgression($uploadID, __('Découpe du document'), 1.4);
+ $this->cropAndCut(); // NOTE(review): presumably produces the file later read through getPDFSource() (crop.pdf) — confirm the default $type
+ $this->updateProgression($uploadID, __('Analyse du document'), 1.5);
+ $this->checkInfos(true); // forced: reloads pages / pdf_data / bookmarks after the crop-and-cut step
+ $this->updateProgression($uploadID, __('Séparation des pages'), 1.6);
$this->splitPDF();
- FluidbookDocumentUpload::updateProgression($uploadID, __('Extraction des textes'), 1.75);
+ $this->updateProgression($uploadID, __('Extraction des textes'), 1.75);
$this->extractTexts();
- FluidbookDocumentUpload::updateProgression($uploadID, __('Extraction des liens'), 1.9);
+ $this->updateProgression($uploadID, __('Extraction des liens'), 1.9);
$this->extractLinks();
- FluidbookDocumentUpload::updateProgression($uploadID, __('Conversion des pages'), 2);
+ $this->updateProgression($uploadID, __('Conversion des pages'), 2);
$jobs = [];
$files = [
$delay = 0;
for ($i = 1; $i <= $this->pages; $i++) {
foreach ($files as $file) {
- $job = new FluidbookDocumentFileProcess($this, $i, $file[0], $file[1], $file[2], $file[3]);
+ $job = new FluidbookDocumentFileProcess($this, $i, $file[0] ?? 'jpg', $file[1] ?? 150, $file[2] ?? true, $file[3] ?? true); // defaults apply when a $files entry has fewer than four elements (or null values)
if ($sync) {
dispatch_sync($job);
} else {
}
}
+ /**
+  * Print the current progression of an upload as "<progress> | <message>".
+  *
+  * Nothing is printed unless the processSync flag is truthy.
+  *
+  * @param mixed $id Upload identifier handed to FluidbookDocumentUpload::getProgression()
+  */
+ public function echoStatus($id)
+ {
+     if ($this->processSync) {
+         $state = FluidbookDocumentUpload::getProgression($id);
+         echo $state['progress'] . ' | ' . $state['message'] . "\n";
+     }
+ }
+
public function extractTexts()
{
- PDFTools::extractTexts($this->path('original.pdf'), $this->path(), 'fluidbook', '');
- PDFTools::extractHighlightsData($this->path('original.pdf'), $this->path());
+ PDFTools::extractTexts($this->getPDFSource(), $this->path(), 'fluidbook', ''); // input is getPDFSource() with its default $type; NOTE(review): $this->path() is presumably the output directory — confirm in PDFTools
+ PDFTools::extractHighlightsData($this->getPDFSource(), $this->path()); // same input PDF and directory as the text extraction above
}
public function extractLinks()
{
- PDFTools::extractLinks($this->path('original.pdf'), $this->path());
+ PDFTools::extractLinks($this->getPDFSource(), $this->path()); // input is getPDFSource() with its default $type; NOTE(review): $this->path() is presumably the output directory — confirm in PDFTools
}
protected function _checkJobs($uploadID, $jobs, $nbfiles)
{
+ $this->checkInfos(); // not forced: only loads pages / pdf_data / bookmarks when pdf_data is still null
$nbjobs = $nbfiles * $this->pages;
-
$done = 0;
foreach ($jobs as $job) {
/** @var $job FluidbookDocumentFileProcess */
}
$progress = $done / $nbjobs; // NOTE(review): $nbjobs is 0 when $nbfiles or $this->pages is 0, which makes this a division by zero
if ($progress === 1) {
- FluidbookDocumentUpload::updateProgression($uploadID, __('Conversion terminée'), 3);
+ $this->updateProgression($uploadID, __('Conversion terminée'), 3);
} else {
- FluidbookDocumentUpload::updateProgression($uploadID, __('Conversion des pages (:done/:pages)', ['pages' => $this->pages, 'done' => round($done / $nbfiles)]), 2 + $progress);
+ $this->updateProgression($uploadID, __('Conversion des pages (:done/:pages)', ['pages' => $this->pages, 'done' => round($done / $nbfiles)]), 2 + $progress); // reported progress runs from 2 up to (but excluding) 3 while jobs are pending
}
return $progress;
}
return 620 / $this->pdf_data['size'][0];
}
- public function checkInfos()
+ public function checkInfos($force = false) // $force = true reloads the PDF infos even when pdf_data is already set
{
- if (null === $this->pdf_data) {
- $infos = PDFTools::infos($this->path('original.pdf'));
- $this->pages = $infos['pages'];
- $this->pdf_data = $infos['infos'];
- $this->bookmarks = $infos['bookmarks'];
+ if ($force || null === $this->pdf_data) {
+ $infos = $this->_getInfos(); // default $type 'crop': infos are read from getPDFSource('crop')
+ $this->pages = $infos->pages;
+ $this->pdf_data = $infos->pdf_data;
+ $this->bookmarks = $infos->bookmarks;
$this->saveWithoutFlushingCache();
}
}
+ /**
+  * Read the PDF infos of one variant of the document and repackage them as an object.
+  *
+  * @param string $type Variant name forwarded to getPDFSource() ('crop' by default)
+  * @return \stdClass Object with ->pages, ->pdf_data (the 'infos' entry) and ->bookmarks
+  */
+ protected function _getInfos($type = 'crop')
+ {
+     $raw = PDFTools::infos($this->getPDFSource($type));
+
+     return (object)[
+         'pages' => $raw['pages'],
+         'pdf_data' => $raw['infos'],
+         'bookmarks' => $raw['bookmarks'],
+     ];
+ }
+
public function fixPDF()
{
- $fixed = $this->path('fixed.pdf');
- $original = $this->path('original.pdf');
- $crop = $this->path('crop.pdf');
+ $fixed = $this->getPDFSource('fixed'); // regenerated below when missing, empty, or older than the original
+ $original = $this->getPDFSource('original'); // input handed to PDFTools::fixPDF()
if (!file_exists($fixed) || filesize($fixed) === 0 || filemtime($fixed) < filemtime($original)) {
PDFTools::fixPDF($original, $fixed);
}
- if (!file_exists($crop)) {
- `ln -s $fixed $crop`;
- }
return $fixed;
}
public function splitPDF()
{
- PDFTools::split($this->fixPDF(), $this->path('pdf'));
+ PDFTools::split($this->getPDFSource(), $this->path('pdf')); // splits getPDFSource() (default $type) with $this->path('pdf') as second argument; fixPDF() is no longer triggered from here
}
/**
{
$path = $this->_getTextPath($page, $type, $extractionMethod, $ignoreSeparators);
if (!$this->_checkTextFile($path)) {
- PDFTools::extractTexts($this->path('original.pdf'), $this->path(), $extractionMethod, $ignoreSeparators);
+ PDFTools::extractTexts($this->getPDFSource(), $this->path(), $extractionMethod, $ignoreSeparators);
if (!Gzip::file_exists($path)) {
throw new \Exception('An error occured while producing file ' . $path);
}
{
$path = $this->_getHightlightFilePath($page);
if (!$this->_checkTextFile($path)) {
- PDFTools::extractHighlightsData($this->path('original.pdf'), $this->path());
+ PDFTools::extractHighlightsData($this->getPDFSource(), $this->path());
if (!Gzip::file_exists($path)) {
throw new \Exception('An error occured while producing file ' . $path);
}
return $this->path($type . '.pdf');
}
+ /**
+  * Run FWSTK with --cut on the fixed PDF, writing to getPDFSource() (default variant).
+  *
+  * @param string $mode Value appended to the --cut argument
+  */
+ public function cutDocument($mode)
+ {
+     $tool = new FWSTK();
+     $arguments = [
+         '--input ' . $this->getPDFSource('fixed'),
+         '--cut ' . $mode,
+         '--output ' . $this->getPDFSource(),
+     ];
+     foreach ($arguments as $argument) {
+         $tool->setArg($argument);
+     }
+     $tool->execute();
+ }
+
+ /**
+  * Run FWSTK with --trim on the fixed PDF, writing to getPDFSource() (default variant).
+  */
+ public function trimDocument()
+ {
+     $tool = new FWSTK();
+     $arguments = [
+         '--input ' . $this->getPDFSource('fixed'),
+         '--trim',
+         '--output ' . $this->getPDFSource(),
+     ];
+     foreach ($arguments as $argument) {
+         $tool->setArg($argument);
+     }
+     $tool->execute();
+ }
+
+ /**
+  * Create "crop.pdf" in the document directory as a relative symbolic link to "fixed.pdf".
+  *
+  * The command output is discarded; `ln -s` itself refuses to overwrite an existing "crop.pdf".
+  */
+ public function lnCrop()
+ {
+     // Quote the directory so a path containing spaces or shell metacharacters reaches `cd` intact.
+     $root = escapeshellarg($this->path());
+     // `&&` rather than `;`: if `cd` fails the link must not be created in the current working directory.
+     `cd $root && ln -s fixed.pdf crop.pdf`;
+ }
+
+ /**
+  * Tell whether any of the crop / cut flags is set.
+  *
+  * @return bool true when at least one of autocrop, manualcrop, autocut, manualcut is truthy
+  */
+ protected function isCropped()
+ {
+     foreach ([$this->autocrop, $this->manualcrop, $this->autocut, $this->manualcut] as $flag) {
+         if ($flag) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Analyse the fixed PDF and apply the automatic cut or trim it calls for.
+  *
+  * detectSpreads() and detectPageDifferences() set the cut / crop flags from the
+  * infos of the 'fixed' variant. Then exactly one of these happens:
+  *  - autocut is set: the document is cut with that mode;
+  *  - autocrop is 'trim': the document is trimmed;
+  *  - otherwise (nothing detected, or only a manual operation suggested): crop.pdf is
+  *    linked to fixed.pdf so that later steps always find a file to read.
+  *
+  * @return bool true only when the document was cut through cutDocument()
+  */
+ public function cropAndCut()
+ {
+     $infos = $this->_getInfos('fixed');
+
+     $this->detectSpreads($infos);
+     $this->detectPageDifferences($infos);
+
+     if ($this->autocut) {
+         $this->cutDocument($this->autocut);
+         return true;
+     }
+     if ($this->autocrop === 'trim') {
+         $this->trimDocument();
+         return false;
+     }
+     // No automatic transformation wrote an output file. This also covers the case where
+     // only manualcut / manualcrop is set, which previously left no output at all.
+     $this->lnCrop();
+     return false;
+ }
+
+ /**
+  * Decide whether the document should be cropped to its trim boxes.
+  *
+  * Pages lacking a 'crop' or a 'trim' entry are ignored. When at least one page has a
+  * trim box that differs from its crop box:
+  *  - autocrop becomes 'trim' if the rounded trim box size is the same on every page;
+  *  - manualcrop becomes true otherwise.
+  * Both flags are reset to false on entry, mirroring what detectSpreads() does for the cut flags.
+  *
+  * @param object $pagesInfos Object exposing ->pdf_data['page'] as built by _getInfos()
+  * @return false|null false when no page differs; null once the flags have been set
+  */
+ protected function detectPageDifferences($pagesInfos)
+ {
+     $this->autocrop = false;
+     $this->manualcrop = false;
+
+     // Check whether the crop box and the trim box are identical on every page
+     $difference = false;
+     foreach ($pagesInfos->pdf_data['page'] as $infos) {
+         // Both boxes are required; the previous check tested 'crop' twice and never 'trim'.
+         if (!isset($infos['crop'], $infos['trim'])) {
+             continue;
+         }
+         if ($infos['crop'] != $infos['trim']) {
+             $difference = true;
+         }
+     }
+     if (!$difference) {
+         return false;
+     }
+
+     // Check whether the trim box gives every page the same size
+     $heights = [];
+     $widths = [];
+     foreach ($pagesInfos->pdf_data['page'] as $infos) {
+         // Skip pages without a trim box instead of dereferencing a missing entry.
+         if (!isset($infos['trim'])) {
+             continue;
+         }
+         $heights[] = round($infos['trim']->height);
+         $widths[] = round($infos['trim']->width);
+     }
+     if (count(array_unique($heights)) === 1 && count(array_unique($widths)) === 1) {
+         $this->autocrop = 'trim';
+     } else {
+         $this->manualcrop = true;
+     }
+ }
+
+ /**
+  * Detect whether the PDF is laid out as spreads and choose a cut mode.
+  *
+  * Both cut flags are reset, then the 'size' entries of the first, second and last
+  * pages are compared (size is presumably [width, height] — confirm against PDFTools::infos()):
+  *  - all three equal: no automatic cut; the ratio size[0] / size[1] of the first page
+  *    selects manualcut (false up to 1, '14-23' below 2, 'L3' from 2, 'L4' from 3, 'L8' from 6);
+  *  - otherwise, when Math::compare() accepts twice the first size[0] against the second
+  *    size[0]: autocut is '1-23-4' if the last page matches the first one, and '1-23'
+  *    if it matches the second one (the latter wins when both match).
+  * Documents of two pages or fewer, or with missing page sizes, are left uncut.
+  *
+  * @param object $pagesInfos Object exposing ->pages and ->pdf_data['page'] as built by _getInfos()
+  * @return void
+  */
+ protected function detectSpreads($pagesInfos)
+ {
+     // Spread detection
+     $this->autocut = false;
+     $this->manualcut = false;
+     if ($pagesInfos->pages <= 2) {
+         return;
+     }
+
+     // Initialised up front so a missing page entry is detected rather than read as an undefined variable.
+     $first = null;
+     $second = null;
+     $last = null;
+     foreach ($pagesInfos->pdf_data['page'] as $page => $infos) {
+         if ($page == 1) {
+             $first = $infos['size'] ?? null;
+         } elseif ($page == $pagesInfos->pages) {
+             $last = $infos['size'] ?? null;
+         } elseif ($page == 2) {
+             $second = $infos['size'] ?? null;
+         }
+     }
+     if ($first === null || $second === null || $last === null) {
+         return;
+     }
+
+     if ($first == $last && $last == $second) {
+         // A zero or missing second dimension would make the ratio a division by zero.
+         if (empty($first[1])) {
+             return;
+         }
+         $ratio = $first[0] / $first[1];
+         if ($ratio >= 6) {
+             $this->manualcut = 'L8';
+         } elseif ($ratio >= 3) {
+             $this->manualcut = 'L4';
+         } elseif ($ratio >= 2) {
+             $this->manualcut = 'L3';
+         } elseif ($ratio > 1) {
+             $this->manualcut = '14-23';
+         }
+         return;
+     }
+
+     // Shared by both automatic modes, so evaluate it once.
+     $secondIsDouble = Math::compare($first[0] * 2, $second[0], 0.9);
+     if ($secondIsDouble && self::compareSizes($last, $first)) {
+         $this->autocut = '1-23-4';
+     }
+     if ($secondIsDouble && self::compareSizes($last, $second)) {
+         $this->autocut = '1-23';
+     }
+ }
+
+ /**
+  * Compare two sizes component by component with Math::compare().
+  *
+  * @param array $x First size; indexes 0 and 1 are read
+  * @param array $y Second size; indexes 0 and 1 are read
+  * @param float $tolerance Third argument forwarded to Math::compare()
+  * @return bool true when both components are accepted by Math::compare()
+  */
+ public static function compareSizes($x, $y, $tolerance = 0.9)
+ {
+     if (!Math::compare($x[0], $y[0], $tolerance)) {
+         return false;
+     }
+     return (bool)Math::compare($x[1], $y[1], $tolerance);
+ }
+
+ /**
+  * Delete everything in the document directory except files named "original.pdf".
+  *
+  * Runs `find … -exec rm -rf` through the shell from inside the document directory.
+  */
+ public function clearAllFiles()
+ {
+     // The directory is quoted, and `&&` replaces `;`: if `cd` fails, the recursive delete
+     // must not run against whatever the current working directory happens to be.
+     $cmd = 'cd ' . escapeshellarg($this->path()) . ' && find . ! -name \'original.pdf\' ! -name \'.\' ! -name \'..\' -exec rm -rf {} +';
+     `$cmd`;
+ }
+
+ protected function updateProgression($uploadID, $message, $progress) // instance-level wrapper: records the progression, then echoes it
+ {
+ FluidbookDocumentUpload::updateProgression($uploadID, $message, $progress); // arguments are forwarded unchanged
+ $this->echoStatus($uploadID); // prints only when processSync is truthy
+ }
+
}