From eda40274a846a4d1dfcc789282e20b2d809455ef Mon Sep 17 00:00:00 2001 From: Vincent Vanwaelscappel Date: Mon, 22 Aug 2022 22:13:47 +0200 Subject: [PATCH] wip #5410 @0.5 --- app/Fluidbook/SEO/Document.php | 74 +++++++++++++---------------- app/Jobs/FluidbookCompiler.php | 19 ++++++-- app/Models/FluidbookDocument.php | 38 ++++++++++++++- app/Models/FluidbookPublication.php | 6 +++ composer.lock | 30 ++++++------ 5 files changed, 106 insertions(+), 61 deletions(-) diff --git a/app/Fluidbook/SEO/Document.php b/app/Fluidbook/SEO/Document.php index b99a7d676..a6c274cc6 100644 --- a/app/Fluidbook/SEO/Document.php +++ b/app/Fluidbook/SEO/Document.php @@ -2,15 +2,16 @@ namespace App\Fluidbook\SEO; +use App\Jobs\FluidbookCompiler; +use App\Models\FluidbookDocument; use App\Util\Excel; use Cubist\Util\Text; -use Fluidbook\Tools\Compiler\CompilerInterface; class Document { /** - * @var CompilerInterface + * @var FluidbookCompiler */ public $compiler; /** @@ -22,7 +23,7 @@ class Document /** - * @param $compiler CompilerInterface + * @param $compiler FluidbookCompiler */ public function __construct($compiler) { @@ -30,7 +31,7 @@ class Document // Define default seo contents $this->createPages(); - $file=$this->compiler->getSetting('seoAdvanced',''); + $file = $this->compiler->getSetting('seoAdvanced', ''); if ($file) { $sheets = Excel::excelToArray($this->compiler->working_path($file), true); foreach ($sheets as $sheet) { @@ -55,7 +56,7 @@ class Document } } - $this->html = file_get_contents($this->compiler->assets . '/_index.html'); + $this->html = file_get_contents($this->compiler->source_path('/_index.html')); } public function createPages() @@ -74,70 +75,59 @@ class Document return; } foreach ($this->compiler->pages as $page => $infos) { - $this->createPage($page, $this->getTextContent($infos)); + $this->createPage($page, $this->getTextContent($page)); } } public function createPage($page, $content) { + $title = $this->compiler->getSetting('title'); + $seoDescription = $this->compiler->getSetting('seoDescription', ''); + $p = new Page($this); $p->page = $page; - $p->title = ($page == 1) ? $this->compiler->book->parametres->title : $this->_getPageLabel($page); + $p->title = ($page == 1) ? $title : $this->_getPageLabel($page); $p->text = $content; - $p->description = $this->compiler->book->parametres->seoDescription ? $this->compiler->book->parametres->seoDescription : $this->compiler->book->parametres->title . ' - Powered by Fluidbook'; - $p->socialDescription = $this->compiler->book->parametres->facebook_description || $this->compiler->book->parametres->seoDescription; - $p->keywords = $this->compiler->book->parametres->seoKeywords; - $p->robots = $this->compiler->book->parametres->seoRobots ? 'index,follow' : 'noindex,nofollow'; + $p->description = $seoDescription ?: $title . ' - Powered by Fluidbook'; + $p->socialDescription = $this->compiler->getSetting('facebook_description', '') || $seoDescription; + $p->keywords = $this->compiler->getSetting('seoKeywords', ''); + $p->robots = $this->compiler->getSetting('seoRobots', true) ? 'index,follow' : 'noindex,nofollow'; // Google analytics + $ga = $this->compiler->getSetting('googleAnalytics', ''); + $gacustom = $this->compiler->getSetting('googleAnalyticsCustom', ''); + $statsCustom = $this->compiler->getSetting('statsCustom', ''); + $p->ua = ''; - if ($this->compiler->book->parametres->googleAnalytics != '') { - $codes = explode(',', $this->compiler->book->parametres->googleAnalytics); + if ($ga != '') { + $codes = explode(',', $ga); $p->ua .= ""; } - if ($this->compiler->book->parametres->googleAnalyticsCustom != '') { - $p->ua .= $this->compiler->book->parametres->googleAnalyticsCustom; + if ($gacustom) { + $p->ua .= $gacustom; } - - if ($this->compiler->book->parametres->statsCustom != '') { - $p->footer = $this->compiler->book->parametres->statsCustom; + if ($statsCustom) { + $p->footer = $statsCustom; } - $this->pages[$page] = $p; } - public function getTextContent($infos) + public function getTextContent($page) { - switch ($this->compiler->book->parametres->textExtraction) { - case 'poppler': - $prefix = 'p'; - break; - case 'fluidbook': - $prefix = 'f'; - break; - default: - $prefix = ''; - break; - } + $f = $this->compiler->getFluidbook()->getTextFile($page, FluidbookDocument::TEXT_HTML); + $res = trim(file_get_contents($f)); - $f = wsDocument::getDir($infos['document_id']) . $prefix . 'h' . $infos['document_page'] . '.txt'; - if (file_exists($f)) { - $res = trim(file_get_contents($f)); - } else { - $res = ''; - } - - if (strpos($res, '
') !== 0) { + if (!str_starts_with($res, '
')) { $res = preg_replace('|\<\/div\>$|', '', $res); } - return $res; } public function _getPageLabel($page) { - $res = $this->compiler->book->parametres->title; - foreach ($this->compiler->book->chapters as $chapter) { + $res = $this->compiler->getSetting('title'); + dd($this->compiler->getSetting('chapters')); + foreach ($this->compiler->getSetting('chapters') as $chapter) { if (trim($chapter->label, "\t\r\n\0\x0B-+") == '') { continue; } diff --git a/app/Jobs/FluidbookCompiler.php b/app/Jobs/FluidbookCompiler.php index 423f260ec..d5baadb09 100644 --- a/app/Jobs/FluidbookCompiler.php +++ b/app/Jobs/FluidbookCompiler.php @@ -2,6 +2,7 @@ namespace App\Jobs; +use App\Fluidbook\SEO\Document; use App\Models\FluidbookPublication; use App\Models\FluidbookTheme; use App\Models\FluidbookTranslate; @@ -17,7 +18,6 @@ use Cubist\Util\Files\VirtualDirectory; use Cubist\Util\Graphics\Color; use Cubist\Util\Graphics\Image; use Cubist\Util\Graphics\Resizer; -use Cubist\Util\ObjectUtil; use Cubist\Util\PHP; use Cubist\Util\Text; use Cubist\Util\Url; @@ -1137,7 +1137,7 @@ class FluidbookCompiler extends Base implements CompilerInterface return $this->linkScale; } - public function virtualToPhysical($virtual) + public function virtualToPhysical($virtual): string|int { if (isset($this->pageLabels[$virtual])) { return $virtual; @@ -1943,7 +1943,7 @@ height="0" width="0" style="display:none;visibility:hidden"> } $this->vdir->file_put_contents('p/' . $seoArticle['url'], $html); } - $this->seo = new wsHTML5Seo($this); + $this->seo = new Document($this); } public function addContentLock($page, $unlockConditions = '') @@ -3998,4 +3998,17 @@ height="0" width="0" style="display:none;visibility:hidden"> } } + public function source_path($path = ''): string + { + $res = $this->assets; + if ($path) { + $res .= '/' . $path; + } + return $res; + } + + public function page_path($page, $path = ''): string + { + // TODO: Implement page_path() method. + } } diff --git a/app/Models/FluidbookDocument.php b/app/Models/FluidbookDocument.php index f95777fa7..6d0188ba1 100644 --- a/app/Models/FluidbookDocument.php +++ b/app/Models/FluidbookDocument.php @@ -9,6 +9,7 @@ use App\Util\FluidbookFarm; use Cubist\Backpack\Magic\Fields\Integer; use Cubist\Backpack\Magic\Fields\Text; use Cubist\Backpack\Magic\Fields\Textarea; +use Cubist\PDF\CommandLine\FWSTK; use Cubist\PDF\PDFTools; use Illuminate\Support\Facades\Cache; @@ -21,6 +22,9 @@ class FluidbookDocument extends ToolboxModel 'plural' => 'documents']; protected static $_permissionBase = 'fluidbook-document'; protected $casts = ['bookmarks' => 'array', 'pdf_data' => 'array', 'file_data' => 'array']; + const TEXT_PLAIN = 'p'; + const TEXT_INDEX = 'i'; + const TEXT_HTML = 'h'; public function setFields() { @@ -87,7 +91,7 @@ class FluidbookDocument extends ToolboxModel public function extractTexts() { - PDFTools::extractTexts($this->path('original.pdf'), $this->path(), '', false, true); + PDFTools::extractTexts($this->path('original.pdf'), $this->path(), 'fluidbook', ''); PDFTools::extractHighlightsData($this->path('original.pdf'), $this->path()); } @@ -284,4 +288,36 @@ class FluidbookDocument extends ToolboxModel return $path; } + /** + * @throws \Exception + */ + public function getTextFile($page, $type = self::TEXT_PLAIN, $extractionMethod = 'fluidbook', $ignoreSeparators = '') + { + $path = $this->_getTextPath($page, $type, $extractionMethod, $ignoreSeparators); + if (!file_exists($path)) { + PDFTools::extractTexts($this->path('original.pdf'), $this->path(), $extractionMethod, $ignoreSeparators); + if (!file_exists($path)) { + throw new \Exception('An error occured while producing file ' . $path); + } + } + return $path; + } + + protected static function _getTextFilePrefix($textExtraction) + { + $map = ['poppler' => 'p', 'fluidbook' => 'f', 'pdfbox' => '']; + return $map[$textExtraction]; + } + + public function _getTextPath($page, $type = self::TEXT_PLAIN, $extractionMethod = 'fluidbook', $ignoreSeparators = '') + { + $sepFolder = ''; + if ($ignoreSeparators) { + $sepFolder = 'sep_' . md5($ignoreSeparators) . '/'; + } + + $prefix = self::_getTextFilePrefix($extractionMethod); + return $this->path('texts/' . $sepFolder . $prefix . $type . $page . '.txt'); + } + } diff --git a/app/Models/FluidbookPublication.php b/app/Models/FluidbookPublication.php index 666e7a269..8b0870548 100644 --- a/app/Models/FluidbookPublication.php +++ b/app/Models/FluidbookPublication.php @@ -174,6 +174,12 @@ class FluidbookPublication extends ToolboxSettingsModel return self::$_docs[$documentID]; } + public function getTextFile($page, $type = FluidbookDocument::TEXT_PLAIN) + { + $compo = $this->composition[$page]; + return self::_getDocument($compo[0])->getTextFile($compo[1], $type, $this->textExtraction, $this->ignoreSearchSeparators); + } + public function getFile($page, $format = 'jpg', $resolution = 150, $withText = true, $withGraphics = true, $version = 'html', $force = false) { diff --git a/composer.lock b/composer.lock index ec420ff48..95b33fbfa 100644 --- a/composer.lock +++ b/composer.lock @@ -2039,13 +2039,13 @@ "source": { "type": "git", "url": "git://git.cubedesigners.com/cubist_pdf.git", - "reference": "a9776fd1cf4698352dfe7421aafc53ed0b4c1d6e" + "reference": "3de1229df9751a411d0c5cb5d85fcd4889a85826" }, "dist": { "type": "tar", - "url": "https://composer.cubedesigners.com/dist/cubist/pdf/cubist-pdf-dev-master-7c7ca6.tar", - "reference": "a9776fd1cf4698352dfe7421aafc53ed0b4c1d6e", - "shasum": "af35bdf43a35c2b9ca7420d0672289b586f5593b" + "url": "https://composer.cubedesigners.com/dist/cubist/pdf/cubist-pdf-dev-master-ba9dfa.tar", + "reference": "3de1229df9751a411d0c5cb5d85fcd4889a85826", + "shasum": "8ae87c50b61ad69fe6d61390a2280823edccb6b0" }, "require": { "cubist/util": "dev-master", @@ -2081,7 +2081,7 @@ "cubist", "pdf" ], - "time": "2022-08-22T14:43:58+00:00" + "time": "2022-08-22T20:03:01+00:00" }, { "name": "cubist/scorm", @@ -2126,13 +2126,13 @@ "source": { "type": "git", "url": "git://git.cubedesigners.com/cubist_util.git", - "reference": "5f746d46b5892cf178f25ad8a2ba1e3d2cba0568" + "reference": "bbc6a1884edcdc5a700a3686ca719adfcb0d7d11" }, "dist": { "type": "tar", - "url": "https://composer.cubedesigners.com/dist/cubist/util/cubist-util-dev-master-19118f.tar", - "reference": "5f746d46b5892cf178f25ad8a2ba1e3d2cba0568", - "shasum": "930fe6d64099ab08dd7a1950d2c1ec20647ef9fb" + "url": "https://composer.cubedesigners.com/dist/cubist/util/cubist-util-dev-master-655736.tar", + "reference": "bbc6a1884edcdc5a700a3686ca719adfcb0d7d11", + "shasum": "b64ade6c61fcb2adb718dae612333897de215ce9" }, "require": { "cubist/net": "dev-master", @@ -2163,7 +2163,7 @@ } ], "description": "Utilities class", - "time": "2022-08-22T14:49:19+00:00" + "time": "2022-08-22T18:08:07+00:00" }, { "name": "cviebrock/eloquent-sluggable", @@ -3415,13 +3415,13 @@ "source": { "type": "git", "url": "git://git.cubedesigners.com/fluidbook_tools.git", - "reference": "d5e3e0fc0176fdb7764669032f02ad1bf638a9d2" + "reference": "d147c7ef909706a9eb94281407fb80064a356e3f" }, "dist": { "type": "tar", - "url": "https://composer.cubedesigners.com/dist/fluidbook/tools/fluidbook-tools-dev-master-1651ab.tar", - "reference": "d5e3e0fc0176fdb7764669032f02ad1bf638a9d2", - "shasum": "82602466fb73e0eb1ab91a3a70e44c62c0bb6ef8" + "url": "https://composer.cubedesigners.com/dist/fluidbook/tools/fluidbook-tools-dev-master-a0d84e.tar", + "reference": "d147c7ef909706a9eb94281407fb80064a356e3f", + "shasum": "95e4c9b5142c9ea48451a1c13e768083ceb9bd39" }, "require": { "barryvdh/laravel-debugbar": "^3.6", @@ -3455,7 +3455,7 @@ } ], "description": "Fluidbook Tools", - "time": "2022-08-22T15:58:45+00:00" + "time": "2022-08-22T19:31:13+00:00" }, { "name": "genealabs/laravel-model-caching", -- 2.39.5