From 1744840c2f94cfb7372e0959039e9d4f7f15800e Mon Sep 17 00:00:00 2001
From: Vincent Vanwaelscappel
Date: Wed, 20 Oct 2021 09:52:53 +0200
Subject: [PATCH] wip #4804 @0.5

---
 src/Document.php | 4 ++--
 src/PDFTools.php | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/Document.php b/src/Document.php
index b6683e1..c3a006e 100644
--- a/src/Document.php
+++ b/src/Document.php
@@ -157,9 +157,9 @@ class Document
         PDFTools::extractHighlightsData($this->getPDFInput(), $this->getConvertPath());
     }
 
-    public function processFullTexts()
+    public function processFullTexts($force = true)
     {
-        PDFTools::extractTexts($this->getPDFInput(), $this->getConvertPath());
+        PDFTools::extractTexts($this->getPDFInput(), $this->getConvertPath(), '', $force);
     }
 
     public function getPageNumbers()
diff --git a/src/PDFTools.php b/src/PDFTools.php
index 8469b0c..4c88e3a 100644
--- a/src/PDFTools.php
+++ b/src/PDFTools.php
@@ -326,11 +326,15 @@ class PDFTools
         $fwstk->execute();
     }
 
-    public static function extractTexts($pdf, $out, $ignoreSeparators = '')
+    public static function extractTexts($pdf, $out, $ignoreSeparators = '', $force = true)
     {
         $out .= 'texts/';
         Files::mkdir($out);
 
+        if (!$force && file_exists($out . 'fp1.txt')) {
+            return;
+        }
+
         $fwstk = new FWSTK();
         $fwstk->setArg('--input ' . $pdf);
         $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
-- 
2.39.5