From b4fa0a70b1c0ddfb4e9b67e75e00da3d4424d0d3 Mon Sep 17 00:00:00 2001
From: Vincent Vanwaelscappel
Date: Mon, 22 Aug 2022 21:08:52 +0200
Subject: [PATCH] wip #5410 @0.25

---
Review notes (text in this section is not part of the commit):
 * Restored the space between `--extractTextsMethod` and its value. Without it
   the flag and the method name are fused into one token (for example
   `--extractTextsMethodfluidbook`), unlike every other setArg() call in this
   function and unlike the line being replaced.
 * The guard that appends the `sep_<md5>` suffix to the output directory now
   uses the same `!== ''` test as the guard that passes `--ignoreSeparators`,
   so a value such as '0' cannot be sent to the tool while its output lands in
   the un-suffixed directory.

 src/PDFTools.php | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/PDFTools.php b/src/PDFTools.php
index a004b67..8b92e67 100644
--- a/src/PDFTools.php
+++ b/src/PDFTools.php
@@ -409,19 +409,18 @@ class PDFTools
         $fwstk->execute();
     }
 
-    public static function extractTexts($pdf, $out, $ignoreSeparators = '', $force = true)
+    public static function extractTexts($pdf, $out, $textExtraction, $ignoreSeparators = '')
     {
         $out .= 'texts/';
-        Files::mkdir($out);
-
-        if (!$force && file_exists($out . 'fp1.txt')) {
-            return;
+        if ($ignoreSeparators !== '') {
+            $out .= 'sep_' . md5($ignoreSeparators);
         }
+        $out = Files::mkdir($out) . '/';
 
         $fwstk = new FWSTK();
         $fwstk->setArg('--input ' . $pdf);
         $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
-        $fwstk->setArg('--extractTextsMethod fluidbook');
+        $fwstk->setArg('--extractTextsMethod ' . $textExtraction);
         $fwstk->setArg('--threads 1');
         if ($ignoreSeparators !== '') {
             $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators);
-- 
2.39.5