From: Vincent Vanwaelscappel Date: Mon, 22 Aug 2022 19:08:52 +0000 (+0200) Subject: wip #5410 @0.25 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=b4fa0a70b1c0ddfb4e9b67e75e00da3d4424d0d3;p=cubist_pdf.git wip #5410 @0.25 --- diff --git a/src/PDFTools.php b/src/PDFTools.php index a004b67..8b92e67 100644 --- a/src/PDFTools.php +++ b/src/PDFTools.php @@ -409,19 +409,18 @@ class PDFTools $fwstk->execute(); } - public static function extractTexts($pdf, $out, $ignoreSeparators = '', $force = true) + public static function extractTexts($pdf, $out, $textExtraction, $ignoreSeparators = '') { $out .= 'texts/'; - Files::mkdir($out); - - if (!$force && file_exists($out . 'fp1.txt')) { - return; + if ($ignoreSeparators) { + $out .= 'sep_' . md5($ignoreSeparators); } + $out = Files::mkdir($out) . '/'; $fwstk = new FWSTK(); $fwstk->setArg('--input ' . $pdf); $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt'); - $fwstk->setArg('--extractTextsMethod fluidbook'); + $fwstk->setArg('--extractTextsMethod' . $textExtraction); $fwstk->setArg('--threads 1'); if ($ignoreSeparators !== '') { $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators);