From: Vincent Vanwaelscappel Date: Mon, 22 Aug 2022 20:01:11 +0000 (+0200) Subject: wip #5410 @0.5 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=dda06b6bcefe9ad329a0349bc1bfab4e0fbc910c;p=cubist_pdf.git wip #5410 @0.5 --- diff --git a/.idea/misc.xml b/.idea/misc.xml index b44ac24..ad68c41 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + \ No newline at end of file diff --git a/resources/tools/fwstk/.idea/misc.xml b/resources/tools/fwstk/.idea/misc.xml index bec7cd5..e5dddc0 100644 --- a/resources/tools/fwstk/.idea/misc.xml +++ b/resources/tools/fwstk/.idea/misc.xml @@ -178,7 +178,7 @@ - + diff --git a/resources/tools/fwstk/.idea/vcs.xml b/resources/tools/fwstk/.idea/vcs.xml index 45cf85c..bc8238e 100644 --- a/resources/tools/fwstk/.idea/vcs.xml +++ b/resources/tools/fwstk/.idea/vcs.xml @@ -2,5 +2,6 @@ + \ No newline at end of file diff --git a/resources/tools/fwstk/.idea/workspace.xml b/resources/tools/fwstk/.idea/workspace.xml index 082ba1c..3b4574a 100644 --- a/resources/tools/fwstk/.idea/workspace.xml +++ b/resources/tools/fwstk/.idea/workspace.xml @@ -7,10 +7,13 @@ - + + + - - + + + + + - - - - - + + + + + - + @@ -93,26 +99,22 @@ - - - - - - - - - - - - - - - - - - - - + @@ -123,7 +125,7 @@ - + + + + + + + + + + + + - - - - @@ -492,6 +469,7 @@ + 1487172253077 diff --git a/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar b/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar index 757f7c1..8aba199 100644 Binary files a/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar and b/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar differ diff --git a/resources/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java b/resources/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java index 24e3b99..c7e31eb 100644 --- a/resources/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java +++ b/resources/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java @@ -140,7 +140,7 @@ public class TextsThread extends Thread { html = htmlStripper.getHTML(doc); html = cleanhtml(html); - if (html.equals(memoHTML)) { + if (!html.equals("") && html.equals(memoHTML)) { memoHTML = ""; continue; } @@ -154,6 +154,7 @@ public class TextsThread extends Thread { String hfile = file.replaceFirst("%s", "h"); try { + System.out.println(":)"); out = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(pfile), "UTF8")); out.write(text); diff --git a/resources/tools/fwstk/src/com/fluidbook/fwstk/layout/Group.java b/resources/tools/fwstk/src/com/fluidbook/fwstk/layout/Group.java index 212bf58..e35311d 100644 --- a/resources/tools/fwstk/src/com/fluidbook/fwstk/layout/Group.java +++ b/resources/tools/fwstk/src/com/fluidbook/fwstk/layout/Group.java @@ -4,7 +4,6 @@ */ package com.fluidbook.fwstk.layout; -import com.sun.org.apache.xpath.internal.operations.Bool; import cube.util.StringUtil; import org.apache.pdfbox.pdmodel.common.PDRectangle; diff --git a/src/PDFTools.php b/src/PDFTools.php index fc0c1ff..0fb4f61 100644 --- a/src/PDFTools.php +++ b/src/PDFTools.php @@ -415,17 +415,18 @@ class PDFTools if ($ignoreSeparators) { $out .= '/sep_' . md5($ignoreSeparators); } - $out = Files::mkdir($out) . '/'; + $out = Files::mkdir($out); $fwstk = new FWSTK(); $fwstk->setArg('--input ' . $pdf); $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt'); - $fwstk->setArg('--extractTextsMethod' . $textExtraction); + $fwstk->setArg('--extractTextsMethod ' . $textExtraction); $fwstk->setArg('--threads 1'); - if ($ignoreSeparators !== '') { + if ($ignoreSeparators) { $fwstk->setArg('--ignoreSeparators ' . $ignoreSeparators); } $fwstk->execute(); + $fwstk->dd(); }