Git - cubist_pdf.git/commitdiff
wip #6430 @0.5
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 23 Oct 2023 16:15:42 +0000 (18:15 +0200)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 23 Oct 2023 16:15:42 +0000 (18:15 +0200)
resources/tools/fwstk/.idea/workspace.xml
resources/tools/fwstk/bin/com/fluidbook/fwstk/TextsThread.class
resources/tools/fwstk/bin/cube/util/StringUtil.class
resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar
resources/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java
resources/tools/fwstk/src/cube/util/StringUtil.java

index 466fcb6ea2e0308f8a053f42f1ece23b7cc1c0a4..c839016ea10d3533f43113b3afe86206f5e931a0 100644 (file)
@@ -9,7 +9,7 @@
     <option name="autoReloadType" value="SELECTIVE" />
   </component>
   <component name="ChangeListManager">
-    <list default="true" id="f146bc67-2578-4de3-9db2-94d2d43e9e83" name="Default" comment="wip #5410" />
+    <list default="true" id="f146bc67-2578-4de3-9db2-94d2d43e9e83" name="Default" comment="wip #643" />
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
     <configuration name="extract layout" type="Application" factoryName="Application">
       <option name="MAIN_CLASS_NAME" value="com.fluidbook.fwstk.Main" />
       <module name="fwstk" />
-      <option name="PROGRAM_PARAMETERS" value="--input C:\Users\vince\Desktop\test.pdf --mode robust --layout C:\Users\vince\Desktop\test\p%d.fby --threads 1" />
+      <option name="PROGRAM_PARAMETERS" value="--input C:\Users\vince\Desktop\1.pdf --mode robust --layout C:\Users\vince\Desktop\test\p%d.fby --threads 1" />
       <method v="2">
         <option name="Make" enabled="true" />
       </method>
       <workItem from="1694699117438" duration="618000" />
       <workItem from="1697471786856" duration="382000" />
       <workItem from="1697543960076" duration="2149000" />
+      <workItem from="1698075842281" duration="1718000" />
     </task>
     <task id="LOCAL-00001" summary="wip #1111 @0.5">
       <created>1487172253077</created>
index d1cf2001effc04c54afe25964416318c742512c9..965dedb9a761d37c996173fc1297edfdf80f0188 100644 (file)
Binary files a/resources/tools/fwstk/bin/com/fluidbook/fwstk/TextsThread.class and b/resources/tools/fwstk/bin/com/fluidbook/fwstk/TextsThread.class differ
index f28562f28a415de84dddf543cfc710c25e6a8a10..5de73f657e7410b546e2f949452a5536de2f276a 100644 (file)
Binary files a/resources/tools/fwstk/bin/cube/util/StringUtil.class and b/resources/tools/fwstk/bin/cube/util/StringUtil.class differ
index 6fe8dd5c13017babfe42a20ea73a4cd210f5350e..161aed6363be3165d373ed84bdd198e6a1c56d93 100644 (file)
Binary files a/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar and b/resources/tools/fwstk/out/artifacts/fwstk_jar/fwstk.jar differ
index 77b4323afb76a47737eef08b01b68d0fb8853350..a5014a793289874976a82ee641114b2edc91dbf4 100644 (file)
@@ -201,6 +201,7 @@ public class TextsThread extends Thread {
                     fbtext = html2text(fbtext);
                     if (this.robust) {
                         fbtext=StringUtil.removeSpaces(fbtext);
+                        fbtext=StringUtil.removeAccents(fbtext);
                     }
 
                     try {
index 74abfb8dff58d5c4b38c6f7c91de20041007c7fb..0ef8cc69e1dfd66fb4c687a61db89939a946723a 100644 (file)
@@ -3,12 +3,17 @@ package cube.util;
 import cube.util.AsciiUtils;
 import org.apache.commons.lang3.StringUtils;
 
+import java.text.Normalizer;
 import java.util.HashMap;
 
 public class StringUtil {
 
     public static String removeAccents(String in) {
-        return AsciiUtils.convertNonAscii(in);
+        in = AsciiUtils.convertNonAscii(in);
+        in = Normalizer.normalize(in, Normalizer.Form.NFD)
+                .replaceAll("[\\p{InCombiningDiacriticalMarks}\\p{IsM}]+", "");
+        in = in.replaceAll("[\\u0e34-\\u0e3e\\u0e47-\\u0e4e]", "");
+        return in;
     }
 
     public static String condenseWhite(String in) {
@@ -75,7 +80,7 @@ public class StringUtil {
     }
 
     public static String removeSpaces(String in) {
-        return StringUtils.deleteWhitespace(in).replaceAll("\\s+","");
+        return StringUtils.deleteWhitespace(in).replaceAll("\\s+", "");
     }
 
     public static String removeControl(String in) {