From 9c78e31bc0773d3f89f5e6a6eaa92387e78790e6 Mon Sep 17 00:00:00 2001 From: "vincent@cubedesigners.com" Date: Fri, 14 Mar 2014 18:15:35 +0000 Subject: [PATCH] --- .../tools/fwstk/nbproject/build-impl.xml | 18 +- .../tools/fwstk/nbproject/genfiles.properties | 4 +- .../nbproject/private/private.properties | 2 +- .../tools/fwstk/nbproject/private/private.xml | 2 + .../tools/fwstk/src/cube/util/StringUtil.java | 262 +++++++++--------- inc/ws/DAO/class.ws.dao.book.php | 57 +++- inc/ws/Util/html5/class.ws.html5.compiler.php | 4 +- 7 files changed, 204 insertions(+), 145 deletions(-) diff --git a/fluidbook/tools/fwstk/nbproject/build-impl.xml b/fluidbook/tools/fwstk/nbproject/build-impl.xml index 3ce365776..ce48ba209 100644 --- a/fluidbook/tools/fwstk/nbproject/build-impl.xml +++ b/fluidbook/tools/fwstk/nbproject/build-impl.xml @@ -105,9 +105,12 @@ is divided into following sections: - - - + + + + + + @@ -1211,11 +1214,14 @@ is divided into following sections: - + + + + - + @@ -1285,7 +1291,7 @@ is divided into following sections: - + Some tests failed; see details above. diff --git a/fluidbook/tools/fwstk/nbproject/genfiles.properties b/fluidbook/tools/fwstk/nbproject/genfiles.properties index 111fd3fa1..747289906 100644 --- a/fluidbook/tools/fwstk/nbproject/genfiles.properties +++ b/fluidbook/tools/fwstk/nbproject/genfiles.properties @@ -4,5 +4,5 @@ build.xml.stylesheet.CRC32=8064a381@1.68.1.46 # This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. # Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. nbproject/build-impl.xml.data.CRC32=75d32fc3 -nbproject/build-impl.xml.script.CRC32=773be4cd -nbproject/build-impl.xml.stylesheet.CRC32=5a01deb7@1.68.1.46 +nbproject/build-impl.xml.script.CRC32=542c3da2 +nbproject/build-impl.xml.stylesheet.CRC32=876e7a8f@1.74.1.48 diff --git a/fluidbook/tools/fwstk/nbproject/private/private.properties b/fluidbook/tools/fwstk/nbproject/private/private.properties index 7a31961f3..01b77c475 100644 --- a/fluidbook/tools/fwstk/nbproject/private/private.properties +++ b/fluidbook/tools/fwstk/nbproject/private/private.properties @@ -5,4 +5,4 @@ do.jar=true file.reference.icu4j-50_1.jar=H:\\Works\\Java\\jar\\icu4j-4_6_1.jar javac.debug=true javadoc.preview=true -user.properties.file=H:\\Applications\\Roaming\\Netbeans\\7.4\\user\\build.properties +user.properties.file=H:\\Applications\\Roaming\\Netbeans\\8.0RC1\\user\\build.properties diff --git a/fluidbook/tools/fwstk/nbproject/private/private.xml b/fluidbook/tools/fwstk/nbproject/private/private.xml index 226920c84..12af136ca 100644 --- a/fluidbook/tools/fwstk/nbproject/private/private.xml +++ b/fluidbook/tools/fwstk/nbproject/private/private.xml @@ -4,7 +4,9 @@ + file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/cube/util/StringUtil.java file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/Main.java + file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/layout/Word.java file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java diff --git a/fluidbook/tools/fwstk/src/cube/util/StringUtil.java b/fluidbook/tools/fwstk/src/cube/util/StringUtil.java index 6438d20ad..081be3313 100644 --- a/fluidbook/tools/fwstk/src/cube/util/StringUtil.java +++ b/fluidbook/tools/fwstk/src/cube/util/StringUtil.java @@ -1,131 +1,131 @@ -package cube.util; - -import java.util.HashMap; - -public class StringUtil { - - public static String removeAccents(String in) { - return AsciiUtils.convertNonAscii(in); - } - - public static String condenseWhite(String in) { - return in.replaceAll("\\p{Space}++", " "); - } - - public static String removePoints(String in) { - return StringUtil.removePoints(in, ""); - } - - public static String separateLigatures(String in) { - in=in.replaceAll("\u00C6","AE"); - in=in.replaceAll("\u00E6","ae"); - in=in.replaceAll("\u0152","OE"); - in=in.replaceAll("\u0153","oe"); - in=in.replaceAll("\u0132","IJ"); - in=in.replaceAll("\u0133","ij"); - in=in.replaceAll("\ufb00","ff"); - in=in.replaceAll("\ufb01","fi"); - in=in.replaceAll("\ufb02","fl"); - in=in.replaceAll("\ufb03","ffi"); - in=in.replaceAll("\ufb04","ffl"); - - return in; - } - - public static String removePoints(String in, String ignore) { - - if (ignore != "") { - String ig = ""; - for (int i = 0; i < ignore.length(); i++) { - ig += "\\" + ignore.substring(i, i + 1); - } - ignore = "&&[^" + ig + "]"; - } - - String res = in.replaceAll( - "[\\u2000-\\u20ff\\x21-\\x2f\\x3a-\\x3f\\x5b-\\x5f\\x7b-\\xa0\\xaa-\\xbf’" + ignore + "]", - " "); - - return res; - } - - public static String normalizeWhite(String in){ - String res = in.replaceAll( - "[\\u2000-\\u20ff]", - " "); - - return res; - } - - public static String removeControl(String in) { - return in.replaceAll("\\p{Cntrl}", " "); - } - - public static String removeTags(String in) { - in = in.replaceAll("<.+>", " "); - return condenseWhite(in); - } - - public static String trim(String str, String[] chars) { - - Boolean suite = false; - for (String c : chars) { - if (str.contains(c)) { - suite = true; - break; - } - } - if (!suite) { - return str; - } - - str = ltrim(str, chars); - str = rtrim(str, chars); - - return str; - } - - public static String rtrim(String str, String[] charsToTrim) { - int length = str.length() - 1; - - rightloop: - for (; length >= 0; length--) { - for (String c : charsToTrim) { - if (str.indexOf(c) == length) { - continue rightloop; - } - } - break; - } - - str = str.substring(0, length + 1); - return str; - } - - public static String[] splitStr(String str) { - - byte[] chars = str.getBytes(); - String[] res = new String[chars.length]; - for (int i = 0; i < chars.length; i++) { - res[i] = String.valueOf((char) chars[i]); - } - - return res; - } - - public static String ltrim(String str, String[] charsToTrim) { - int startIndex = 0; - leftloop: - for (startIndex = 0; startIndex <= str.length(); startIndex++) { - for (String c : charsToTrim) { - if (str.indexOf(c) == startIndex) { - continue leftloop; - } - } - break; - } - str = str.substring(startIndex); - return str; - - } -} +package cube.util; + +import java.util.HashMap; + +public class StringUtil { + + public static String removeAccents(String in) { + return AsciiUtils.convertNonAscii(in); + } + + public static String condenseWhite(String in) { + return in.replaceAll("\\p{Space}++", " "); + } + + public static String removePoints(String in) { + return StringUtil.removePoints(in, ""); + } + + public static String separateLigatures(String in) { + in = in.replaceAll("\u00C6", "AE"); + in = in.replaceAll("\u00E6", "ae"); + in = in.replaceAll("\u0152", "OE"); + in = in.replaceAll("\u0153", "oe"); + in = in.replaceAll("\u0132", "IJ"); + in = in.replaceAll("\u0133", "ij"); + in = in.replaceAll("\ufb00", "ff"); + in = in.replaceAll("\ufb01", "fi"); + in = in.replaceAll("\ufb02", "fl"); + in = in.replaceAll("\ufb03", "ffi"); + in = in.replaceAll("\ufb04", "ffl"); + + return in; + } + + public static String removePoints(String in, String ignore) { + + if (ignore != "") { + String ig = ""; + for (int i = 0; i < ignore.length(); i++) { + ig += "\\" + ignore.substring(i, i + 1); + } + ignore = "&&[^" + ig + "]"; + } + + String res = in.replaceAll( + "[\\u2000-\\u20ff\\x21-\\x2f\\x3a-\\x3f\\x5b-\\x5f\\x7b-\\xa0\\xaa-\\xbf’" + ignore + "]", + " "); + + return res; + } + + public static String normalizeWhite(String in) { + String res = in.replaceAll( + "[\\u2000-\\u20ff\\ufffd]", + " "); + + return res; + } + + public static String removeControl(String in) { + return in.replaceAll("\\p{Cntrl}", " "); + } + + public static String removeTags(String in) { + in = in.replaceAll("<.+>", " "); + return condenseWhite(in); + } + + public static String trim(String str, String[] chars) { + + Boolean suite = false; + for (String c : chars) { + if (str.contains(c)) { + suite = true; + break; + } + } + if (!suite) { + return str; + } + + str = ltrim(str, chars); + str = rtrim(str, chars); + + return str; + } + + public static String rtrim(String str, String[] charsToTrim) { + int length = str.length() - 1; + + rightloop: + for (; length >= 0; length--) { + for (String c : charsToTrim) { + if (str.indexOf(c) == length) { + continue rightloop; + } + } + break; + } + + str = str.substring(0, length + 1); + return str; + } + + public static String[] splitStr(String str) { + + byte[] chars = str.getBytes(); + String[] res = new String[chars.length]; + for (int i = 0; i < chars.length; i++) { + res[i] = String.valueOf((char) chars[i]); + } + + return res; + } + + public static String ltrim(String str, String[] charsToTrim) { + int startIndex = 0; + leftloop: + for (startIndex = 0; startIndex <= str.length(); startIndex++) { + for (String c : charsToTrim) { + if (str.indexOf(c) == startIndex) { + continue leftloop; + } + } + break; + } + str = str.substring(startIndex); + return str; + + } +} diff --git a/inc/ws/DAO/class.ws.dao.book.php b/inc/ws/DAO/class.ws.dao.book.php index d9c58b96d..36cb61ae6 100644 --- a/inc/ws/DAO/class.ws.dao.book.php +++ b/inc/ws/DAO/class.ws.dao.book.php @@ -798,7 +798,7 @@ class wsDAOBook extends commonDAO { $c->update('WHERE book_id=\'' . $this->con->escape($book_id) . '\''); } - public function makeTextsIndexes($book, $pages, &$index, &$textes) { + public function makeTextsIndexes($book, $pages, &$index, &$textes, $simple = false) { $prefix = ''; if ($book->parametres->textExtraction == 'poppler') { @@ -814,7 +814,11 @@ class wsDAOBook extends commonDAO { mkdir($dir, 0777, true); } - $ifilec = $dir . '/' . $prefix . 'index.json'; + if ($simple) { + $ifilec = $dir . '/' . $prefix . 'sindex.json'; + } else { + $ifilec = $dir . '/' . $prefix . 'index.json'; + } $tfilec = $dir . '/' . $prefix . 'textes.json'; if (file_exists($ifilec) && file_exists($tfilec) && (min(filemtime($ifilec), filemtime($tfilec)) >= $book->composition_update)) { @@ -851,7 +855,11 @@ class wsDAOBook extends commonDAO { $text = file_get_contents($tfile); $ipage = file_get_contents($ifile); - $this->fillIndexWithWords($index, $book_page, $ipage); + if ($simple) { + $this->fillIndexWithWordsSimple($index, $book_page, $ipage); + } else { + $this->fillIndexWithWords($index, $book_page, $ipage); + } $textes[$book_page] = $text; } ksort($index); @@ -863,6 +871,49 @@ class wsDAOBook extends commonDAO { file_put_contents($ifilec, $index); } + protected function _escapeIndex($str) { + $todelete = array('\ufffd'); + foreach ($todelete as $d) { + $str = str_replace($d, '', $str); + } + return $str; + } + + protected function fillIndexWithWordsSimple(&$index, $page, $ipage) { + $twords = explode("\n", trim($ipage)); + + foreach ($twords as $woadata) { + $w1 = explode(',', trim($woadata)); + if (count($w1) <= 1) { + continue; + } + list($woa, $worddata) = $w1; + $e = explode("\t", $worddata, 2); + if (count($e) < 2) { + continue; + } + list($total, $wordslist) = $e; + + if ($woa == '') { + continue; + } + + if (!isset($index[$woa])) { + $index[$woa] = array('t' => 0, 'p' => array()); + } + $index[$woa]['t'] += $total; + + $words = explode("\t", $wordslist); + foreach ($words as $word) { + list($wordwa, $count) = explode('$', $word, 2); + if (!isset($index[$woa]['p'][$page])) { + $index[$woa]['p'][$page] = 0; + } + $index[$woa]['p'][$page] += $count; + } + } + } + protected function fillIndexWithWords(&$index, $page, $ipage) { $twords = explode("\n", trim($ipage)); diff --git a/inc/ws/Util/html5/class.ws.html5.compiler.php b/inc/ws/Util/html5/class.ws.html5.compiler.php index 629c787df..eeb94bcba 100644 --- a/inc/ws/Util/html5/class.ws.html5.compiler.php +++ b/inc/ws/Util/html5/class.ws.html5.compiler.php @@ -492,7 +492,7 @@ class wsHTML5Compiler { $script .= '' . "\n"; if ($this->book->parametres->search) { //$script .= '' . "\n"; - $script .= '' . "\n"; + //$script .= '' . "\n"; } foreach ($this->pluginJs as $p) { $script .= '' . "\n"; @@ -800,7 +800,7 @@ class wsHTML5Compiler { } public function writeTexts() { - $this->daoBook->makeTextsIndexes($this->book, $this->pages, $index, $textes); + $this->daoBook->makeTextsIndexes($this->book, $this->pages, $index, $textes, true); $jsindex = 'var INDEX=' . $index . ';' . "\r"; $jstexts = 'var TEXTS=' . $textes . ';' . "\r"; -- 2.39.5