</and>\r
</condition>\r
<condition property="do.archive">\r
- <not>\r
- <istrue value="${jar.archive.disabled}"/>\r
- </not>\r
+ <or>\r
+ <not>\r
+ <istrue value="${jar.archive.disabled}"/>\r
+ </not>\r
+ <istrue value="${not.archive.disabled}"/>\r
+ </or>\r
</condition>\r
<condition property="do.mkdist">\r
<and>\r
</not>\r
</and>\r
</condition>\r
- <javadoc additionalparam="${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}">\r
+ <condition else="" property="bug5101868workaround" value="*.java">\r
+ <matches pattern="1\.[56](\..*)?" string="${java.version}"/>\r
+ </condition>\r
+ <javadoc additionalparam="-J-Dfile.encoding=${file.encoding} ${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}">\r
<classpath>\r
<path path="${javac.classpath}"/>\r
</classpath>\r
- <fileset dir="${src.src.dir}" excludes="*.java,${excludes}" includes="${includes}">\r
+ <fileset dir="${src.src.dir}" excludes="${bug5101868workaround},${excludes}" includes="${includes}">\r
<filename name="**/*.java"/>\r
</fileset>\r
<fileset dir="${build.generated.sources.dir}" erroronmissingdir="false">\r
<mkdir dir="${build.test.results.dir}"/>\r
</target>\r
<target depends="init,compile-test,-pre-test-run" if="have.tests" name="-do-test-run">\r
- <j2seproject3:test testincludes="**/*Test.java"/>\r
+ <j2seproject3:test includes="${includes}" testincludes="**/*Test.java"/>\r
</target>\r
<target depends="init,compile-test,-pre-test-run,-do-test-run" if="have.tests" name="-post-test-run">\r
<fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail>\r
-package cube.util;\r
-\r
-import java.util.HashMap;\r
-\r
-public class StringUtil {\r
-\r
- public static String removeAccents(String in) {\r
- return AsciiUtils.convertNonAscii(in);\r
- }\r
-\r
- public static String condenseWhite(String in) {\r
- return in.replaceAll("\\p{Space}++", " ");\r
- }\r
-\r
- public static String removePoints(String in) {\r
- return StringUtil.removePoints(in, "");\r
- }\r
-\r
- public static String separateLigatures(String in) {\r
- in=in.replaceAll("\u00C6","AE");\r
- in=in.replaceAll("\u00E6","ae");\r
- in=in.replaceAll("\u0152","OE");\r
- in=in.replaceAll("\u0153","oe");\r
- in=in.replaceAll("\u0132","IJ");\r
- in=in.replaceAll("\u0133","ij");\r
- in=in.replaceAll("\ufb00","ff");\r
- in=in.replaceAll("\ufb01","fi");\r
- in=in.replaceAll("\ufb02","fl");\r
- in=in.replaceAll("\ufb03","ffi");\r
- in=in.replaceAll("\ufb04","ffl");\r
- \r
- return in;\r
- }\r
-\r
- public static String removePoints(String in, String ignore) {\r
-\r
- if (ignore != "") {\r
- String ig = "";\r
- for (int i = 0; i < ignore.length(); i++) {\r
- ig += "\\" + ignore.substring(i, i + 1);\r
- }\r
- ignore = "&&[^" + ig + "]";\r
- }\r
-\r
- String res = in.replaceAll(\r
- "[\\u2000-\\u20ff\\x21-\\x2f\\x3a-\\x3f\\x5b-\\x5f\\x7b-\\xa0\\xaa-\\xbf’" + ignore + "]",\r
- " ");\r
- \r
- return res;\r
- }\r
- \r
- public static String normalizeWhite(String in){\r
- String res = in.replaceAll(\r
- "[\\u2000-\\u20ff]",\r
- " ");\r
- \r
- return res;\r
- }\r
-\r
- public static String removeControl(String in) {\r
- return in.replaceAll("\\p{Cntrl}", " ");\r
- }\r
-\r
- public static String removeTags(String in) {\r
- in = in.replaceAll("<.+>", " ");\r
- return condenseWhite(in);\r
- }\r
-\r
- public static String trim(String str, String[] chars) {\r
-\r
- Boolean suite = false;\r
- for (String c : chars) {\r
- if (str.contains(c)) {\r
- suite = true;\r
- break;\r
- }\r
- }\r
- if (!suite) {\r
- return str;\r
- }\r
-\r
- str = ltrim(str, chars);\r
- str = rtrim(str, chars);\r
-\r
- return str;\r
- }\r
-\r
- public static String rtrim(String str, String[] charsToTrim) {\r
- int length = str.length() - 1;\r
-\r
- rightloop:\r
- for (; length >= 0; length--) {\r
- for (String c : charsToTrim) {\r
- if (str.indexOf(c) == length) {\r
- continue rightloop;\r
- }\r
- }\r
- break;\r
- }\r
-\r
- str = str.substring(0, length + 1);\r
- return str;\r
- }\r
-\r
- public static String[] splitStr(String str) {\r
-\r
- byte[] chars = str.getBytes();\r
- String[] res = new String[chars.length];\r
- for (int i = 0; i < chars.length; i++) {\r
- res[i] = String.valueOf((char) chars[i]);\r
- }\r
-\r
- return res;\r
- }\r
-\r
- public static String ltrim(String str, String[] charsToTrim) {\r
- int startIndex = 0;\r
- leftloop:\r
- for (startIndex = 0; startIndex <= str.length(); startIndex++) {\r
- for (String c : charsToTrim) {\r
- if (str.indexOf(c) == startIndex) {\r
- continue leftloop;\r
- }\r
- }\r
- break;\r
- }\r
- str = str.substring(startIndex);\r
- return str;\r
-\r
- }\r
-}\r
+package cube.util;
+
+import java.util.HashMap;
+
+public class StringUtil {
+
+ public static String removeAccents(String in) {
+ return AsciiUtils.convertNonAscii(in);
+ }
+
+ public static String condenseWhite(String in) {
+ return in.replaceAll("\\p{Space}++", " ");
+ }
+
+ public static String removePoints(String in) {
+ return StringUtil.removePoints(in, "");
+ }
+
+ public static String separateLigatures(String in) {
+ in = in.replaceAll("\u00C6", "AE");
+ in = in.replaceAll("\u00E6", "ae");
+ in = in.replaceAll("\u0152", "OE");
+ in = in.replaceAll("\u0153", "oe");
+ in = in.replaceAll("\u0132", "IJ");
+ in = in.replaceAll("\u0133", "ij");
+ in = in.replaceAll("\ufb00", "ff");
+ in = in.replaceAll("\ufb01", "fi");
+ in = in.replaceAll("\ufb02", "fl");
+ in = in.replaceAll("\ufb03", "ffi");
+ in = in.replaceAll("\ufb04", "ffl");
+
+ return in;
+ }
+
+ public static String removePoints(String in, String ignore) {
+
+ if (ignore != "") {
+ String ig = "";
+ for (int i = 0; i < ignore.length(); i++) {
+ ig += "\\" + ignore.substring(i, i + 1);
+ }
+ ignore = "&&[^" + ig + "]";
+ }
+
+ String res = in.replaceAll(
+ "[\\u2000-\\u20ff\\x21-\\x2f\\x3a-\\x3f\\x5b-\\x5f\\x7b-\\xa0\\xaa-\\xbf’" + ignore + "]",
+ " ");
+
+ return res;
+ }
+
+ public static String normalizeWhite(String in) {
+ String res = in.replaceAll(
+ "[\\u2000-\\u20ff\\ufffd]",
+ " ");
+
+ return res;
+ }
+
+ public static String removeControl(String in) {
+ return in.replaceAll("\\p{Cntrl}", " ");
+ }
+
+ public static String removeTags(String in) {
+ in = in.replaceAll("<.+>", " ");
+ return condenseWhite(in);
+ }
+
+ public static String trim(String str, String[] chars) {
+
+ Boolean suite = false;
+ for (String c : chars) {
+ if (str.contains(c)) {
+ suite = true;
+ break;
+ }
+ }
+ if (!suite) {
+ return str;
+ }
+
+ str = ltrim(str, chars);
+ str = rtrim(str, chars);
+
+ return str;
+ }
+
+ public static String rtrim(String str, String[] charsToTrim) {
+ int length = str.length() - 1;
+
+ rightloop:
+ for (; length >= 0; length--) {
+ for (String c : charsToTrim) {
+ if (str.indexOf(c) == length) {
+ continue rightloop;
+ }
+ }
+ break;
+ }
+
+ str = str.substring(0, length + 1);
+ return str;
+ }
+
+ public static String[] splitStr(String str) {
+
+ byte[] chars = str.getBytes();
+ String[] res = new String[chars.length];
+ for (int i = 0; i < chars.length; i++) {
+ res[i] = String.valueOf((char) chars[i]);
+ }
+
+ return res;
+ }
+
+ public static String ltrim(String str, String[] charsToTrim) {
+ int startIndex = 0;
+ leftloop:
+ for (startIndex = 0; startIndex <= str.length(); startIndex++) {
+ for (String c : charsToTrim) {
+ if (str.indexOf(c) == startIndex) {
+ continue leftloop;
+ }
+ }
+ break;
+ }
+ str = str.substring(startIndex);
+ return str;
+
+ }
+}
$c->update('WHERE book_id=\'' . $this->con->escape($book_id) . '\'');
}
- public function makeTextsIndexes($book, $pages, &$index, &$textes) {
+ public function makeTextsIndexes($book, $pages, &$index, &$textes, $simple = false) {
$prefix = '';
if ($book->parametres->textExtraction == 'poppler') {
mkdir($dir, 0777, true);
}
- $ifilec = $dir . '/' . $prefix . 'index.json';
+ if ($simple) {
+ $ifilec = $dir . '/' . $prefix . 'sindex.json';
+ } else {
+ $ifilec = $dir . '/' . $prefix . 'index.json';
+ }
$tfilec = $dir . '/' . $prefix . 'textes.json';
if (file_exists($ifilec) && file_exists($tfilec) && (min(filemtime($ifilec), filemtime($tfilec)) >= $book->composition_update)) {
$text = file_get_contents($tfile);
$ipage = file_get_contents($ifile);
- $this->fillIndexWithWords($index, $book_page, $ipage);
+ if ($simple) {
+ $this->fillIndexWithWordsSimple($index, $book_page, $ipage);
+ } else {
+ $this->fillIndexWithWords($index, $book_page, $ipage);
+ }
$textes[$book_page] = $text;
}
ksort($index);
file_put_contents($ifilec, $index);
}
+ /**
+  * Removes unwanted sequences from a string.
+  *
+  * NOTE(review): the pattern is single-quoted, so it is the six literal
+  * characters backslash, u, f, f, f, d and not the U+FFFD character itself.
+  * Presumably the input is JSON-escaped text; confirm against the caller.
+  *
+  * @param string $str text to clean
+  * @return string the text without the unwanted sequences
+  */
+ protected function _escapeIndex($str) {
+     $unwanted = array('\ufffd');
+     // str_replace() applies every search entry in turn when given an array.
+     return str_replace($unwanted, '', $str);
+ }
+
+ /**
+  * Merges the word statistics of one page into the simplified index.
+  *
+  * Each line of $ipage is parsed as
+  * "key,total<TAB>word$count<TAB>word$count...". For every key the total is
+  * added to $index[key]['t'] and the sum of the per-word counts is added to
+  * $index[key]['p'][$page]. Lines that do not fit this shape, or whose key
+  * is empty, are skipped.
+  *
+  * @param array  $index index being built, modified in place
+  * @param mixed  $page  page identifier used as key of the 'p' array
+  * @param string $ipage raw per-page index text
+  */
+ protected function fillIndexWithWordsSimple(&$index, $page, $ipage) {
+     $lines = explode("\n", trim($ipage));
+
+     foreach ($lines as $line) {
+         $w1 = explode(',', trim($line));
+         if (count($w1) <= 1) {
+             continue;
+         }
+         list($woa, $worddata) = $w1;
+         $e = explode("\t", $worddata, 2);
+         if (count($e) < 2) {
+             continue;
+         }
+         list($total, $wordslist) = $e;
+
+         if ($woa == '') {
+             continue;
+         }
+
+         if (!isset($index[$woa])) {
+             $index[$woa] = array('t' => 0, 'p' => array());
+         }
+         $index[$woa]['t'] += $total;
+
+         // The page counter does not depend on the word: create it once.
+         if (!isset($index[$woa]['p'][$page])) {
+             $index[$woa]['p'][$page] = 0;
+         }
+
+         foreach (explode("\t", $wordslist) as $word) {
+             $parts = explode('$', $word, 2);
+             // An entry without a "$count" suffix contributes nothing; skip
+             // it instead of reading an undefined offset.
+             if (!isset($parts[1])) {
+                 continue;
+             }
+             $index[$woa]['p'][$page] += $parts[1];
+         }
+     }
+ }
+
protected function fillIndexWithWords(&$index, $page, $ipage) {
$twords = explode("\n", trim($ipage));