--- /dev/null
+$label=Make texts indices\r
-config=Extract_texts\r
+config=Make_texts_indices\r
--- /dev/null
+application.args=--makeTexts H:\\Documents\\fwstk\\mktexts\\%s10.txt\r
<project-private xmlns="http://www.netbeans.org/ns/project-private/1">\r
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/1"/>\r
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/2" lastBookmarkId="0"/>\r
- <open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/1">\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/Main.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/cube/util/Array.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/layout/Word.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/layout/LayoutStripper.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/CustomStripper.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java</file>\r
- <file>file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/Link.java</file>\r
- </open-files>\r
</project-private>\r
file.reference.avalon-framework-4.1.4.jar=lib/avalon-framework-4.1.4.jar\r
file.reference.bcmail-jdk16-146.jar=lib/bcmail-jdk16-146.jar\r
file.reference.bcprov-jdk16-146.jar=lib/bcprov-jdk16-146.jar\r
+file.reference.commons-io-2.4.jar=lib\\commons-io-2.4.jar\r
file.reference.commons-logging-1.1.1.jar=lib/commons-logging-1.1.1.jar\r
file.reference.fontbox-1.7.1.jar=lib\\fontbox-1.7.1.jar\r
file.reference.FWSTK_Resources.jar=../FWSTK_Resources/dist/FWSTK_Resources.jar\r
${file.reference.fontbox-1.7.1.jar}:\\r
${file.reference.jempbox-1.7.1.jar}:\\r
${file.reference.pdfbox-1.7.1.jar}:\\r
- ${file.reference.icu4j-50_1.jar}\r
+ ${file.reference.icu4j-50_1.jar}:\\r
+ ${file.reference.commons-io-2.4.jar}\r
# Space-separated list of extra javac options\r
javac.compilerargs=\r
javac.deprecation=false\r
\r
public function makeTextsIndexes($book, $pages, &$index, &$textes) {\r
\r
+ $prefix = '';\r
+ if ($book->parametres->textExtraction == 'poppler') {\r
+ $prefix = 'p';\r
+ }\r
+\r
+\r
$dir = WS_BOOKS . '/index/' . $book->book_id;\r
if ($book->parametres->ignoreSearchSeparators != '') {\r
$dir.='/' . sha1($book->parametres->ignoreSearchSeparators);\r
mkdir($dir, 0777, true);\r
}\r
\r
- $ifilec = $dir . '/index.json';\r
- $tfilec = $dir . '/textes.json';\r
+ $ifilec = $dir . '/' . $prefix . 'index.json';\r
+ $tfilec = $dir . '/' . $prefix . 'textes.json';\r
\r
if (file_exists($ifilec) && file_exists($tfilec) && (min(filemtime($ifilec), filemtime($tfilec)) >= $book->composition_update)) {\r
$index = file_get_contents($ifilec);\r
$index = array();\r
$textes = array();\r
foreach ($pages as $book_page => $infos) {\r
- $tfile = WS_DOCS . '/' . $infos['document_id'] . '/p' . $infos['document_page'] . '.txt';\r
- $ifile = WS_DOCS . '/' . $infos['document_id'] . '/i' . $infos['document_page'] . '.txt';\r
+ $tfile = WS_DOCS . '/' . $infos['document_id'] . '/' . $prefix . 'p' . $infos['document_page'] . '.txt';\r
+ $ifile = WS_DOCS . '/' . $infos['document_id'] . '/' . $prefix . 'i' . $infos['document_page'] . '.txt';\r
$text = file_get_contents($tfile);\r
$ipage = file_get_contents($ifile);\r
\r
$this->fields['ignoreSearch'] = array('type' => 'textarea', 'default' => '', 'editable' => true, 'label' => __('Mots à ignorer'),\r
'hint' => __('Liste des mots à ignorer séparés par des virgules'), 'grade' => 5);\r
$this->fields['ignoreSearchSeparators'] = array('type' => 'text', 'default' => '', 'editable' => true, 'label' => __('Caractères à ne pas considérer comme un séparateur de mot'));\r
+ $this->fields['textExtraction'] = array('type' => 'combo', 'default' => 'pdfbox', 'editable' => true, 'label' => __("Méthode d'extraction des textes"),\r
+ 'datas' => array(__('PDFBox') => 'pdfbox',\r
+ __('Poppler') => 'poppler'));\r
+\r
$this->forms['search'] = array('label' => __('Moteur de recherche'),\r
- 'fieldsnames' => array('search', 'ignoreSearch', 'ignoreSearchSeparators'));\r
+ 'fieldsnames' => array('search', 'ignoreSearch', 'ignoreSearchSeparators', 'textExtraction'));\r
\r
$this->fields['soundTheme'] = array('type' => 'combo', 'default' => 'classic', 'editable' => true, 'label' => __("Thème sonore"),\r
'datas' => array(__('Pas de son') => '',\r
}\r
\r
if (is_null($page)) {\r
- $pointer = gzopen($this->log . '/commons.log.gz', 'ab');\r
+ $pointer = fopen($this->log . '/commons.log', 'ab');\r
} else {\r
- $pointer = gzopen($this->log . '/p' . $page . '.log.gz', 'ab');\r
+ $pointer = fopen($this->log . '/p' . $page . '.log', 'ab');\r
}\r
\r
- gzwrite($pointer, $c);\r
- gzclose($pointer);\r
+ fwrite($pointer, $c);\r
+ fclose($pointer);\r
}\r
\r
public function __destruct() {\r