From: vincent@cubedesigners.com Date: Fri, 7 Dec 2012 14:24:01 +0000 (+0000) Subject: (no commit message) X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=8ed006589aceb4d98fc07e13b66f93cb9d46c41a;p=cubeextranet.git --- diff --git a/fluidbook/tools/fwstk/lib/commons-io-2.4.jar b/fluidbook/tools/fwstk/lib/commons-io-2.4.jar new file mode 100644 index 000000000..90035a4fe Binary files /dev/null and b/fluidbook/tools/fwstk/lib/commons-io-2.4.jar differ diff --git a/fluidbook/tools/fwstk/nbproject/configs/Make_texts_indices.properties b/fluidbook/tools/fwstk/nbproject/configs/Make_texts_indices.properties new file mode 100644 index 000000000..259a5c6d7 --- /dev/null +++ b/fluidbook/tools/fwstk/nbproject/configs/Make_texts_indices.properties @@ -0,0 +1 @@ +$label=Make texts indices diff --git a/fluidbook/tools/fwstk/nbproject/private/config.properties b/fluidbook/tools/fwstk/nbproject/private/config.properties index 9d6df0f95..65acd2997 100644 --- a/fluidbook/tools/fwstk/nbproject/private/config.properties +++ b/fluidbook/tools/fwstk/nbproject/private/config.properties @@ -1 +1 @@ -config=Extract_texts +config=Make_texts_indices diff --git a/fluidbook/tools/fwstk/nbproject/private/configs/Make_texts_indices.properties b/fluidbook/tools/fwstk/nbproject/private/configs/Make_texts_indices.properties new file mode 100644 index 000000000..8ef236f17 --- /dev/null +++ b/fluidbook/tools/fwstk/nbproject/private/configs/Make_texts_indices.properties @@ -0,0 +1 @@ +application.args=--makeTexts H:\\Documents\\fwstk\\mktexts\\%s10.txt diff --git a/fluidbook/tools/fwstk/nbproject/private/private.xml b/fluidbook/tools/fwstk/nbproject/private/private.xml index 3a048af1e..4ffd82c24 100644 --- a/fluidbook/tools/fwstk/nbproject/private/private.xml +++ b/fluidbook/tools/fwstk/nbproject/private/private.xml @@ -2,13 +2,4 @@ - - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/Main.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/cube/util/Array.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/layout/Word.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/layout/LayoutStripper.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/CustomStripper.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/TextsThread.java - file:/H:/Works/cubeExtranet/fluidbook/tools/fwstk/src/com/fluidbook/fwstk/Link.java - diff --git a/fluidbook/tools/fwstk/nbproject/project.properties b/fluidbook/tools/fwstk/nbproject/project.properties index 1d69fd626..0a8e12808 100644 --- a/fluidbook/tools/fwstk/nbproject/project.properties +++ b/fluidbook/tools/fwstk/nbproject/project.properties @@ -30,6 +30,7 @@ excludes= file.reference.avalon-framework-4.1.4.jar=lib/avalon-framework-4.1.4.jar file.reference.bcmail-jdk16-146.jar=lib/bcmail-jdk16-146.jar file.reference.bcprov-jdk16-146.jar=lib/bcprov-jdk16-146.jar +file.reference.commons-io-2.4.jar=lib\\commons-io-2.4.jar file.reference.commons-logging-1.1.1.jar=lib/commons-logging-1.1.1.jar file.reference.fontbox-1.7.1.jar=lib\\fontbox-1.7.1.jar file.reference.FWSTK_Resources.jar=../FWSTK_Resources/dist/FWSTK_Resources.jar @@ -51,7 +52,8 @@ javac.classpath=\ ${file.reference.fontbox-1.7.1.jar}:\ ${file.reference.jempbox-1.7.1.jar}:\ ${file.reference.pdfbox-1.7.1.jar}:\ - ${file.reference.icu4j-50_1.jar} + ${file.reference.icu4j-50_1.jar}:\ + ${file.reference.commons-io-2.4.jar} # Space-separated list of extra javac options javac.compilerargs= javac.deprecation=false diff --git a/inc/ws/DAO/class.ws.dao.book.php b/inc/ws/DAO/class.ws.dao.book.php index cf31c311b..ee216bcf2 100644 --- a/inc/ws/DAO/class.ws.dao.book.php +++ b/inc/ws/DAO/class.ws.dao.book.php @@ -691,6 +691,12 @@ class wsDAOBook extends commonDAO { public function makeTextsIndexes($book, $pages, &$index, &$textes) { + $prefix = ''; + if ($book->parametres->textExtraction == 'poppler') { + $prefix = 'p'; + } + + $dir = WS_BOOKS . '/index/' . $book->book_id; if ($book->parametres->ignoreSearchSeparators != '') { $dir.='/' . sha1($book->parametres->ignoreSearchSeparators); @@ -699,8 +705,8 @@ class wsDAOBook extends commonDAO { mkdir($dir, 0777, true); } - $ifilec = $dir . '/index.json'; - $tfilec = $dir . '/textes.json'; + $ifilec = $dir . '/' . $prefix . 'index.json'; + $tfilec = $dir . '/' . $prefix . 'textes.json'; if (file_exists($ifilec) && file_exists($tfilec) && (min(filemtime($ifilec), filemtime($tfilec)) >= $book->composition_update)) { $index = file_get_contents($ifilec); @@ -730,8 +736,8 @@ class wsDAOBook extends commonDAO { $index = array(); $textes = array(); foreach ($pages as $book_page => $infos) { - $tfile = WS_DOCS . '/' . $infos['document_id'] . '/p' . $infos['document_page'] . '.txt'; - $ifile = WS_DOCS . '/' . $infos['document_id'] . '/i' . $infos['document_page'] . '.txt'; + $tfile = WS_DOCS . '/' . $infos['document_id'] . '/' . $prefix . 'p' . $infos['document_page'] . '.txt'; + $ifile = WS_DOCS . '/' . $infos['document_id'] . '/' . $prefix . 'i' . $infos['document_page'] . '.txt'; $text = file_get_contents($tfile); $ipage = file_get_contents($ifile); diff --git a/inc/ws/Metier/class.ws.book.parametres.php b/inc/ws/Metier/class.ws.book.parametres.php index fb316ef1f..1beb811cf 100644 --- a/inc/ws/Metier/class.ws.book.parametres.php +++ b/inc/ws/Metier/class.ws.book.parametres.php @@ -123,8 +123,12 @@ class wsBookParametres extends wsParametres { $this->fields['ignoreSearch'] = array('type' => 'textarea', 'default' => '', 'editable' => true, 'label' => __('Mots à ignorer'), 'hint' => __('Liste des mots à ignorer séparés par des virgules'), 'grade' => 5); $this->fields['ignoreSearchSeparators'] = array('type' => 'text', 'default' => '', 'editable' => true, 'label' => __('Caractères à ne pas considérer comme un séparateur de mot')); + $this->fields['textExtraction'] = array('type' => 'combo', 'default' => 'pdfbox', 'editable' => true, 'label' => __("Méthode d'extraction des textes"), + 'datas' => array(__('PDFBox') => 'pdfbox', + __('Poppler') => 'poppler')); + $this->forms['search'] = array('label' => __('Moteur de recherche'), - 'fieldsnames' => array('search', 'ignoreSearch', 'ignoreSearchSeparators')); + 'fieldsnames' => array('search', 'ignoreSearch', 'ignoreSearchSeparators', 'textExtraction')); $this->fields['soundTheme'] = array('type' => 'combo', 'default' => 'classic', 'editable' => true, 'label' => __("Thème sonore"), 'datas' => array(__('Pas de son') => '', diff --git a/inc/ws/Metier/class.ws.document.php b/inc/ws/Metier/class.ws.document.php index da911fd70..0daec3afd 100644 --- a/inc/ws/Metier/class.ws.document.php +++ b/inc/ws/Metier/class.ws.document.php @@ -892,13 +892,13 @@ class wsDocument extends cubeMetier { } if (is_null($page)) { - $pointer = gzopen($this->log . '/commons.log.gz', 'ab'); + $pointer = fopen($this->log . '/commons.log', 'ab'); } else { - $pointer = gzopen($this->log . '/p' . $page . '.log.gz', 'ab'); + $pointer = fopen($this->log . '/p' . $page . '.log', 'ab'); } - gzwrite($pointer, $c); - gzclose($pointer); + fwrite($pointer, $c); + fclose($pointer); } public function __destruct() {