$fwstk->setPath(CONVERTER_PATH);
$fwstk->setArg('--input ' . $out . '/crop.pdf');
$fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
+ $fwstk->setArg('--extractTextsMethod ' . $book->parametres->textExtraction);
if ($book->parametres->ignoreSearchSeparators != '') {
$fwstk->setArg('--ignoreSeparators ' . $book->parametres->ignoreSearchSeparators);
}
- $fwstk->setArg('--threads 4');
+ $fwstk->setArg('--threads 1');
$fwstk->execute();
$res = $fwstk->commande . "\n\n";
exit;
}
/**
 * Appears to be a one-off helper that pushes one publication to a remote FTP
 * host and then sends the browser to the published catalogue.
 *
 * Builds a wsExporter and calls export() with the hard-coded id 15296, the
 * 'install_ftp' and 'online' arguments, an FTP "user:password@host" target
 * and an empty last argument; it then emits a Location header pointing at
 * the wesco-group.com catalogue URL and terminates the script.
 *
 * NOTE(review): $x is never assigned in this method; presumably export()
 * takes it by reference as an output argument. Confirm, otherwise an
 * undefined variable is being passed.
 * NOTE(review): the FTP credentials are embedded in source; consider moving
 * them to configuration and rotating the password.
 * NOTE(review): there is no commonDroits::min() call here, unlike fixStats().
 * Confirm that leaving this entry point unrestricted is intended.
 */
+ public static function installWescoVentes() {
+ $exporter = new wsExporter();
// 15296 is presumably the book id of the Wesco sales report; TODO confirm.
+ $exporter->export(15296, $x, 'install_ftp', 'online', "wescogrolj-catalogca:b9uA7U72eW@ftp.cluster023.hosting.ovh.net", '');
// Redirect to the public URL of the catalogue, then stop so nothing else is output.
+ header('Location: http://www.wesco-group.com/download/Catalogues/2017/Wesco-Rapport-ventes-0-12-ans/');
+ exit;
+ }
+
public static function fixStats($args) {
commonDroits::min(5);
$book = trim($args[0], ' .');
$prefix = '';
if ($book->parametres->textExtraction == 'poppler') {
$prefix = 'p';
+ } else if ($book->parametres->textExtraction == 'fluidbook') {
+ $prefix = 'f';
}
-
$dir = WS_BOOKS . '/index/' . $book->book_id;
if ($book->parametres->ignoreSearchSeparators != '') {
$dir .= '/' . sha1($book->parametres->ignoreSearchSeparators);
$fwstk->setPath(CONVERTER_PATH);
$fwstk->setArg('--input ' . $out . '/crop.pdf');
$fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
+ $fwstk->setArg('--extractTextsMethod ' . $book->parametres->textExtraction);
$fwstk->setArg('--threads 1');
$fwstk->setArg('--ignoreSeparators ' . $book->parametres->ignoreSearchSeparators);
$fwstk->execute();
if (!file_exists($tfile) || !file_exists($ifile)) {
$daoDoc = new wsDAODocument($core->con);
- $doc = $daoDoc->selectById($infos['document_id']);
- $doc->getLinksAndTexts();
+ $out = wsDocument::getDir($infos['document_id']);
+
+ $fwstk = new cubeCommandLine('fwstk.sh');
+ $fwstk->setPath(CONVERTER_PATH);
+ $fwstk->setArg('--input ' . $out . '/crop.pdf');
+ $fwstk->setArg('--extractTexts ' . $out . '%s%d.txt');
+ $fwstk->setArg('--extractTextsMethod ' . $book->parametres->textExtraction);
+ if ($book->parametres->ignoreSearchSeparators != '') {
+ $fwstk->setArg('--ignoreSeparators ' . $book->parametres->ignoreSearchSeparators);
+ }
+ $fwstk->setArg('--threads 1');
+ $fwstk->execute();
}
CubeIT_Util_Gzip::compressIfNotCompressed($tfile);
$this->fields['ignoreWordLimit'] = ['type' => 'integer', 'default' => 3, 'editable' => true, 'label' => __('Ignorer les mots de moins de X caractères')];
$this->fields['textExtraction'] = array('type' => 'combo', 'default' => 'pdfbox', 'editable' => true, 'label' => __("Méthode d'extraction des textes"),
'datas' => array(__('PDFBox') => 'pdfbox',
- __('Poppler') => 'poppler'));
+ __('Poppler') => 'poppler',
+ __('Fluidbook (expérimentation basée sur PDFBox)') => 'fluidbook'));
$this->fields['searchShowNoResultsPages'] = array('type' => 'boolean', 'default' => false, 'editable' => true, 'label' => __("Afficher les pages sans résultats"));
$this->fields['searchPageSelectionAlgorithm'] = array('type' => 'combo', 'editable' => true, 'default' => 'AND',
'datas' => [__('les double-pages qui contiennent tous les mots recherchés (ET logique)') => 'AND', __('les double-page qui contiennent un des mots recherché (OU logique)') => 'OR'], 'label' => __('Algorithme de sélection des résultats'));
$this->fields['scorm_variables'] = array('type' => 'textarea', 'default' => '', 'editable' => true, 'label' => __('Variables SCORM'), 'grade' => 5, 'hint' => __('Laisser vide pour utiliser le titre de la publication'));
$this->forms['scorm'] = array('label' => __('SCORM'),
- 'fieldsnames' => array('scorm_enable', 'scorm_id', 'scorm_org', 'scorm_title','scorm_variables'));
+ 'fieldsnames' => array('scorm_enable', 'scorm_id', 'scorm_org', 'scorm_title', 'scorm_variables'));
}
}
if ($this->CropAndCut()) {
$this->getInfos($this->cropped, true);
}
- $this->getLinksAndTexts();
+ $this->getLinks();
}
public function CropAndCut() {
}
}
- public function getLinksAndTexts() {
+ public function getLinks() {
$fwstk = new cubeCommandLine('fwstk.sh');
$fwstk->setPath(CONVERTER_PATH);
$fwstk->setArg('--input ' . $this->cropped);
- $fwstk->setArg('--extractTexts ' . $this->out . '%s%d.txt');
$fwstk->setArg('--extractLinks ' . $this->out . 'p%d.csv');
$fwstk->setArg('--threads 1');
$fwstk->execute();