]> _ Git - cubist_minisearch.git/commitdiff
wip #4804 @1
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 19 Oct 2021 17:43:42 +0000 (19:43 +0200)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 19 Oct 2021 17:43:42 +0000 (19:43 +0200)
.idea/cubist_minisearch.iml
.idea/php.xml
composer.json
src/Document.php
src/Document/PDF.php [new file with mode: 0644]
src/Index.php

index 3bde56061fb85f253549ab2a689f447d104c8923..cf5cda5253fae99ed7957f3c0734e7fb35f39a4d 100644 (file)
@@ -42,7 +42,6 @@
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/routing" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/process" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/mime" />
-      <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/mailer" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/http-kernel" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/http-foundation" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/polyfill-intl-normalizer" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/graham-campbell/result-type" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/dragonmantank/cron-expression" />
       <excludeFolder url="file://$MODULE_DIR$/vendor/composer" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/swiftmailer/swiftmailer" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/cubist/util" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/cubist/pdf" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/cubist/net" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/opis/closure" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/polyfill-php73" />
+      <excludeFolder url="file://$MODULE_DIR$/vendor/symfony/polyfill-iconv" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
index 0bb6ec05b7a4f2b30dbef4ab0b4485a570ea416b..4131144c121901e5198e020bc5bb366e9cd0d088 100644 (file)
@@ -40,7 +40,6 @@
       <path value="$PROJECT_DIR$/vendor/symfony/routing" />
       <path value="$PROJECT_DIR$/vendor/symfony/process" />
       <path value="$PROJECT_DIR$/vendor/symfony/mime" />
-      <path value="$PROJECT_DIR$/vendor/symfony/mailer" />
       <path value="$PROJECT_DIR$/vendor/symfony/http-kernel" />
       <path value="$PROJECT_DIR$/vendor/symfony/http-foundation" />
       <path value="$PROJECT_DIR$/vendor/symfony/polyfill-intl-normalizer" />
       <path value="$PROJECT_DIR$/vendor/graham-campbell/result-type" />
       <path value="$PROJECT_DIR$/vendor/dragonmantank/cron-expression" />
       <path value="$PROJECT_DIR$/vendor/composer" />
+      <path value="$PROJECT_DIR$/vendor/swiftmailer/swiftmailer" />
+      <path value="$PROJECT_DIR$/vendor/cubist/util" />
+      <path value="$PROJECT_DIR$/vendor/cubist/pdf" />
+      <path value="$PROJECT_DIR$/vendor/cubist/net" />
+      <path value="$PROJECT_DIR$/vendor/opis/closure" />
+      <path value="$PROJECT_DIR$/vendor/symfony/polyfill-php73" />
+      <path value="$PROJECT_DIR$/vendor/symfony/polyfill-iconv" />
     </include_path>
   </component>
   <component name="PhpProjectSharedConfiguration" php_language_level="7.3" />
index 24d810b54d28f55352d189e583a82f7543cfdb24..7c55158220904a017d13c5ea2364685c603632d8 100644 (file)
   "require": {
     "php": ">=7.3.0",
     "laravel/framework": "^v8.64.0",
-    "ext-json": "*"
+    "ext-json": "*",
+    "cubist/pdf": "dev-master"
+  },
+  "suggest": {
+    "cubist/pdf": "Enables indexing of PDF documents"
   },
   "repositories": [
     {
index dc2651d3cc7ade15216debf696a1562f0ad91e6d..203e47be51719d4b35f5218776afd11cd67c846b 100644 (file)
@@ -132,12 +132,19 @@ class Document
     }
 
     /**
-     * @return array
+     * @return array[] List of index entries; the base document yields exactly one.
      */
     public function process()
     {
-        //TODO
-        return [];
+        return [$this->getIndividualDocData()];
+    }
+
+    /**
+     * @return string[] Index entry fields. NOTE(review): 'thumb' is filled from getTitle(), same as 'title' - confirm this is intended.
+     */
+    public function getIndividualDocData()
+    {
+        return ['id' => $this->getId(), 'type' => $this->getType(), 'thumb' => $this->getTitle(), 'title' => $this->getTitle(), 'url' => $this->getUrl(), 'text' => $this->getText()];
     }
 
 }
\ No newline at end of file
diff --git a/src/Document/PDF.php b/src/Document/PDF.php
new file mode 100644 (file)
index 0000000..853456e
--- /dev/null
@@ -0,0 +1,61 @@
+<?php
+
+namespace Cubist\Minisearch\Document;
+
+use Cubist\Minisearch\Document;
+use Cubist\PDF\PDFTools;
+
+/**
+ * Search document backed by a PDF file; process() emits one index entry per page.
+ */
+class PDF extends Document
+{
+    /**
+     * Path of the PDF file handed to \Cubist\PDF\Document.
+     *
+     * @var string
+     */
+    protected $file;
+
+    /**
+     * @param string $file Path of the PDF file to index.
+     * @throws \Exception When the cubist/pdf package is not available.
+     */
+    public function __construct($file)
+    {
+        if (!class_exists(PDFTools::class)) {
+            throw new \Exception('Package cubist/pdf is required in order to index PDF documents');
+        }
+        // Keep the path: process() passes $this->file to \Cubist\PDF\Document.
+        $this->file = $file;
+    }
+
+    /**
+     * Builds one index entry per page, suffixing the base id and url with "#<page>".
+     *
+     * @return array[]
+     */
+    public function process()
+    {
+        $base = $this->getIndividualDocData();
+
+        $doc = new \Cubist\PDF\Document($this->file);
+        $doc->processFullTexts();
+
+        $pages = $doc->getPages();
+
+        $res = [];
+        for ($i = 1; $i <= $pages; $i++) {
+            $textFile = $doc->getConvertPath() . '/texts/fp' . $i . '.txt';
+
+            $page = $base;
+            $page['id'] .= '#' . $i;
+            $page['url'] .= '#' . $i;
+            // A page without a readable text file is indexed with empty text
+            // rather than storing file_get_contents()'s false in the entry.
+            $page['text'] = is_readable($textFile) ? (string)file_get_contents($textFile) : '';
+            $res[] = $page;
+        }
+        return $res;
+    }
+}
\ No newline at end of file
index 2a286411a41d24c983cf9a5671f2cf849c0dd57b..083c4e6191684ba4af22bc5c3afac0cadef5c786 100644 (file)
@@ -63,7 +63,7 @@ class Index implements ShouldQueue, ShouldBeUnique
     {
         $res = [];
         foreach ($this->documents as $document) {
-            $res[] = $document->process();
+            $res = array_merge($res,$document->process());
         }
         return 'const documents = ' . json_encode($res) . ';';
     }