Git - cubist_cms-back.git/commitdiff
wip #3501 @0.5
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Wed, 18 Mar 2020 17:43:43 +0000 (18:43 +0100)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Wed, 18 Mar 2020 17:43:43 +0000 (18:43 +0100)
src/app/Magic/Search.php

index 70e5bd2bdc6474619ebf10536237e2acc1b1e215..b3b7a2354af2a2c58e06da7bc42d2e18396e9b23 100644 (file)
@@ -10,8 +10,8 @@ use Cviebrock\LaravelElasticsearch\Facade as Elasticsearch;
 
 class Search
 {
-    protected static $_mappings = ['fr' =>
-        [
+    protected static $_mappings = [
+        'fr' => [
             'type' => 'text',
             'analyzer' => 'french_light',
             'fields' => [
@@ -33,7 +33,13 @@ class Search
         ],
         'de' => [
             'type' => 'text',
-            'analyser' => 'german'
+            'analyzer' => 'german_light',
+            'fields' => [
+                'stemmed' => [
+                    'type' => 'text',
+                    'analyzer' => 'german_heavy'
+                ]
+            ]
         ],
         'es' => [
             'type' => 'text',
@@ -51,79 +57,113 @@ class Search
 
     protected static $_analysis = ['fr' =>
         [
-            "filter" => [
-                "french_elision" => [
-                    "type" => "elision",
-                    "articles_case" => true,
-                    "articles" => ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"]
+            'filter' => [
+                'french_elision' => [
+                    'type' => 'elision',
+                    'articles_case' => true,
+                    'articles' => ['l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu']
                 ],
-                "french_synonym" => [
-                    "type" => "synonym",
-                    "ignore_case" => true,
-                    "expand" => true,
-                    "synonyms" => [
-                        "salade, laitue",
-                        "mayo, mayonnaise",
-                        "grille, toaste",
+                'french_synonym' => [
+                    'type' => 'synonym',
+                    'ignore_case' => true,
+                    'expand' => true,
+                    'synonyms' => [
+                        'salade, laitue',
+                        'mayo, mayonnaise',
+                        'grille, toaste',
                         'pmi, pm instrumentation',
                     ]
                 ],
-                "french_stemmer" => [
-                    "type" => "stemmer",
-                    "language" => "light_french"
+                'french_stemmer' => [
+                    'type' => 'stemmer',
+                    'language' => 'light_french'
                 ]
             ],
-            "analyzer" => [
-                "french_heavy" => [
-                    "tokenizer" => "icu_tokenizer",
-                    "char_filter" => ["html_strip"],
-                    "filter" => [
-                        "french_elision",
-                        "icu_folding",
-                        "french_synonym",
-                        "french_stemmer"
+            'analyzer' => [
+                'french_heavy' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'french_elision',
+                        'icu_folding',
+                        'french_synonym',
+                        'french_stemmer'
                     ]
                 ],
-                "french_light" => [
-                    "tokenizer" => "icu_tokenizer",
-                    "char_filter" => ["html_strip"],
-                    "filter" => [
-                        "french_elision",
-                        "icu_folding"
+                'french_light' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'french_elision',
+                        'icu_folding'
                     ]
                 ]
             ]
         ],
         'en' => [
-            "filter" => [
-                "english_synonym" => [
-                    "type" => "synonym",
-                    "ignore_case" => true,
-                    "expand" => true,
-                    "synonyms" => [
+            'filter' => [
+                'english_synonym' => [
+                    'type' => 'synonym',
+                    'ignore_case' => true,
+                    'expand' => true,
+                    'synonyms' => [
                         'pmi, pm instrumentation',
                     ]
                 ],
-                "english_stemmer" => [
-                    "type" => "stemmer",
-                    "language" => "light_english"
+                'english_stemmer' => [
+                    'type' => 'stemmer',
+                    'language' => 'light_english'
                 ]
             ],
-            "analyzer" => [
-                "english_heavy" => [
-                    "tokenizer" => "icu_tokenizer",
-                    "char_filter" => ["html_strip"],
-                    "filter" => [
-                        "icu_folding",
-                        "english_synonym",
-                        "english_stemmer"
+            'analyzer' => [
+                'english_heavy' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'icu_folding',
+                        'english_synonym',
+                        'english_stemmer'
                     ]
                 ],
-                "english_light" => [
-                    "tokenizer" => "icu_tokenizer",
-                    "char_filter" => ["html_strip"],
-                    "filter" => [
-                        "icu_folding"
+                'english_light' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'icu_folding'
+                    ]
+                ]
+            ]
+        ],
+        'de' => [
+            'filter' => [
+                'german_synonym' => [
+                    'type' => 'synonym',
+                    'ignore_case' => true,
+                    'expand' => true,
+                    'synonyms' => [
+                        'pmi, pm instrumentation',
+                    ]
+                ],
+                'german_stemmer' => [
+                    'type' => 'stemmer',
+                    'language' => 'light_german'
+                ]
+            ],
+            'analyzer' => [
+                'german_heavy' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'icu_folding',
+                        'german_synonym',
+                        'german_stemmer'
+                    ]
+                ],
+                'german_light' => [
+                    'tokenizer' => 'icu_tokenizer',
+                    'char_filter' => ['html_strip'],
+                    'filter' => [
+                        'icu_folding'
                     ]
                 ]
             ]
@@ -158,6 +198,7 @@ class Search
     {
         $index = self::_getIndexKey($locale, $variant);
 
+
         try {
             Elasticsearch::indices()->delete(['index' => $index]);
         } catch (\Exception $e) {
@@ -213,13 +254,14 @@ class Search
                 'main' => $doc->getDOM()->saveHTML($doc->select('main', false)->item(0)),
                 'breadcrumbs' => $breadcrumbs,
                 'type' => $type,
+                'url' => $url,
             ];
 
             $data = [
                 'body' => $body,
                 'index' => $index,
                 'type' => '_doc',
-                'id' => $url,
+                'id' => hash('sha256', $url),
             ];
 
             echo 'Indexing ' . $variant . ' - ' . $locale . ' | ' . $url . "\n";
@@ -319,7 +361,7 @@ class Search
 
         $hits = [];
         foreach ($res['hits']['hits'] as $hit) {
-            $hits[] = ['url' => $hit['_id'],
+            $hits[] = ['url' => $hit['_source']['url'],
                 'title' => $hit['_source']['short_title'],
                 'breadcrumbs' => json_decode($hit['_source']['breadcrumbs']),
             ];