class Search
{
- protected static $_mappings = ['fr' =>
- [
+ protected static $_mappings = [
+ 'fr' => [
'type' => 'text',
'analyzer' => 'french_light',
'fields' => [
],
'de' => [
'type' => 'text',
- 'analyser' => 'german'
+ 'analyzer' => 'german_light',
+ 'fields' => [
+ 'stemmed' => [
+ 'type' => 'text',
+ 'analyzer' => 'german_heavy'
+ ]
+ ]
],
'es' => [
'type' => 'text',
protected static $_analysis = ['fr' =>
[
- "filter" => [
- "french_elision" => [
- "type" => "elision",
- "articles_case" => true,
- "articles" => ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"]
+ 'filter' => [
+ 'french_elision' => [
+ 'type' => 'elision',
+ 'articles_case' => true,
+ 'articles' => ['l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu']
],
- "french_synonym" => [
- "type" => "synonym",
- "ignore_case" => true,
- "expand" => true,
- "synonyms" => [
- "salade, laitue",
- "mayo, mayonnaise",
- "grille, toaste",
+ 'french_synonym' => [
+ 'type' => 'synonym',
+ 'ignore_case' => true,
+ 'expand' => true,
+ 'synonyms' => [
+ 'salade, laitue',
+ 'mayo, mayonnaise',
+ 'grille, toaste',
'pmi, pm instrumentation',
]
],
- "french_stemmer" => [
- "type" => "stemmer",
- "language" => "light_french"
+ 'french_stemmer' => [
+ 'type' => 'stemmer',
+ 'language' => 'light_french'
]
],
- "analyzer" => [
- "french_heavy" => [
- "tokenizer" => "icu_tokenizer",
- "char_filter" => ["html_strip"],
- "filter" => [
- "french_elision",
- "icu_folding",
- "french_synonym",
- "french_stemmer"
+ 'analyzer' => [
+ 'french_heavy' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'french_elision',
+ 'icu_folding',
+ 'french_synonym',
+ 'french_stemmer'
]
],
- "french_light" => [
- "tokenizer" => "icu_tokenizer",
- "char_filter" => ["html_strip"],
- "filter" => [
- "french_elision",
- "icu_folding"
+ 'french_light' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'french_elision',
+ 'icu_folding'
]
]
]
],
'en' => [
- "filter" => [
- "english_synonym" => [
- "type" => "synonym",
- "ignore_case" => true,
- "expand" => true,
- "synonyms" => [
+ 'filter' => [
+ 'english_synonym' => [
+ 'type' => 'synonym',
+ 'ignore_case' => true,
+ 'expand' => true,
+ 'synonyms' => [
'pmi, pm instrumentation',
]
],
- "english_stemmer" => [
- "type" => "stemmer",
- "language" => "light_english"
+ 'english_stemmer' => [
+ 'type' => 'stemmer',
+ 'language' => 'light_english'
]
],
- "analyzer" => [
- "english_heavy" => [
- "tokenizer" => "icu_tokenizer",
- "char_filter" => ["html_strip"],
- "filter" => [
- "icu_folding",
- "english_synonym",
- "english_stemmer"
+ 'analyzer' => [
+ 'english_heavy' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'icu_folding',
+ 'english_synonym',
+ 'english_stemmer'
]
],
- "english_light" => [
- "tokenizer" => "icu_tokenizer",
- "char_filter" => ["html_strip"],
- "filter" => [
- "icu_folding"
+ 'english_light' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'icu_folding'
+ ]
+ ]
+ ]
+ ],
+ 'de' => [
+ 'filter' => [
+ 'german_synonym' => [
+ 'type' => 'synonym',
+ 'ignore_case' => true,
+ 'expand' => true,
+ 'synonyms' => [
+ 'pmi, pm instrumentation',
+ ]
+ ],
+ 'german_stemmer' => [
+ 'type' => 'stemmer',
+ 'language' => 'light_german'
+ ]
+ ],
+ 'analyzer' => [
+ 'german_heavy' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'icu_folding',
+ 'german_synonym',
+ 'german_stemmer'
+ ]
+ ],
+ 'german_light' => [
+ 'tokenizer' => 'icu_tokenizer',
+ 'char_filter' => ['html_strip'],
+ 'filter' => [
+ 'icu_folding'
]
]
]
{
$index = self::_getIndexKey($locale, $variant);
+
try {
Elasticsearch::indices()->delete(['index' => $index]);
} catch (\Exception $e) {
'main' => $doc->getDOM()->saveHTML($doc->select('main', false)->item(0)),
'breadcrumbs' => $breadcrumbs,
'type' => $type,
+ 'url' => $url,
];
$data = [
'body' => $body,
'index' => $index,
'type' => '_doc',
- 'id' => $url,
+ 'id' => hash('sha256', $url),
];
echo 'Indexing ' . $variant . ' - ' . $locale . ' | ' . $url . "\n";
$hits = [];
foreach ($res['hits']['hits'] as $hit) {
- $hits[] = ['url' => $hit['_id'],
+ $hits[] = ['url' => $hit['_source']['url'],
'title' => $hit['_source']['short_title'],
'breadcrumbs' => json_decode($hit['_source']['breadcrumbs']),
];