From: Vincent Vanwaelscappel Date: Wed, 21 Aug 2019 18:42:26 +0000 (+0200) Subject: wip #2940 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=9dce5e72081e6627ac930cbf1e069abb734874f8;p=cubist_cms-back.git wip #2940 --- diff --git a/src/app/Console/Commands/SearchIndexCommand.php b/src/app/Console/Commands/SearchIndexCommand.php index 1d5009f..a794f6b 100644 --- a/src/app/Console/Commands/SearchIndexCommand.php +++ b/src/app/Console/Commands/SearchIndexCommand.php @@ -6,6 +6,7 @@ namespace Cubist\Backpack\app\Console\Commands; use Cubist\Backpack\app\Magic\Menu\Menu; use Cubist\Backpack\app\Magic\Menu\PageItem; use Cubist\Backpack\app\Magic\Menu\VirtualItem; +use Cubist\Backpack\app\Magic\Search; use Cubist\Backpack\app\Template\Navigation; use Cubist\Backpack\app\Template\Redirection; use Cviebrock\LaravelElasticsearch\Facade as Elasticsearch; @@ -20,175 +21,8 @@ class SearchIndexCommand extends Command public function handle() { - $index = config('cubist.internal_search_index'); - - try { - Elasticsearch::indices()->delete(['index' => $index]); - } catch (\Exception $e) { - echo $e->getMessage(); - } - - Elasticsearch::indices()->create( - [ - 'index' => $index, - 'body' => [ - 'settings' => - [ - 'analysis' => $this->_french(), - ], - 'mappings' => $this->_typeMapping(true), - ] - ]); - - /** @var Cubist\Backpack\app\Magic\Menu\Item[] $pages */ - $pages = Menu::getNavigation()->findAll(); - - $indexed = []; - - foreach ($pages as $page) { - // Skip nav items - if ($page instanceof VirtualItem) { - continue; - } - // Skip redirection & navigation pages - if ($page instanceof PageItem) { - /** @var PageItem $template */ - $template = $page->getPage()->getUsedTemplate(); - if ($template instanceof Redirection || $template instanceof Navigation) { - continue; - } - } - - $href = $page->getHref(); - if ($href == '#' || $href=='home') { - continue; - } - - $url = action("PageController@catchall", ['page' => $href]); - if (isset($indexed[$url])) { - continue; - } - - $html = @file_get_contents($url); - if (!$html) { - continue; - } - $doc = new DOMSelector($html); - - /** @var \DOMElement $meta */ - $meta = $doc->select('meta[data-search]', false)[0]; - $enabled = $meta->getAttribute('data-search') == '1'; - if (!$enabled) { - continue; - } - $short_title = $meta->getAttribute('data-short-title'); - $keywords = $meta->getAttribute('data-keywords'); - - $body = [ - 'short_title' => $short_title, - 'long_title' => (string)$doc->select('title')[0]['text'], - 'keywords' => $keywords, - 'description' => $doc->select('meta[name="description"]')[0]['attributes']['content'], - 'main' => $doc->getDOM()->saveHTML($doc->select('main', false)->item(0)), - ]; - - $data = [ - 'body' => $body, - 'index' => $index, - 'type' => '_doc', - 'id' => $url, - ]; - - echo $href . ' : ' . $url . "\n"; - $indexed[$url] = true; - - Elasticsearch::index($data); - } + Search::index(); } - protected function _typeMapping($source = true) - { - $res = []; - if ($source) { - $res = ['_source' => [ - 'enabled' => true, - ] - ]; - } - - - $res['properties'] = [ - 'short_title' => $this->_frenchMapping(), - 'long_title' => $this->_frenchMapping(), - 'description' => $this->_frenchMapping(), - 'keywords' => $this->_frenchMapping(), - 'main' => $this->_frenchMapping(), - ]; - - return $res; - - } - - protected function _frenchMapping() - { - return [ - 'type' => 'text', - 'analyzer' => 'french_light', - 'fields' => [ - 'stemmed' => [ - 'type' => 'text', - 'analyzer' => 'french_heavy' - ] - ] - ]; - } - - protected function _french() - { - return [ - "filter" => [ - "french_elision" => [ - "type" => "elision", - "articles_case" => true, - "articles" => ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"] - ], - "french_synonym" => [ - "type" => "synonym", - "ignore_case" => true, - "expand" => true, - "synonyms" => [ - "salade, laitue", - "mayo, mayonnaise", - "grille, toaste", - 'pmi, pm instrumentation', - ] - ], - "french_stemmer" => [ - "type" => "stemmer", - "language" => "light_french" - ] - ], - "analyzer" => [ - "french_heavy" => [ - "tokenizer" => "icu_tokenizer", - "char_filter" => ["html_strip"], - "filter" => [ - "french_elision", - "icu_folding", - "french_synonym", - "french_stemmer" - ] - ], - "french_light" => [ - "tokenizer" => "icu_tokenizer", - "char_filter" => ["html_strip"], - "filter" => [ - "french_elision", - "icu_folding" - ] - ] - ] - ]; - } } diff --git a/src/app/Magic/Search.php b/src/app/Magic/Search.php new file mode 100644 index 0000000..9e1893f --- /dev/null +++ b/src/app/Magic/Search.php @@ -0,0 +1,226 @@ +delete(['index' => $index]); + } catch (\Exception $e) { + echo $e->getMessage(); + } + + Elasticsearch::indices()->create( + [ + 'index' => $index, + 'body' => [ + 'settings' => + [ + 'analysis' => self::_french(), + ], + 'mappings' => self::_typeMapping(true), + ] + ]); + + /** @var Cubist\Backpack\app\Magic\Menu\Item[] $pages */ + $pages = Menu::getNavigation()->findAll(); + + $indexed = []; + + foreach ($pages as $page) { + // Skip nav items + if ($page instanceof VirtualItem) { + continue; + } + // Skip redirection & navigation pages + if ($page instanceof PageItem) { + /** @var PageItem $template */ + $template = $page->getPage()->getUsedTemplate(); + if ($template instanceof Redirection || $template instanceof Navigation) { + continue; + } + } + + $href = $page->getHref(); + if ($href == '#' || $href == 'home') { + continue; + } + + $url = action("PageController@catchall", ['page' => $href]); + if (isset($indexed[$url])) { + continue; + } + + $html = @file_get_contents($url); + if (!$html) { + continue; + } + $doc = new DOMSelector($html); + + /** @var \DOMElement $meta */ + $meta = $doc->select('meta[data-search]', false)[0]; + $enabled = $meta->getAttribute('data-search') == '1'; + if (!$enabled) { + continue; + } + $short_title = $meta->getAttribute('data-short-title'); + $keywords = $meta->getAttribute('data-keywords'); + + $body = [ + 'short_title' => $short_title, + 'long_title' => (string)$doc->select('title')[0]['text'], + 'keywords' => $keywords, + 'description' => $doc->select('meta[name="description"]')[0]['attributes']['content'], + 'main' => $doc->getDOM()->saveHTML($doc->select('main', false)->item(0)), + ]; + + $data = [ + 'body' => $body, + 'index' => $index, + 'type' => '_doc', + 'id' => $url, + ]; + + echo $href . ' : ' . $url . "\n"; + $indexed[$url] = true; + + Elasticsearch::index($data); + } + + } + + + protected static function _typeMapping($source = true) + { + $res = []; + if ($source) { + $res = ['_source' => [ + 'enabled' => true, + ] + ]; + } + + + $res['properties'] = [ + 'short_title' => self::_frenchMapping(), + 'long_title' => self::_frenchMapping(), + 'description' => self::_frenchMapping(), + 'keywords' => self::_frenchMapping(), + 'main' => self::_frenchMapping(), + ]; + + return $res; + + } + + protected static function _frenchMapping() + { + return [ + 'type' => 'text', + 'analyzer' => 'french_light', + 'fields' => [ + 'stemmed' => [ + 'type' => 'text', + 'analyzer' => 'french_heavy' + ] + ] + ]; + } + + protected function _french() + { + return [ + "filter" => [ + "french_elision" => [ + "type" => "elision", + "articles_case" => true, + "articles" => ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"] + ], + "french_synonym" => [ + "type" => "synonym", + "ignore_case" => true, + "expand" => true, + "synonyms" => [ + "salade, laitue", + "mayo, mayonnaise", + "grille, toaste", + 'pmi, pm instrumentation', + ] + ], + "french_stemmer" => [ + "type" => "stemmer", + "language" => "light_french" + ] + ], + "analyzer" => [ + "french_heavy" => [ + "tokenizer" => "icu_tokenizer", + "char_filter" => ["html_strip"], + "filter" => [ + "french_elision", + "icu_folding", + "french_synonym", + "french_stemmer" + ] + ], + "french_light" => [ + "tokenizer" => "icu_tokenizer", + "char_filter" => ["html_strip"], + "filter" => [ + "french_elision", + "icu_folding" + ] + ] + ] + ]; + } + + public static function query($term, $limit = null) + { + $index = config('cubist.internal_search_index'); + + // Set weight of each field + $fields = ['short_title' => 4, 'long_title' => 1, 'keywords' => 5, 'description' => 1, 'main' => 2]; + + $queryfields = []; + foreach ($fields as $field => $weight) { + $queryfields[] = $field . '^' . $weight; + $queryfields[] = $field . '.stemmed' . '^' . $weight; + } + + $res = \Elasticsearch::search(['index' => $index, + 'body' => [ + //'explain' => true, + 'from' => 0, + 'size' => $limit, + 'query' => [ + 'multi_match' => [ + 'query' => $term, + 'fields' => $queryfields, + ] + ] + ] + ]); + + $hits = []; + foreach ($res['hits']['hits'] as $hit) { + $hits[] = ['url' => $hit['_id'], + 'title' => $hit['_source']['short_title'] + ]; + } + + return $hits; + } +}