From: Louis Jeckel Date: Fri, 11 Sep 2020 11:17:58 +0000 (+0200) Subject: bigger searchable text chunks X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=85c09e7ddf4e47a3431de9bdbc11aea06809d6f6;p=psq.git bigger searchable text chunks --- diff --git a/app/PdfFile.php b/app/PdfFile.php index 1579881..8b97a8a 100644 --- a/app/PdfFile.php +++ b/app/PdfFile.php @@ -461,17 +461,39 @@ class PdfFile extends TwillModel implements Sortable foreach(HtmlDomParser::str_get_html($html)->find('div.page') as $page){ $page_i++; + + $buffer = ""; + $count = 0; + foreach ($page->find('p') as $paragraph){ + $text = html_entity_decode($paragraph->plaintext); if(empty($text)) continue; + $text = preg_replace('/^([A-Z]) /', '$1', $text); + $buffer .= ' ' . Str::lower(trim($text)); + + if($count === 10) { + $result[] = [ + 'content' => $buffer, + 'page' => $page_i, + ]; + $count = 0; + } else { + $count++; + } + + } + + if($count > 0) { $result[] = [ - 'content' => Str::lower(trim($text)), + 'content' => $buffer, 'page' => $page_i, ]; } + } $this->searchableTexts()->createMany($result); diff --git a/resources/views/auth/login.blade.php b/resources/views/auth/login.blade.php index c12b97e..79e397b 100644 --- a/resources/views/auth/login.blade.php +++ b/resources/views/auth/login.blade.php @@ -42,7 +42,7 @@
- +