From 5c932b5d0d2b648e09b029ec41172ebe3074fc52 Mon Sep 17 00:00:00 2001 From: Vincent Vanwaelscappel Date: Wed, 21 Aug 2024 17:48:28 +0200 Subject: [PATCH] wip #7048 @1 --- src/Html.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Html.php b/src/Html.php index d6bc149..c14532f 100644 --- a/src/Html.php +++ b/src/Html.php @@ -34,7 +34,11 @@ class Html protected static function _getTextNode(DOMNode $domNode): array { $res = []; + $exclude = ['script', 'style', 'link']; foreach ($domNode->childNodes as $node) { + if (in_array($node->nodeName, $exclude)) { + continue; + } if ($node instanceof \DOMText) { $res[] = $node->nodeValue; } @@ -54,7 +58,15 @@ class Html $doc = new DOMDocument('1.0', 'utf-8'); $doc->loadHTML('' . $string); - $res = static::_getTextNode($doc); + $nodes = static::_getTextNode($doc); + $res = []; + foreach ($nodes as $node) { + $node = trim($node); + if (!$node) { + continue; + } + $res[] = $node; + } return $res; } catch (\Exception $e) { return [$string]; -- 2.39.5