]> _ Git - cubist_util.git/commitdiff
wip #3373 @1
authorVincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 28 Jan 2020 17:54:27 +0000 (18:54 +0100)
committerVincent Vanwaelscappel <vincent@cubedesigners.com>
Tue, 28 Jan 2020 17:54:27 +0000 (18:54 +0100)
composer.json
src/Cubist/Util/Text.php

index 269eaf4cc9f2665190dd59dcf0e276c1a75e2718..4ee919387ffe3c335d55c61639565356fbb0ce85 100644 (file)
@@ -29,7 +29,7 @@
     "ext-simplexml": "*",
     "ext-json": "*",
     "ext-iconv": "*",
-    "laminas/laminas-filter": "^2.9",
+    "illuminate/support": "~5.8|^6.0",
     "cubist/net": "dev-master"
   }
 }
index 83821250b3a9cb0cda0038b493fe00bd6ac0e0ef..55a935535b87833776a8d7cc97be37f2362aecd8 100644 (file)
 <?php
 
 namespace Cubist\Util;
+
+use Illuminate\Support\Str;
+
 class Text
 {
 
-       public static function utf8_encode($text, $from = 'ISO-8859-1')
-       {
-               return self::toUTF8($text, $from);
-       }
-
-       public static function toUTF8($str, $encoding = null)
-       {
-               if (!$encoding) {
-                       $encoding = self::detectEncoding($str);
-               }
-
-               $str = iconv($encoding, 'UTF-8//TRANSLIT', $str);
-               return self::removeOddStuff($str);
-       }
-
-       public static function removeOddStuff($str)
-       {
-               $pattern = array();
-               $pattern["'"] = '\x{0092}\x{00b4}\x{0060}\x{2018}\x{2019}';
-               $pattern['oe'] = '\x{009c}';
-               $pattern['...'] = '\x{0085}';
-               $pattern['Oe'] = '\x{008c}';
-               $pattern[' '] = '\x{0096}';
-               $pattern['«'] = '\x{0093}';
-               $pattern['»'] = '\x{0094}';
-
-               foreach ($pattern as $r => $p) {
-                       $str = preg_replace('|[' . $p . ']|u', $r, $str);
-               }
-               return $str;
-       }
-
-       public static function getAccentsPattern()
-       {
-               $pattern = array();
-               $pattern['A'] = '\x{00C0}-\x{00C5}';
-               $pattern['AE'] = '\x{00C6}';
-               $pattern['C'] = '\x{00C7}';
-               $pattern['D'] = '\x{00D0}';
-               $pattern['E'] = '\x{00C8}-\x{00CB}';
-               $pattern['I'] = '\x{00CC}-\x{00CF}';
-               $pattern['N'] = '\x{00D1}';
-               $pattern['O'] = '\x{00D2}-\x{00D6}\x{00D8}';
-               $pattern['OE'] = '\x{0152}';
-               $pattern['S'] = '\x{0160}';
-               $pattern['U'] = '\x{00D9}-\x{00DC}';
-               $pattern['Y'] = '\x{00DD}';
-               $pattern['Z'] = '\x{017D}';
-
-               $pattern['a'] = '\x{00E0}-\x{00E5}';
-               $pattern['ae'] = '\x{00E6}';
-               $pattern['c'] = '\x{00E7}';
-               $pattern['d'] = '\x{00F0}';
-               $pattern['e'] = '\x{00E8}-\x{00EB}';
-               $pattern['i'] = '\x{00EC}-\x{00EF}';
-               $pattern['n'] = '\x{00F1}';
-               $pattern['o'] = '\x{00F2}-\x{00F6}\x{00F8}';
-               $pattern['oe'] = '\x{0153}';
-               $pattern['s'] = '\x{0161}';
-               $pattern['u'] = '\x{00F9}-\x{00FC}';
-               $pattern['y'] = '\x{00FD}\x{00FF}';
-               $pattern['z'] = '\x{017E}';
-
-               $pattern['ss'] = '\x{00DF}';
-               return $pattern;
-       }
-
-       public static function removeAccents($str, $clean = true)
-       {
-               $pattern = self::getAccentsPattern();
-               if ($clean) {
-                       $str = self::cleanUTF8($str);
-                       $del = array('’' => ' ', '”' => ' ', '“' => ' ', '•' => ' ', '…' => ' ', '€' => ' ',
-                               '–' => ' ', '‘' => ' ');
-                       foreach ($del as $d => $p) {
-                               $str = str_replace($d, $p, $str);
-                       }
-               }
-               foreach ($pattern as $r => $p) {
-                       $str = preg_replace('/[' . $p . ']/u', $r, $str);
-               }
-
-               $from = 'o';
-               $to = 'o';
-
-               $str = strtr($str, $from, $to);
-
-               return $str;
-       }
-
-       public static function keepOnlyLettersAndDigits($str)
-       {
-               return self::condenseWhite(preg_replace('|[^0-9A-Za-z]|ui', ' ', self::removeAccents($str)));
-       }
-
-       public static function makeAccentInsensiblePattern($str)
-       {
-               $patterns = self::getAccentsPattern();
-               $chars = preg_split('//ui', $str, -1, PREG_SPLIT_NO_EMPTY);
-               $pattern = '|';
-               foreach ($chars as $char) {
-                       if (isset($patterns[$char])) {
-                               $pattern .= '[';
-                               $pattern .= $char;
-                               $pattern .= $patterns[$char];
-                               $pattern .= ']{1}';
-                       } else {
-                               $pattern .= $char;
-                       }
-               }
-               $pattern .= '|iu';
-               return $pattern;
-       }
-
-       public static function preg_areplace($search, $replace, $subject)
-       {
-               $pattern = self::makeAccentInsensiblePattern($search);
-               return preg_replace($pattern, $replace, $subject);
-       }
-
-       public static function multiExplode($separator, $str, $limit = null)
-       {
-               $seps = array('§', '£', '¤', '#', '¨', '^', '%');
-               foreach ($seps as $sep) {
-                       if (stristr($str, $sep)) {
-                               continue;
-                       }
-                       break;
-               }
-
-               $str = preg_replace('|[' . preg_quote($separator, '-') . ']|', $sep, $str);
-               if (is_null($limit)) {
-                       return explode($sep, $str);
-               } else {
-                       return explode($sep, $str, $limit);
-               }
-       }
-
-       public static function countWords($str)
-       {
-               return count(preg_split('|\s|', $str));
-       }
-
-       public static function explodeNewLines($str)
-       {
-               $str = trim($str);
-               if ($str === '') {
-                       return [];
-               }
-               $str = self::condenseNewLines($str);
-               return preg_split('|\v|', $str);
-       }
-
-       public static function substrWord($str, $words, $end = '', $wordsorig = null)
-       {
-               if (is_null($wordsorig)) {
-                       $wordsorig = $words;
-               }
-
-               $maxchars = $wordsorig * 6;
-
-               $o = self::countWords($str);
-               if ($o <= $words) {
-                       $res = $str;
-                       $addend = false;
-               } else {
-                       $e = self::multiExplode(" \n", $str, $words);
-                       array_pop($e);
-                       $res = implode(' ', $e);
-                       $addend = true;
-               }
-               if (mb_strlen($res) > $maxchars) {
-                       return self::substrWord($str, $words - 1, $end, $wordsorig);
-               }
-
-               if ($addend) {
-                       $res .= $end;
-               }
-
-               return $res;
-       }
-
-       public static function substrWordChars($str, $chars, $end = '')
-       {
-               if (strlen($str) <= $chars) {
-                       return $str;
-               }
-
-               $str = trim(substr($str, 0, $chars));
-               $s = preg_split('|\s+|', $str);
-               array_pop($s);
-               return implode(' ', $s) . $end;
-       }
-
-       public static function ucfirst($str, $lower = false)
-       {
-               if ($lower) {
-                       $str = mb_strtolower($str);
-               }
-               $first = mb_substr($str, 0, 1);
-               $suite = mb_substr($str, 1);
-               return mb_strtoupper($first) . $suite;
-       }
-
-       public static function removeNl($str)
-       {
-               $trans = array("\n" => ' ', "\r" => ' ');
-               $str = strtr($str, $trans);
-               return self::condenseWhite($str);
-       }
-
-       public static function condenseWhite($str)
-       {
-               return preg_replace('|[\s]{2,100}|u', ' ', $str);
-       }
-
-       public static function condenseNewLines($str)
-       {
-               $str = self::normalizeLines($str);
-               $str = preg_replace('|\n{2,100}|', "\n", $str);
-               return $str;
-       }
-
-       public static function html2text($str)
-       {
-               $res = self::strip_tags($str);
-               $res = str_replace('&nbsp;', ' ', $res);
-
-               return $res;
-       }
-
-       public static function strip_tags($str, $allowed_tags = array(), $trim = false)
-       {
-               // return preg_replace('|\<.*\>|uU', '', $str);
-               // http://www.php.net/manual/fr/function.strip-tags.php#86463
-               if (!is_array($allowed_tags)) {
-                       $allowed_tags = !empty($allowed_tags) ? array($allowed_tags) : array();
-               }
-               $tags = implode('|', $allowed_tags);
-
-               if (empty($tags)) {
-                       $tags = '[a-z]+';
-               }
-
-               preg_match_all('@</?\s*(' . $tags . ')(\s+[a-z_]+=(\'[^\']+\'|"[^"]+"))*\s*/?>@i', $str, $matches);
-
-               $full_tags = $matches[0];
-               $tag_names = $matches[1];
-
-               foreach ($full_tags as $i => $full_tag) {
-                       if (!in_array($tag_names[$i], $allowed_tags)) {
-                               if ($trim) {
-                                       unset($full_tags[$i]);
-                               } else {
-                                       $str = str_replace($full_tag, '', $str);
-                               }
-                       }
-               }
-
-               return $trim ? implode('', $full_tags) : $str;
-       }
-
-       public static function str2URL($str, $replace = '-', $exclude_slashs = false, $exclude_dots = false)
-       {
-               if (is_object($str)) {
-                       $str = json_encode($str);
-               }
-               $str = str_replace('&amp;', '&', $str);
-               $str = str_replace(':', ' ', $str);
-               if (!$exclude_slashs) {
-                       $str = str_replace('/', ' ', $str);
-               }
-
-               $str = self::deaccent($str);
-               $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);
-
-               return self::tidyURL($str, true);
-
-       }
-
-       public static function cleanUTF8($str, $replace = '?')
-       {
-               while (($bad_index = self::utf8badFind($str)) !== false) {
-                       $str = substr_replace($str, $replace, $bad_index, 1);
-               }
-               $str = str_replace('\16', $replace, $str);
-               $str = str_replace('\18', $replace, $str);
-               return $str;
-       }
-
-       public static function getChar($code)
-       {
-               $code = trim($code, '&;');
-               return html_entity_decode('&' . $code . ';', ENT_QUOTES, 'UTF-8');
-       }
-
-       public static function randText($length = 300)
-       {
-               $str = 'aeiouy azertyuiopqsdfghjklmwxcvbn eaiouaeiou               ';
-               $list = str_split($str);
-               $nb = strlen($str) - 1;
-               $res = '';
-               for ($i = 0; $i <= $length; $i++) {
-                       $pos = rand(0, $nb);
-                       $res .= $list[$pos];
-               }
-               return $res;
-       }
-
-       public static function splitWordsWithCase($str)
-       {
-               $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
-               if (preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match)) {
-                       foreach ($match[1] as $i => $v) {
-                               $match[1][$i] = $v;
-                       }
-                       return $match[1];
-               }
-               return array();
-       }
-
-       public static function find_words_from_list($str, $list)
-       {
-               $words = array_unique(self::splitWordsWithCase($str));;
-               if (is_array($list)) {
-                       $liste = $list;
-               } else {
-                       $liste = array_unique(self::splitWords($list));
-               }
-
-               $l = array();
-               foreach ($words as $ll) {
-                       $lll = self::removeAccents($ll);
-                       $lll = strtolower($lll);
-                       $liste_real[$lll][] = $ll;
-                       $l[] = $lll;
-               }
-
-               $diff = array_intersect($liste, $l);
-               $res = array();
-               if ($diff) {
-                       foreach ($diff as $d) {
-                               $res = array_merge($res, $liste_real[$d]);
-                       }
-                       return $res;
-               }
-               return false;
-       }
-
-       public static function mb_str_split($string)
-       {
-               $stop = mb_strlen($string);
-               $result = array();
-
-               for ($idx = 0; $idx < $stop; $idx++) {
-                       $result[] = mb_substr($string, $idx, 1);
-               }
-
-               return $result;
-       }
-
-       public static function strToArray($str)
-       {
-               return self::mb_str_split($str);
-       }
-
-       public static function utf8ToUnicode($str)
-       {
-               $mState = 0; // cached expected number of octets after the current octet
-               // until the beginning of the next UTF8 character sequence
-               $mUcs4 = 0; // cached Unicode character
-               $mBytes = 1; // cached expected number of octets in the current sequence
-
-               $out = array();
-
-               $len = strlen($str);
-               for ($i = 0; $i < $len; $i++) {
-                       $in = ord($str{$i});
-                       if (0 == $mState) {
-                               // When mState is zero we expect either a US-ASCII character or a
-                               // multi-octet sequence.
-                               if (0 == (0x80 & ($in))) {
-                                       // US-ASCII, pass straight through.
-                                       $out[] = $in;
-                                       $mBytes = 1;
-                               } else if (0xC0 == (0xE0 & ($in))) {
-                                       // First octet of 2 octet sequence
-                                       $mUcs4 = ($in);
-                                       $mUcs4 = ($mUcs4 & 0x1F) << 6;
-                                       $mState = 1;
-                                       $mBytes = 2;
-                               } else if (0xE0 == (0xF0 & ($in))) {
-                                       // First octet of 3 octet sequence
-                                       $mUcs4 = ($in);
-                                       $mUcs4 = ($mUcs4 & 0x0F) << 12;
-                                       $mState = 2;
-                                       $mBytes = 3;
-                               } else if (0xF0 == (0xF8 & ($in))) {
-                                       // First octet of 4 octet sequence
-                                       $mUcs4 = ($in);
-                                       $mUcs4 = ($mUcs4 & 0x07) << 18;
-                                       $mState = 3;
-                                       $mBytes = 4;
-                               } else if (0xF8 == (0xFC & ($in))) {
-                                       /* First octet of 5 octet sequence.
-                                        *
-                                        * This is illegal because the encoded codepoint must be either
-                                        * (a) not the shortest form or
-                                        * (b) outside the Unicode range of 0-0x10FFFF.
-                                        * Rather than trying to resynchronize, we will carry on until the end
-                                        * of the sequence and let the later error handling code catch it.
-                                        */
-                                       $mUcs4 = ($in);
-                                       $mUcs4 = ($mUcs4 & 0x03) << 24;
-                                       $mState = 4;
-                                       $mBytes = 5;
-                               } else if (0xFC == (0xFE & ($in))) {
-                                       // First octet of 6 octet sequence, see comments for 5 octet sequence.
-                                       $mUcs4 = ($in);
-                                       $mUcs4 = ($mUcs4 & 1) << 30;
-                                       $mState = 5;
-                                       $mBytes = 6;
-                               } else {
-                                       /* Current octet is neither in the US-ASCII range nor a legal first
-                                        * octet of a multi-octet sequence.
-                                        */
-                                       return false;
-                               }
-                       } else {
-                               // When mState is non-zero, we expect a continuation of the multi-octet
-                               // sequence
-                               if (0x80 == (0xC0 & ($in))) {
-                                       // Legal continuation.
-                                       $shift = ($mState - 1) * 6;
-                                       $tmp = $in;
-                                       $tmp = ($tmp & 0x0000003F) << $shift;
-                                       $mUcs4 |= $tmp;
-
-                                       if (0 == --$mState) {
-                                               /* End of the multi-octet sequence. mUcs4 now contains the final
-                                                * Unicode codepoint to be output
-                                                *
-                                                * Check for illegal sequences and codepoints.
-                                                */
-                                               // From Unicode 3.1, non-shortest form is illegal
-                                               if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
-                                                       ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
-                                                       ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
-                                                       (4 < $mBytes) ||
-                                                       // From Unicode 3.2, surrogate characters are illegal
-                                                       (($mUcs4 & 0xFFFFF800) == 0xD800) ||
-                                                       // Codepoints outside the Unicode range are illegal
-                                                       ($mUcs4 > 0x10FFFF)
-                                               ) {
-                                                       return false;
-                                               }
-                                               if (0xFEFF != $mUcs4) {
-                                                       // BOM is legal but we don't want to output it
-                                                       $out[] = $mUcs4;
-                                               }
-                                               // initialize UTF8 cache
-                                               $mState = 0;
-                                               $mUcs4 = 0;
-                                               $mBytes = 1;
-                                       }
-                               } else {
-                                       /* ((0xC0 & (*in) != 0x80) && (mState != 0))
-                                        *
-                                        * Incomplete multi-octet sequence.
-                                        */
-                                       return false;
-                               }
-                       }
-               }
-               return $out;
-       }
-
-       /**
-        * Takes an array of ints representing the Unicode characters and returns
-        * a UTF-8 string. Astral planes are supported ie. the ints in the
-        * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
-        * are not allowed.
-        *
-        * Returns false if the input array contains ints that represent
-        * surrogates or are outside the Unicode range.
-        */
-       public static function unicodeToUtf8($arr)
-       {
-               $dest = '';
-               foreach ($arr as $src) {
-                       if ($src < 0) {
-                               return false;
-                       } else if ($src <= 0x007f) {
-                               $dest .= chr($src);
-                       } else if ($src <= 0x07ff) {
-                               $dest .= chr(0xc0 | ($src >> 6));
-                               $dest .= chr(0x80 | ($src & 0x003f));
-                       } else if ($src == 0xFEFF) {
-                               // nop -- zap the BOM
-                       } else if ($src >= 0xD800 && $src <= 0xDFFF) {
-                               // found a surrogate
-                               return false;
-                       } else if ($src <= 0xffff) {
-                               $dest .= chr(0xe0 | ($src >> 12));
-                               $dest .= chr(0x80 | (($src >> 6) & 0x003f));
-                               $dest .= chr(0x80 | ($src & 0x003f));
-                       } else if ($src <= 0x10ffff) {
-                               $dest .= chr(0xf0 | ($src >> 18));
-                               $dest .= chr(0x80 | (($src >> 12) & 0x3f));
-                               $dest .= chr(0x80 | (($src >> 6) & 0x3f));
-                               $dest .= chr(0x80 | ($src & 0x3f));
-                       } else {
-                               // out of range
-                               return false;
-                       }
-               }
-               return $dest;
-       }
-
-       public static function uchr($n)
-       {
-               return self::unicodeToUtf8(array($n));
-       }
-
-       public static function uord($c)
-       {
-               $r = self::utf8ToUnicode($c);
-               return array_shift($r);
-       }
-
-       public static function strcmp($s1, $s2, $ignoreCase = false, $ignoreAccents = false, $trim = false)
-       {
-               if ($trim !== false) {
-                       $s1 = trim($s1, $trim);
-                       $s2 = trim($s2, $trim);
-               }
-               if ($ignoreAccents) {
-                       $s1 = self::removeAccents($s1);
-                       $s2 = self::removeAccents($s2);
-               }
-               if ($ignoreCase) {
-                       $s1 = mb_strtolower($s1);
-                       $s2 = mb_strtolower($s2);
-               }
-
-               return strcmp($s1, $s2);
-       }
-
-       public static function removeNewLines($input)
-       {
-               $res = preg_replace("|\s+|", ' ', $input);
-               return $res;
-       }
-
-       /**
-        *
-        * @param string $str
-        * @param boolean $compact
-        * @return array
-        */
-       public static function splitLines($str, $compact = true)
-       {
-               $str = str_replace("\r\n", "\n", $str);
-               $str = str_replace("\r", "\n", $str);
-               $str = explode("\n", $str);
-
-               if (!$compact) {
-                       return $str;
-               }
-
-               $res = array();
-               foreach ($str as $s) {
-                       $s = trim($s);
-                       if ($s == '') {
-                               continue;
-                       }
-                       $res[] = $s;
-               }
-               return $res;
-       }
-
-       public static function parseUrl($url, $forceScheme = true)
-       {
-               $url = trim($url);
-               if (substr($url, 0, 2) == '//') {
-                       $url = 'http:' . $url;
-               }
-               $res = parse_url($url);
-               if ($forceScheme && !isset($res['scheme'])) {
-                       $url = 'http://' . $url;
-                       $res = parse_url($url);
-               }
-
-               if (isset($res['query'])) {
-                       parse_str($res['query'], $tmp);
-                       $res['query_params'] = $tmp;
-               }
-
-               if (isset($res['path'])) {
-                       $components = explode('/', trim($res['path'], '/'));
-                       $filteredComponents = array();
-                       foreach ($components as $c) {
-                               if ($c == '') {
-                                       continue;
-                               }
-                               $filteredComponents[] = $c;
-                       }
-                       $res['path_components'] = $filteredComponents;
-               }
-               return $res;
-       }
-
-       public static function pluriel($nb, $singulier, $pluriel, $zero = false, $displayNb = true)
-       {
-               $nb = intval($nb);
-               $res = '';
-               if ($displayNb) {
-                       $res .= $nb . ' ';
-               }
-               if ($nb == 0 && $zero) {
-                       return $zero;
-               }
-               if ($nb <= 1) {
-                       $res .= $singulier;
-               } else {
-                       $res .= $pluriel;
-               }
-               return $res;
-       }
-
-       public static function normalizeLines($text, $os = 'nix')
-       {
-               $text = str_replace("\r\n", "\n", $text);
-               $text = str_replace("\r", "\n", $text);
-               if ($os == 'win') {
-                       return str_replace("\n", "\r\n", $text);
-               }
-               return $text;
-       }
-
-       public static function underscoreToCamelCase($str, $upperFirst = false)
-       {
-               $inflector = new Zend_Filter_Inflector(':string');
-               $inflector->addRules(array(':string' => array('Word_UnderscoreToCamelCase')));
-               $str = $inflector->filter(array('string' => $str));
-               if (!$upperFirst) {
-                       $str{0} = mb_strtolower($str{0});
-               }
-               return $str;
-       }
-
-       public static function camelCaseToUnderscore($str)
-       {
-
-               preg_match_all('!([A-Z][A-Z0-9]*(?=$|[A-Z][a-z0-9])|[A-Za-z][a-z0-9]+)!', $str, $matches);
-               $ret = $matches[0];
-               foreach ($ret as &$match) {
-                       $match = $match == strtoupper($match) ? strtolower($match) : lcfirst($match);
-               }
-               return implode('_', $ret);
-       }
-
-       // Stops orphans in HTML by replacing the last space with a &nbsp;
-       public static function preventOrphans($str)
-       {
-
-               $find = ' '; // What to search for
-               $replace = '&nbsp;'; // What to replace it with
-
-               $last_space = strrpos($str, $find); // Find last occurrence in string
-
-               if ($last_space !== false) {
-                       $str = substr_replace($str, $replace, $last_space, strlen($find));
-               }
-
-               // Also replace punctuation that has spaces before it (eg. in French)
-               $punctuations = array(' :', ' !', ' ?', '« ', ' »');
-               $replacements = array("{$replace}:", "{$replace}!", "{$replace}?", "«{$replace}", "{$replace}»");
-               $str = str_replace($punctuations, $replacements, $str);
-
-               return $str;
-       }
-
-       /**
-        * Check email address
-        *
-        * Returns true if $email is a valid email address.
-        *
-        * @copyright Cal Henderson
-        * @license http://creativecommons.org/licenses/by-sa/2.5/ CC-BY-SA
-        * @link http://www.iamcal.com/publish/articles/php/parsing_email/
-        *
-        * @param string $email Email string
-        * @return boolean
-        */
-       public static function isEmail($email)
-       {
-               $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
-               $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
-               $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
-               $quoted_pair = '\\x5c[\\x00-\\x7f]';
-               $domain_literal = "\\x5b($dtext|$quoted_pair)*\\x5d";
-               $quoted_string = "\\x22($qtext|$quoted_pair)*\\x22";
-               $domain_ref = $atom;
-               $sub_domain = "($domain_ref|$domain_literal)";
-               $word = "($atom|$quoted_string)";
-               $domain = "$sub_domain(\\x2e$sub_domain)*";
-               $local_part = "$word(\\x2e$word)*";
-               $addr_spec = "$local_part\\x40$domain";
-
-               return (boolean)preg_match("!^$addr_spec$!", $email);
-       }
-
-       /**
-        * Accents replacement
-        *
-        * Replaces some occidental accentuated characters by their ASCII
-        * representation.
-        *
-        * @param    string $str String to deaccent
-        * @return    string
-        */
-       public static function deaccent($str)
-       {
-               $pattern['A'] = '\x{00C0}-\x{00C5}';
-               $pattern['AE'] = '\x{00C6}';
-               $pattern['C'] = '\x{00C7}';
-               $pattern['D'] = '\x{00D0}';
-               $pattern['E'] = '\x{00C8}-\x{00CB}';
-               $pattern['I'] = '\x{00CC}-\x{00CF}';
-               $pattern['N'] = '\x{00D1}';
-               $pattern['O'] = '\x{00D2}-\x{00D6}\x{00D8}';
-               $pattern['OE'] = '\x{0152}';
-               $pattern['S'] = '\x{0160}';
-               $pattern['U'] = '\x{00D9}-\x{00DC}';
-               $pattern['Y'] = '\x{00DD}';
-               $pattern['Z'] = '\x{017D}';
-
-               $pattern['a'] = '\x{00E0}-\x{00E5}';
-               $pattern['ae'] = '\x{00E6}';
-               $pattern['c'] = '\x{00E7}';
-               $pattern['d'] = '\x{00F0}';
-               $pattern['e'] = '\x{00E8}-\x{00EB}';
-               $pattern['i'] = '\x{00EC}-\x{00EF}';
-               $pattern['n'] = '\x{00F1}';
-               $pattern['o'] = '\x{00F2}-\x{00F6}\x{00F8}';
-               $pattern['oe'] = '\x{0153}';
-               $pattern['s'] = '\x{0161}';
-               $pattern['u'] = '\x{00F9}-\x{00FC}';
-               $pattern['y'] = '\x{00FD}\x{00FF}';
-               $pattern['z'] = '\x{017E}';
-
-               $pattern['ss'] = '\x{00DF}';
-
-               foreach ($pattern as $r => $p) {
-                       $str = preg_replace('/[' . $p . ']/u', $r, $str);
-               }
-
-               return $str;
-       }
-
-       /**
-        * URL cleanup
-        *
-        * @param string $str URL to tidy
-        * @param boolean $keep_slashes Keep slashes in URL
-        * @param boolean $keep_spaces Keep spaces in URL
-        * @return string
-        */
-       public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
-       {
-               $str = strip_tags($str);
-               $str = str_replace(array('?', '&', '#', '=', '+', '<', '>', '"', '%'), '', $str);
-               $str = str_replace("'", ' ', $str);
-               $str = preg_replace('/[\s]+/u', ' ', trim($str));
-
-               if (!$keep_slashes) {
-                       $str = str_replace('/', '-', $str);
-               }
-
-               if (!$keep_spaces) {
-                       $str = str_replace(' ', '-', $str);
-               }
-
-               $str = preg_replace('/[-]+/', '-', $str);
-
-               # Remove path changes in URL
-               $str = preg_replace('%^/%', '', $str);
-               $str = preg_replace('%\.+/%', '', $str);
-
-               return $str;
-       }
-
-       /**
-        * Cut string
-        *
-        * Returns a cuted string on spaced at given length $l.
-        *
-        * @param    string $str String to cut
-        * @param    integer $l Length to keep
-        * @return    string
-        */
-       public static function cutString($str, $l)
-       {
-               $s = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
-
-               $res = '';
-               $L = 0;
-
-               if (mb_strlen($s[0]) >= $l) {
-                       return mb_substr($s[0], 0, $l);
-               }
-
-               foreach ($s as $v) {
-                       $L = $L + mb_strlen($v);
-
-                       if ($L > $l) {
-                               break;
-                       } else {
-                               $res .= $v;
-                       }
-               }
-
-               return trim($res);
-       }
-
-       /**
-        * Split words
-        *
-        * Returns an array of words from a given string.
-        *
-        * @param string $str Words to split
-        * @return array
-        */
-       public static function splitWords($str, $minChar = 3)
-       {
-               $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
-               if (preg_match_all('/([^' . $non_word . ']{' . $minChar . ',})/msu', html::clean($str), $match)) {
-                       foreach ($match[1] as $i => $v) {
-                               $match[1][$i] = mb_strtolower($v);
-                       }
-                       return $match[1];
-               }
-               return array();
-       }
-
-       /**
-        * Encoding detection
-        *
-        * Returns the encoding (in lowercase) of given $str.
-        *
-        * @param string $str String
-        * @return string
-        */
-       public static function detectEncoding($str)
-       {
-               return strtolower(mb_detect_encoding($str . ' ',
-                       'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .
-                       'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .
-                       'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15'));
-       }
-
-       /**
-        * Find bad UTF8 tokens
-        *
-        * Locates the first bad byte in a UTF-8 string returning it's
-        * byte index in the string
-        * PCRE Pattern to locate bad bytes in a UTF-8 string
-        * Comes from W3 FAQ: Multilingual Forms
-        * Note: modified to include full ASCII range including control chars
-        *
-        * @copyright Harry Fuecks
-        * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html GNU LGPL 2.1
-        * @link http://phputf8.sourceforge.net
-        *
-        * @param string $str String to search
-        * @return integer|false
-        */
-       public static function utf8badFind($str)
-       {
-               $UTF8_BAD =
-                       '([\x00-\x7F]' .                          # ASCII (including control chars)
-                       '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
-                       '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
-                       '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
-                       '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
-                       '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
-                       '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
-                       '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
-                       '|(.{1}))';                              # invalid byte
-               $pos = 0;
-               $badList = array();
-
-               while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) {
-                       $bytes = strlen($matches[0]);
-                       if (isset($matches[2])) {
-                               return $pos;
-                       }
-                       $pos += $bytes;
-                       $str = substr($str, $bytes);
-               }
-               return false;
-       }
-
-
-       /**
-        * BOM removal
-        *
-        * Removes BOM from the begining of a string if present.
-        *
-        * @param string $str String to clean
-        * @return string
-        */
-       public static function removeBOM($str)
-       {
-               if (substr_count($str, '')) {
-                       return str_replace('', '', $str);
-               }
-
-               return $str;
-       }
-
-       /**
-        * Quoted printable conversion
-        *
-        * Encodes given str to quoted printable
-        *
-        * @param string $str String to encode
-        * @return string
-        */
-       public static function QPEncode($str)
-       {
-               $res = '';
-
-               foreach (preg_split("/\r?\n/msu", $str) as $line) {
-                       $l = '';
-                       preg_match_all('/./', $line, $m);
-
-                       foreach ($m[0] as $c) {
-                               $a = ord($c);
-
-                               if ($a < 32 || $a == 61 || $a > 126) {
-                                       $c = sprintf('=%02X', $a);
-                               }
-
-                               $l .= $c;
-                       }
-
-                       $res .= $l . "\r\n";
-               }
-               return $res;
-       }
+    /**
+     * Converts a string to UTF-8, assuming ISO-8859-1 input unless told otherwise.
+     *
+     * Thin alias of toUTF8() that only supplies a default source encoding.
+     *
+     * @param string $text String to convert
+     * @param string $from Source encoding name handed to toUTF8()
+     * @return string Result of toUTF8()
+     */
+    public static function utf8_encode($text, $from = 'ISO-8859-1')
+    {
+        $converted = self::toUTF8($text, $from);
+
+        return $converted;
+    }
+
+    /**
+     * Converts a string to UTF-8 with iconv, then runs it through removeOddStuff().
+     *
+     * @param string $str String to convert
+     * @param string|null $encoding Source encoding; detected with detectEncoding() when empty
+     * @return string Converted string as returned by removeOddStuff()
+     */
+    public static function toUTF8($str, $encoding = null)
+    {
+        // Any falsy value (null, '', 0) triggers auto-detection.
+        $source = $encoding ? $encoding : self::detectEncoding($str);
+        $utf8 = iconv($source, 'UTF-8//TRANSLIT', $str);
+
+        return self::removeOddStuff($utf8);
+    }
+
+    /**
+     * Replaces a handful of stray code points with plain substitutes.
+     *
+     * Each entry maps the replacement text to the body of a PCRE character
+     * class; every character of that class is replaced, one entry after the other.
+     *
+     * @param string $str UTF-8 string to clean
+     * @return string
+     */
+    public static function removeOddStuff($str)
+    {
+        // Replacement text => characters it stands in for.
+        $substitutions = array(
+            "'" => '\x{0092}\x{00b4}\x{0060}\x{2018}\x{2019}',
+            'oe' => '\x{009c}',
+            '...' => '\x{0085}',
+            'Oe' => '\x{008c}',
+            ' ' => '\x{0096}',
+            '«' => '\x{0093}',
+            '»' => '\x{0094}',
+        );
+
+        foreach ($substitutions as $replacement => $class) {
+            $str = preg_replace('|[' . $class . ']|u', $replacement, $str);
+        }
+
+        return $str;
+    }
+
+    /**
+     * Returns the accent table used by the accent-handling helpers.
+     *
+     * Keys are the ASCII replacement, values are the body of a PCRE character
+     * class (to be used with the "u" modifier) listing the accented characters.
+     *
+     * @return array
+     */
+    public static function getAccentsPattern()
+    {
+        return array(
+            // Upper case
+            'A' => '\x{00C0}-\x{00C5}',
+            'AE' => '\x{00C6}',
+            'C' => '\x{00C7}',
+            'D' => '\x{00D0}',
+            'E' => '\x{00C8}-\x{00CB}',
+            'I' => '\x{00CC}-\x{00CF}',
+            'N' => '\x{00D1}',
+            'O' => '\x{00D2}-\x{00D6}\x{00D8}',
+            'OE' => '\x{0152}',
+            'S' => '\x{0160}',
+            'U' => '\x{00D9}-\x{00DC}',
+            'Y' => '\x{00DD}',
+            'Z' => '\x{017D}',
+            // Lower case
+            'a' => '\x{00E0}-\x{00E5}',
+            'ae' => '\x{00E6}',
+            'c' => '\x{00E7}',
+            'd' => '\x{00F0}',
+            'e' => '\x{00E8}-\x{00EB}',
+            'i' => '\x{00EC}-\x{00EF}',
+            'n' => '\x{00F1}',
+            'o' => '\x{00F2}-\x{00F6}\x{00F8}',
+            'oe' => '\x{0153}',
+            's' => '\x{0161}',
+            'u' => '\x{00F9}-\x{00FC}',
+            'y' => '\x{00FD}\x{00FF}',
+            'z' => '\x{017E}',
+            // Sharp s
+            'ss' => '\x{00DF}',
+        );
+    }
+
+    /**
+     * Replaces accented characters by their ASCII counterpart.
+     *
+     * @param string $str UTF-8 string
+     * @param boolean $clean When true, the string first goes through cleanUTF8() and
+     *                       some typographic punctuation is turned into spaces
+     * @return string
+     */
+    public static function removeAccents($str, $clean = true)
+    {
+        if ($clean) {
+            $str = self::cleanUTF8($str);
+            // Typographic quotes, bullet, ellipsis, euro sign and dash are blanked out.
+            $blanked = array('’', '”', '“', '•', '…', '€', '–', '‘');
+            $str = str_replace($blanked, ' ', $str);
+        }
+
+        foreach (self::getAccentsPattern() as $ascii => $class) {
+            $str = preg_replace('/[' . $class . ']/u', $ascii, $str);
+        }
+
+        // As written, both translation tables are the single letter "o",
+        // so this step leaves the string unchanged.
+        $str = strtr($str, 'o', 'o');
+
+        return $str;
+    }
+
+    /**
+     * Keeps ASCII letters and digits only; everything else becomes a space.
+     *
+     * Accents are removed first, and the result goes through condenseWhite().
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function keepOnlyLettersAndDigits($str)
+    {
+        $ascii = self::removeAccents($str);
+        $spaced = preg_replace('|[^0-9A-Za-z]|ui', ' ', $ascii);
+
+        return self::condenseWhite($spaced);
+    }
+
+    /**
+     * Builds a case-insensitive PCRE pattern matching $str with or without accents.
+     *
+     * Every character that is a key of getAccentsPattern() is turned into a
+     * character class holding the letter and its accented variants.
+     *
+     * @param string $str Text to search for
+     * @return string Pattern delimited by "|" with the "iu" modifiers
+     */
+    public static function makeAccentInsensiblePattern($str)
+    {
+        $classes = self::getAccentsPattern();
+        $body = '';
+
+        foreach (preg_split('//ui', $str, -1, PREG_SPLIT_NO_EMPTY) as $letter) {
+            // Characters are inserted verbatim; nothing is regex-escaped here.
+            $body .= isset($classes[$letter])
+                ? '[' . $letter . $classes[$letter] . ']{1}'
+                : $letter;
+        }
+
+        return '|' . $body . '|iu';
+    }
+
+    /**
+     * Accent-insensitive replacement.
+     *
+     * @param string $search Text turned into a pattern by makeAccentInsensiblePattern()
+     * @param string $replace Replacement handed to preg_replace()
+     * @param string $subject String to search in
+     * @return string
+     */
+    public static function preg_areplace($search, $replace, $subject)
+    {
+        return preg_replace(self::makeAccentInsensiblePattern($search), $replace, $subject);
+    }
+
+    /**
+     * Splits a string on any of several single-character separators.
+     *
+     * The string is split directly with a character class, so the input may
+     * contain any character and the remainder kept by $limit is returned as is.
+     *
+     * @param string $separator Characters that each act as a separator
+     * @param string $str String to split
+     * @param integer|null $limit Same meaning as the limit of explode(); null for no limit
+     * @return array
+     */
+    public static function multiExplode($separator, $str, $limit = null)
+    {
+        // An empty character class is not a valid pattern: nothing to split on.
+        if ((string)$separator === '') {
+            return array((string)$str);
+        }
+
+        $pattern = '|[' . preg_quote($separator, '-') . ']|';
+        if (is_null($limit)) {
+            return preg_split($pattern, $str);
+        }
+
+        $limit = (int)$limit;
+        if ($limit === 0) {
+            // explode() treats a limit of 0 as 1.
+            $limit = 1;
+        }
+        if ($limit > 0) {
+            return preg_split($pattern, $str, $limit);
+        }
+
+        // Negative limit, as with explode(): drop the last -$limit components.
+        return array_slice(preg_split($pattern, $str), 0, $limit);
+    }
+
+    /**
+     * Counts the pieces obtained by splitting on every single whitespace character.
+     *
+     * Consecutive whitespace therefore yields empty pieces that are counted too,
+     * and an empty string counts as one piece.
+     *
+     * @param string $str
+     * @return integer
+     */
+    public static function countWords($str)
+    {
+        $pieces = preg_split('|\s|', $str);
+
+        return count($pieces);
+    }
+
+    /**
+     * Splits a text into its lines, ignoring blank lines.
+     *
+     * The text is trimmed, runs of line breaks are condensed with
+     * condenseNewLines(), then the result is split on vertical whitespace.
+     *
+     * @param string $str
+     * @return array Empty array when the trimmed text is empty
+     */
+    public static function explodeNewLines($str)
+    {
+        $trimmed = trim($str);
+        if ($trimmed === '') {
+            return [];
+        }
+
+        return preg_split('|\v|', self::condenseNewLines($trimmed));
+    }
+
+    /**
+     * Truncates a text to a number of words.
+     *
+     * The excerpt may not exceed six characters per originally requested word;
+     * when it does, the method calls itself again with one word less.
+     *
+     * @param string $str Text to shorten
+     * @param integer $words Word limit for this pass
+     * @param string $end Suffix appended only when the text was actually cut
+     * @param integer|null $wordsorig Word count of the first call (used for the character budget)
+     * @return string
+     */
+    public static function substrWord($str, $words, $end = '', $wordsorig = null)
+    {
+        $budgetWords = is_null($wordsorig) ? $words : $wordsorig;
+        $charLimit = $budgetWords * 6;
+
+        $truncated = self::countWords($str) > $words;
+        if ($truncated) {
+            $parts = self::multiExplode(" \n", $str, $words);
+            // The last part holds the remainder of the text: discard it.
+            array_pop($parts);
+            $excerpt = implode(' ', $parts);
+        } else {
+            $excerpt = $str;
+        }
+
+        if (mb_strlen($excerpt) > $charLimit) {
+            return self::substrWord($str, $words - 1, $end, $budgetWords);
+        }
+
+        return $truncated ? $excerpt . $end : $excerpt;
+    }
+
+    /**
+     * Truncates a text to at most $chars characters without cutting a word.
+     *
+     * Lengths are counted in UTF-8 characters, not bytes, so a multibyte
+     * character is never split. The last (possibly partial) word of the cut
+     * is dropped before $end is appended.
+     *
+     * @param string $str Text to shorten
+     * @param integer $chars Maximum number of characters
+     * @param string $end Suffix appended when the text was cut
+     * @return string
+     */
+    public static function substrWordChars($str, $chars, $end = '')
+    {
+        if (mb_strlen($str, 'UTF-8') <= $chars) {
+            return $str;
+        }
+
+        $head = trim(mb_substr($str, 0, $chars, 'UTF-8'));
+        $words = preg_split('|\s+|', $head);
+        array_pop($words);
+
+        return implode(' ', $words) . $end;
+    }
+
+    /**
+     * Multibyte-aware ucfirst().
+     *
+     * @param string $str
+     * @param boolean $lower When true the whole string is lower-cased first
+     * @return string
+     */
+    public static function ucfirst($str, $lower = false)
+    {
+        $text = $lower ? mb_strtolower($str) : $str;
+
+        return mb_strtoupper(mb_substr($text, 0, 1)) . mb_substr($text, 1);
+    }
+
+    /**
+     * Replaces line feeds and carriage returns by spaces, then condenses whitespace.
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function removeNl($str)
+    {
+        $flat = str_replace(array("\n", "\r"), ' ', $str);
+
+        return self::condenseWhite($flat);
+    }
+
+    /**
+     * Collapses every run of two or more whitespace characters into one space.
+     *
+     * The quantifier is unbounded so that a run of any length becomes a single
+     * space. A lone whitespace character is left as it is.
+     *
+     * @param string $str UTF-8 string
+     * @return string
+     */
+    public static function condenseWhite($str)
+    {
+        return preg_replace('|[\s]{2,}|u', ' ', $str);
+    }
+
+    /**
+     * Normalises line endings to "\n" and collapses consecutive line breaks.
+     *
+     * The quantifier is unbounded so that a run of blank lines of any length
+     * becomes a single line break.
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function condenseNewLines($str)
+    {
+        $str = self::normalizeLines($str);
+
+        return preg_replace('|\n{2,}|', "\n", $str);
+    }
+
+    /**
+     * Strips tags with the class's own strip_tags() and turns "&nbsp;" into a plain space.
+     *
+     * @param string $str HTML fragment
+     * @return string
+     */
+    public static function html2text($str)
+    {
+        return str_replace('&nbsp;', ' ', self::strip_tags($str));
+    }
+
+    /**
+     * Regex-based tag stripping.
+     *
+     * Without allowed tags, every tag matched by the pattern is removed. When
+     * allowed tags are given, only those tag names are looked for, and matches
+     * whose name is in the list are left in place.
+     *
+     * @param string $str HTML fragment
+     * @param array|string $allowed_tags Tag name(s) to keep
+     * @param boolean $trim When true, returns the concatenation of the matched
+     *                      tags that were kept instead of the stripped text
+     * @return string
+     * @link http://www.php.net/manual/fr/function.strip-tags.php#86463
+     */
+    public static function strip_tags($str, $allowed_tags = array(), $trim = false)
+    {
+        if (!is_array($allowed_tags)) {
+            $allowed_tags = empty($allowed_tags) ? array() : array($allowed_tags);
+        }
+
+        $alternation = implode('|', $allowed_tags);
+        if (empty($alternation)) {
+            // No explicit list: match any tag name.
+            $alternation = '[a-z]+';
+        }
+
+        $regex = '@</?\s*(' . $alternation . ')(\s+[a-z_]+=(\'[^\']+\'|"[^"]+"))*\s*/?>@i';
+        preg_match_all($regex, $str, $found);
+
+        $kept = $found[0];
+        foreach ($found[1] as $idx => $tagName) {
+            if (in_array($tagName, $allowed_tags)) {
+                continue;
+            }
+            if ($trim) {
+                unset($kept[$idx]);
+            } else {
+                $str = str_replace($kept[$idx], '', $str);
+            }
+        }
+
+        return $trim ? implode('', $kept) : $str;
+    }
+
+    /**
+     * Turns a string into a URL-friendly slug.
+     *
+     * Objects are JSON-encoded first. Colons (and slashes, unless
+     * $exclude_slashs is set) become spaces, accents are removed, characters
+     * outside the kept set are dropped and the result goes through tidyURL().
+     *
+     * @param string|object $str Text to convert
+     * @param string $replace Not read by the current implementation
+     * @param boolean $exclude_slashs When true, slashes are kept
+     * @param boolean $exclude_dots Not read by the current implementation
+     * @return string
+     */
+    public static function str2URL($str, $replace = '-', $exclude_slashs = false, $exclude_dots = false)
+    {
+        if (is_object($str)) {
+            $str = json_encode($str);
+        }
+
+        $str = str_replace(array('&amp;', ':'), array('&', ' '), $str);
+        if (!$exclude_slashs) {
+            $str = str_replace('/', ' ', $str);
+        }
+
+        $ascii = self::deaccent($str);
+        $filtered = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $ascii);
+
+        return self::tidyURL($filtered, true);
+    }
+
+    /**
+     * Replaces every byte reported by utf8badFind() with $replace.
+     *
+     * @param string $str String to clean
+     * @param string $replace Replacement for each bad byte
+     * @return string
+     */
+    public static function cleanUTF8($str, $replace = '?')
+    {
+        $badAt = self::utf8badFind($str);
+        while ($badAt !== false) {
+            $str = substr_replace($str, $replace, $badAt, 1);
+            $badAt = self::utf8badFind($str);
+        }
+
+        // Single-quoted, so these are the literal three-character sequences
+        // backslash-1-6 and backslash-1-8.
+        return str_replace(array('\16', '\18'), $replace, $str);
+    }
+
+    /**
+     * Returns the UTF-8 character for an HTML entity name or numeric reference.
+     *
+     * @param string $code Entity, with or without the surrounding "&" and ";"
+     * @return string
+     */
+    public static function getChar($code)
+    {
+        $name = trim($code, '&;');
+        $entity = '&' . $name . ';';
+
+        return html_entity_decode($entity, ENT_QUOTES, 'UTF-8');
+    }
+
+    /**
+     * Generates pseudo-random filler text.
+     *
+     * Characters are drawn from a fixed alphabet in which vowels and spaces are
+     * repeated. Exactly $length characters are produced.
+     *
+     * @param integer $length Number of characters to generate
+     * @return string
+     */
+    public static function randText($length = 300)
+    {
+        $str = 'aeiouy azertyuiopqsdfghjklmwxcvbn eaiouaeiou               ';
+        $nb = strlen($str) - 1;
+        $res = '';
+        // Strict "<": one iteration per requested character.
+        for ($i = 0; $i < $length; $i++) {
+            $res .= $str[rand(0, $nb)];
+        }
+        return $res;
+    }
+
+    /**
+     * Returns the words of at least three characters found in a string, case untouched.
+     *
+     * The string is passed through html::clean() before matching.
+     *
+     * @param string $str
+     * @return array
+     */
+    public static function splitWordsWithCase($str)
+    {
+        // Characters that cannot be part of a word (character-class body).
+        $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+        $found = preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match);
+
+        return $found ? $match[1] : array();
+    }
+
+    /**
+     * Finds which words of a text appear in a list, ignoring accents and case.
+     *
+     * Words of $str are de-accented and lower-cased before being compared with
+     * the list entries; the original spellings of the matching words are returned.
+     *
+     * @param string $str Text to scan
+     * @param array|string $list Words to look for; a string is split with splitWords()
+     * @return array|false Matching words as written in $str, or false when none match
+     */
+    public static function find_words_from_list($str, $list)
+    {
+        $candidates = array_unique(self::splitWordsWithCase($str));
+        $wanted = is_array($list) ? $list : array_unique(self::splitWords($list));
+
+        $normalised = array();
+        $originals = array();
+        foreach ($candidates as $word) {
+            $key = strtolower(self::removeAccents($word));
+            $originals[$key][] = $word;
+            $normalised[] = $key;
+        }
+
+        $hits = array_intersect($wanted, $normalised);
+        if (!$hits) {
+            return false;
+        }
+
+        $res = array();
+        foreach ($hits as $hit) {
+            $res = array_merge($res, $originals[$hit]);
+        }
+        return $res;
+    }
+
+    /**
+     * Splits a string into an array of characters, using the mb_* functions.
+     *
+     * @param string $string
+     * @return array One entry per character; empty array for an empty string
+     */
+    public static function mb_str_split($string)
+    {
+        $chars = array();
+        $total = mb_strlen($string);
+        $at = 0;
+
+        while ($at < $total) {
+            $chars[] = mb_substr($string, $at, 1);
+            $at++;
+        }
+
+        return $chars;
+    }
+
+    /**
+     * Alias of mb_str_split().
+     *
+     * @param string $str
+     * @return array
+     */
+    public static function strToArray($str)
+    {
+        $chars = self::mb_str_split($str);
+
+        return $chars;
+    }
+
+    /**
+     * Decodes a UTF-8 byte string into an array of Unicode code points.
+     *
+     * Walks the input byte by byte with a small state machine. Overlong forms,
+     * 5- and 6-byte sequences, surrogates, code points above 0x10FFFF, bytes
+     * that cannot start a sequence and sequences interrupted by a
+     * non-continuation byte all make the method return false. A byte order
+     * mark (U+FEFF) is accepted but left out of the result. A sequence cut
+     * short by the end of the string is not reported: the code points decoded
+     * so far are returned.
+     *
+     * @param string $str UTF-8 encoded bytes
+     * @return array|false List of integer code points, or false on malformed input
+     */
+    public static function utf8ToUnicode($str)
+    {
+        $mState = 0; // cached expected number of octets after the current octet
+        // until the beginning of the next UTF8 character sequence
+        $mUcs4 = 0; // cached Unicode character
+        $mBytes = 1; // cached expected number of octets in the current sequence
+
+        $out = array();
+
+        $len = strlen($str);
+        for ($i = 0; $i < $len; $i++) {
+            // Square-bracket offset: the curly-brace form is no longer valid PHP.
+            $in = ord($str[$i]);
+            if (0 == $mState) {
+                // When mState is zero we expect either a US-ASCII character or a
+                // multi-octet sequence.
+                if (0 == (0x80 & ($in))) {
+                    // US-ASCII, pass straight through.
+                    $out[] = $in;
+                    $mBytes = 1;
+                } else if (0xC0 == (0xE0 & ($in))) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+                } else if (0xE0 == (0xF0 & ($in))) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+                } else if (0xF0 == (0xF8 & ($in))) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+                } else if (0xF8 == (0xFC & ($in))) {
+                    /* First octet of 5 octet sequence.
+                     *
+                     * This is illegal because the encoded codepoint must be either
+                     * (a) not the shortest form or
+                     * (b) outside the Unicode range of 0-0x10FFFF.
+                     * Rather than trying to resynchronize, we will carry on until the end
+                     * of the sequence and let the later error handling code catch it.
+                     */
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+                } else if (0xFC == (0xFE & ($in))) {
+                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+                } else {
+                    /* Current octet is neither in the US-ASCII range nor a legal first
+                     * octet of a multi-octet sequence.
+                     */
+                    return false;
+                }
+            } else {
+                // When mState is non-zero, we expect a continuation of the multi-octet
+                // sequence
+                if (0x80 == (0xC0 & ($in))) {
+                    // Legal continuation.
+                    $shift = ($mState - 1) * 6;
+                    $tmp = $in;
+                    $tmp = ($tmp & 0x0000003F) << $shift;
+                    $mUcs4 |= $tmp;
+
+                    if (0 == --$mState) {
+                        /* End of the multi-octet sequence. mUcs4 now contains the final
+                         * Unicode codepoint to be output
+                         *
+                         * Check for illegal sequences and codepoints.
+                         */
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters are illegal
+                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)
+                        ) {
+                            return false;
+                        }
+                        if (0xFEFF != $mUcs4) {
+                            // BOM is legal but we don't want to output it
+                            $out[] = $mUcs4;
+                        }
+                        // initialize UTF8 cache
+                        $mState = 0;
+                        $mUcs4 = 0;
+                        $mBytes = 1;
+                    }
+                } else {
+                    /* ((0xC0 & (*in) != 0x80) && (mState != 0))
+                     *
+                     * Incomplete multi-octet sequence.
+                     */
+                    return false;
+                }
+            }
+        }
+        return $out;
+    }
+
+    /**
+     * Encodes an array of Unicode code points as a UTF-8 string.
+     *
+     * Code points above 0xFFFF (astral planes) are supported. The byte order
+     * mark (0xFEFF) is silently skipped.
+     *
+     * @param array $arr Integer code points
+     * @return string|false UTF-8 string, or false when a value is negative,
+     *                      a surrogate (0xD800-0xDFFF) or above 0x10FFFF
+     */
+    public static function unicodeToUtf8($arr)
+    {
+        $utf8 = '';
+
+        foreach ($arr as $cp) {
+            if ($cp < 0) {
+                return false;
+            }
+
+            if ($cp <= 0x007f) {
+                // One byte
+                $utf8 .= chr($cp);
+                continue;
+            }
+
+            if ($cp <= 0x07ff) {
+                // Two bytes
+                $utf8 .= chr(0xc0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x003f));
+                continue;
+            }
+
+            if ($cp == 0xFEFF) {
+                // Byte order mark: dropped
+                continue;
+            }
+
+            if ($cp >= 0xD800 && $cp <= 0xDFFF) {
+                // Surrogates are not allowed
+                return false;
+            }
+
+            if ($cp <= 0xffff) {
+                // Three bytes
+                $utf8 .= chr(0xe0 | ($cp >> 12))
+                    . chr(0x80 | (($cp >> 6) & 0x003f))
+                    . chr(0x80 | ($cp & 0x003f));
+                continue;
+            }
+
+            if ($cp <= 0x10ffff) {
+                // Four bytes
+                $utf8 .= chr(0xf0 | ($cp >> 18))
+                    . chr(0x80 | (($cp >> 12) & 0x3f))
+                    . chr(0x80 | (($cp >> 6) & 0x3f))
+                    . chr(0x80 | ($cp & 0x3f));
+                continue;
+            }
+
+            // Beyond the Unicode range
+            return false;
+        }
+
+        return $utf8;
+    }
+
+    /**
+     * Returns the UTF-8 encoding of a single code point, via unicodeToUtf8().
+     *
+     * @param integer $n Unicode code point
+     * @return string|false
+     */
+    public static function uchr($n)
+    {
+        $codePoints = array($n);
+
+        return self::unicodeToUtf8($codePoints);
+    }
+
+    /**
+     * Returns the code point of the first character of a UTF-8 string.
+     *
+     * @param string $c UTF-8 string
+     * @return integer|null Code point, or null when the string is empty or
+     *                      utf8ToUnicode() rejects it
+     */
+    public static function uord($c)
+    {
+        $codes = self::utf8ToUnicode($c);
+        if (!is_array($codes)) {
+            // utf8ToUnicode() returned false: array_shift() needs an array.
+            return null;
+        }
+
+        return array_shift($codes);
+    }
+
+    /**
+     * strcmp() with optional trimming, accent folding and case folding.
+     *
+     * @param string $s1
+     * @param string $s2
+     * @param boolean $ignoreCase Compare lower-cased strings
+     * @param boolean $ignoreAccents Compare strings passed through removeAccents()
+     * @param string|false $trim Character list handed to trim(), or false to skip trimming
+     * @return integer Result of the native strcmp()
+     */
+    public static function strcmp($s1, $s2, $ignoreCase = false, $ignoreAccents = false, $trim = false)
+    {
+        $operands = array($s1, $s2);
+
+        // Both operands go through the same pipeline: trim, accents, case.
+        foreach ($operands as $k => $operand) {
+            if ($trim !== false) {
+                $operand = trim($operand, $trim);
+            }
+            if ($ignoreAccents) {
+                $operand = self::removeAccents($operand);
+            }
+            if ($ignoreCase) {
+                $operand = mb_strtolower($operand);
+            }
+            $operands[$k] = $operand;
+        }
+
+        return strcmp($operands[0], $operands[1]);
+    }
+
+    /**
+     * Replaces every run of whitespace (line breaks included) by a single space.
+     *
+     * @param string $input
+     * @return string
+     */
+    public static function removeNewLines($input)
+    {
+        return preg_replace('|\s+|', ' ', $input);
+    }
+
+    /**
+     * Splits a text into lines, whatever the line-ending convention.
+     *
+     * @param string $str Text to split
+     * @param boolean $compact When true, lines are trimmed and empty ones are dropped
+     * @return array
+     */
+    public static function splitLines($str, $compact = true)
+    {
+        $unix = str_replace(array("\r\n", "\r"), "\n", $str);
+        $lines = explode("\n", $unix);
+
+        if (!$compact) {
+            return $lines;
+        }
+
+        $nonEmpty = array_filter(array_map('trim', $lines), function ($line) {
+            return $line !== '';
+        });
+
+        // Re-index so the result is a plain list.
+        return array_values($nonEmpty);
+    }
+
+    /**
+     * parse_url() with a few conveniences.
+     *
+     * Protocol-relative URLs ("//host/...") get an "http:" prefix. When
+     * $forceScheme is set and no scheme was found, the URL is parsed again with
+     * "http://" in front. The result is extended with "query_params" (the parsed
+     * query string) and "path_components" (the non-empty path segments).
+     *
+     * @param string $url
+     * @param boolean $forceScheme
+     * @return array|false Whatever parse_url() returned, plus the extra keys
+     */
+    public static function parseUrl($url, $forceScheme = true)
+    {
+        $url = trim($url);
+        if (strncmp($url, '//', 2) === 0) {
+            $url = 'http:' . $url;
+        }
+
+        $parts = parse_url($url);
+        if ($forceScheme && !isset($parts['scheme'])) {
+            $parts = parse_url('http://' . $url);
+        }
+
+        if (isset($parts['query'])) {
+            parse_str($parts['query'], $params);
+            $parts['query_params'] = $params;
+        }
+
+        if (isset($parts['path'])) {
+            $segments = explode('/', trim($parts['path'], '/'));
+            $parts['path_components'] = array_values(array_filter($segments, function ($segment) {
+                return $segment != '';
+            }));
+        }
+
+        return $parts;
+    }
+
+    /**
+     * Picks the singular or plural label for a quantity.
+     *
+     * @param integer $nb Quantity (cast with intval())
+     * @param string $singulier Label used when $nb is 1 or less
+     * @param string $pluriel Label used when $nb is greater than 1
+     * @param string|false $zero When truthy and $nb is 0, returned as is
+     * @param boolean $displayNb Prefix the label with the number and a space
+     * @return string
+     */
+    public static function pluriel($nb, $singulier, $pluriel, $zero = false, $displayNb = true)
+    {
+        $nb = intval($nb);
+        if ($nb == 0 && $zero) {
+            return $zero;
+        }
+
+        $prefix = $displayNb ? $nb . ' ' : '';
+        $label = $nb <= 1 ? $singulier : $pluriel;
+
+        return $prefix . $label;
+    }
+
+    /**
+     * Normalises line endings.
+     *
+     * @param string $text
+     * @param string $os 'win' for "\r\n" endings; any other value yields "\n"
+     * @return string
+     */
+    public static function normalizeLines($text, $os = 'nix')
+    {
+        $unix = str_replace(array("\r\n", "\r"), "\n", $text);
+
+        return $os == 'win' ? str_replace("\n", "\r\n", $unix) : $unix;
+    }
+
+    /**
+     * Converts an underscore_separated string to camel case.
+     *
+     * @param string $str String to convert
+     * @param boolean $upperFirst When true the first letter is upper-cased too
+     *                            (StudlyCase) instead of lower-cased (camelCase)
+     * @return string
+     */
+    public static function underscoreToCamelCase($str, $upperFirst = false)
+    {
+        return $upperFirst ? Str::studly($str) : Str::camel($str);
+    }
+
+    /**
+     * Converts a camelCase string to snake_case, delegating to Str::snake().
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function camelCaseToUnderscore($str)
+    {
+        $snake = Str::snake($str);
+
+        return $snake;
+    }
+
+    /**
+     * Stops orphans in HTML by replacing the last space with a &nbsp;
+     *
+     * Spaces next to punctuation that is preceded or followed by a space
+     * (as in French typography) become non-breaking as well.
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function preventOrphans($str)
+    {
+        $nbsp = '&nbsp;';
+
+        $lastSpace = strrpos($str, ' ');
+        if ($lastSpace !== false) {
+            $str = substr_replace($str, $nbsp, $lastSpace, 1);
+        }
+
+        // Punctuation with its adjoining space => same punctuation with &nbsp;
+        $spacedPunctuation = array(
+            ' :' => $nbsp . ':',
+            ' !' => $nbsp . '!',
+            ' ?' => $nbsp . '?',
+            '« ' => '«' . $nbsp,
+            ' »' => $nbsp . '»',
+        );
+
+        return str_replace(array_keys($spacedPunctuation), array_values($spacedPunctuation), $str);
+    }
+
+    /**
+     * Check email address
+     *
+     * Returns true if $email matches the address grammar assembled below
+     * (atoms, quoted strings and domain literals).
+     *
+     * @param string $email Email string
+     * @return boolean
+     * @link http://www.iamcal.com/publish/articles/php/parsing_email/
+     *
+     * @copyright Cal Henderson
+     * @license http://creativecommons.org/licenses/by-sa/2.5/ CC-BY-SA
+     */
+    public static function isEmail($email)
+    {
+        // Elementary character classes
+        $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
+        $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
+        $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
+        $quotedPair = '\\x5c[\\x00-\\x7f]';
+
+        // Composite tokens
+        $domainLiteral = '\\x5b(' . $dtext . '|' . $quotedPair . ')*\\x5d';
+        $quotedString = '\\x22(' . $qtext . '|' . $quotedPair . ')*\\x22';
+        $subDomain = '(' . $atom . '|' . $domainLiteral . ')';
+        $word = '(' . $atom . '|' . $quotedString . ')';
+
+        // local-part "@" domain, each being dot-separated tokens
+        $domain = $subDomain . '(\\x2e' . $subDomain . ')*';
+        $localPart = $word . '(\\x2e' . $word . ')*';
+        $addrSpec = $localPart . '\\x40' . $domain;
+
+        return (bool)preg_match('!^' . $addrSpec . '$!', $email);
+    }
+
+    /**
+     * Accents replacement
+     *
+     * Replaces some occidental accentuated characters by their ASCII
+     * representation. The table of characters is the one returned by
+     * getAccentsPattern(), so both helpers always agree.
+     *
+     * @param string $str String to deaccent
+     * @return    string
+     */
+    public static function deaccent($str)
+    {
+        // Key: ASCII replacement, value: body of a PCRE character class.
+        foreach (self::getAccentsPattern() as $r => $p) {
+            $str = preg_replace('/[' . $p . ']/u', $r, $str);
+        }
+
+        return $str;
+    }
+
+    /**
+     * URL cleanup
+     *
+     * Strips tags and URL-significant characters, collapses whitespace and
+     * dashes, and removes a leading slash as well as "./"-style path changes.
+     *
+     * @param string $str URL to tidy
+     * @param boolean $keep_slashes Keep slashes in URL
+     * @param boolean $keep_spaces Keep spaces in URL
+     * @return string
+     */
+    public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
+    {
+        $str = strip_tags($str);
+        $str = str_replace(array('?', '&', '#', '=', '+', '<', '>', '"', '%'), '', $str);
+        $str = str_replace("'", ' ', $str);
+        $str = preg_replace('/[\s]+/u', ' ', trim($str));
+
+        // Characters turned into dashes, depending on the flags.
+        $dashed = array();
+        if (!$keep_slashes) {
+            $dashed[] = '/';
+        }
+        if (!$keep_spaces) {
+            $dashed[] = ' ';
+        }
+        $str = str_replace($dashed, '-', $str);
+
+        // In order: collapse dashes, drop a leading slash, remove path changes.
+        return preg_replace(
+            array('/[-]+/', '%^/%', '%\.+/%'),
+            array('-', '', ''),
+            $str
+        );
+    }
+
+    /**
+     * Cut string
+     *
+     * Returns the string cut on whitespace so that it holds at most $l
+     * characters. When the first word alone reaches $l, it is cut to $l characters.
+     *
+     * @param string $str String to cut
+     * @param integer $l Length to keep
+     * @return    string
+     */
+    public static function cutString($str, $l)
+    {
+        // Words and the whitespace between them, as separate tokens.
+        $tokens = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+        if (mb_strlen($tokens[0]) >= $l) {
+            return mb_substr($tokens[0], 0, $l);
+        }
+
+        $kept = '';
+        $used = 0;
+        foreach ($tokens as $token) {
+            $used += mb_strlen($token);
+            if ($used > $l) {
+                break;
+            }
+            $kept .= $token;
+        }
+
+        return trim($kept);
+    }
+
+    /**
+     * Split words
+     *
+     * Returns an array of lowercased words from a given string. A word is a
+     * run of at least $minChar characters that are neither whitespace nor in
+     * the excluded punctuation/control ranges listed in the pattern below.
+     * The input is passed through html::clean() before matching.
+     *
+     * NOTE(review): html::clean() is resolved relative to this namespace and
+     * is not defined in this part of the file - confirm the class is available.
+     *
+     * @param string $str Words to split
+     * @param integer $minChar Minimum length of a word (inserted as-is into the pattern)
+     * @return array
+     */
+    public static function splitWords($str, $minChar = 3)
+    {
+        $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+        $regex = '/([^' . $non_word . ']{' . $minChar . ',})/msu';
+
+        // No match (or a PCRE failure) yields an empty list.
+        if (!preg_match_all($regex, html::clean($str), $match)) {
+            return array();
+        }
+
+        return array_map('mb_strtolower', $match[1]);
+    }
+
+    /**
+     * Encoding detection
+     *
+     * Returns the encoding (in lowercase) of given $str, chosen by
+     * mb_detect_encoding() among UTF-8 and a fixed list of ISO-8859
+     * variants. A single space is appended to the string before detection.
+     *
+     * @param string $str String
+     * @return string
+     */
+    public static function detectEncoding($str)
+    {
+        // Candidate encodings, tried in this order.
+        $candidates = 'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .
+            'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .
+            'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15';
+
+        $detected = mb_detect_encoding($str . ' ', $candidates);
+
+        return strtolower($detected);
+    }
+
+    /**
+     * Find bad UTF8 tokens
+     *
+     * Locates the first bad byte in a UTF-8 string, returning its
+     * byte index in the string, or false when every byte sequence is valid.
+     * PCRE Pattern to locate bad bytes in a UTF-8 string
+     * Comes from W3 FAQ: Multilingual Forms
+     * Note: modified to include full ASCII range including control chars
+     *
+     * The string is scanned in place, one token at a time, using the offset
+     * argument of preg_match() with a \G anchor, so no copy of the remaining
+     * string is made on each step.
+     *
+     * @param string $str String to search
+     * @return integer|false
+     * @link http://phputf8.sourceforge.net
+     *
+     * @copyright Harry Fuecks
+     * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html GNU LGPL 2.1
+     */
+    public static function utf8badFind($str)
+    {
+        // Group 1 wraps every alternative; group 2 only participates when the
+        // fallback "any single byte" alternative is the one that matched.
+        $UTF8_BAD =
+            '([\x00-\x7F]' .                          # ASCII (including control chars)
+            '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
+            '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
+            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
+            '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
+            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
+            '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
+            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
+            '|(.{1}))';                              # invalid byte
+
+        // \G pins each match to the current offset.
+        $pattern = '/\G' . $UTF8_BAD . '/S';
+        $length = strlen($str);
+        $pos = 0;
+
+        while ($pos < $length && preg_match($pattern, $str, $matches, 0, $pos)) {
+            if (isset($matches[2])) {
+                // The fallback alternative matched: this byte is invalid.
+                return $pos;
+            }
+            $pos += strlen($matches[0]);
+        }
+
+        return false;
+    }
+
+
+    /**
+     * BOM removal
+     *
+     * Removes the UTF-8 byte order mark (bytes EF BB BF) from a string.
+     * Every occurrence is removed, not only one at the beginning.
+     *
+     * @param string $str String to clean
+     * @return string
+     */
+    public static function removeBOM($str)
+    {
+        // The BOM is spelled out as an escape sequence: a raw BOM literal is
+        // invisible and easily stripped by editors or tooling, which would
+        // leave an empty needle.
+        return str_replace("\xEF\xBB\xBF", '', $str);
+    }
+
+    /**
+     * Quoted printable conversion
+     *
+     * Encodes given str to quoted printable. The input is split on LF or
+     * CRLF; within each line, every byte below 32, above 126 or equal to
+     * "=" (61) is replaced by "=XX" (uppercase hexadecimal). Each line,
+     * including the last one, is terminated by CRLF in the result.
+     *
+     * The conversion is byte-oriented, so the input does not have to be
+     * valid UTF-8.
+     *
+     * @param string $str String to encode
+     * @return string
+     */
+    public static function QPEncode($str)
+    {
+        $res = '';
+
+        // No "u" modifier here: with it, preg_split() fails on input that is
+        // not valid UTF-8 and nothing would be encoded at all.
+        foreach (preg_split("/\r?\n/", $str) as $line) {
+            $l = '';
+            $len = strlen($line);
+
+            for ($i = 0; $i < $len; $i++) {
+                $c = $line[$i];
+                $a = ord($c);
+
+                if ($a < 32 || $a == 61 || $a > 126) {
+                    $c = sprintf('=%02X', $a);
+                }
+
+                $l .= $c;
+            }
+
+            $res .= $l . "\r\n";
+        }
+
+        return $res;
+    }
 
 }
\ No newline at end of file