--- /dev/null
+<?php
+
+namespace Cubist\Util;
+class Text
+{
+
+ /**
+  * Converts $text to UTF-8, assuming ISO-8859-1 input unless $from says otherwise.
+  *
+  * Thin wrapper that forwards both arguments to toUTF8().
+  *
+  * @param string $text Text to convert
+  * @param string $from Name of the source encoding
+  * @return string Whatever toUTF8() returns for these arguments
+  */
+ public static function utf8_encode($text, $from = 'ISO-8859-1')
+ {
+     $converted = self::toUTF8($text, $from);
+
+     return $converted;
+ }
+
+ /**
+  * Converts a string to UTF-8 and normalises a few leftover odd characters.
+  *
+  * When no source encoding is given it is guessed with detectEncoding().
+  * The conversion first tries transliteration; if iconv() rejects the input
+  * it is retried while dropping the characters that cannot be converted.
+  * If both attempts fail the input is returned untouched instead of being
+  * silently turned into an empty string.
+  *
+  * @param string      $str      String to convert
+  * @param string|null $encoding Source encoding, or null/empty to auto-detect
+  * @return string
+  */
+ public static function toUTF8($str, $encoding = null)
+ {
+     if (!$encoding) {
+         $encoding = self::detectEncoding($str);
+     }
+
+     $converted = iconv($encoding, 'UTF-8//TRANSLIT', $str);
+     if ($converted === false) {
+         // Second chance: discard what cannot be represented rather than failing.
+         $converted = iconv($encoding, 'UTF-8//IGNORE', $str);
+     }
+     if ($converted === false) {
+         return $str;
+     }
+
+     return self::removeOddStuff($converted);
+ }
+
+ /**
+  * Replaces a fixed set of code points (C1 controls U+0085..U+009C and a few
+  * quote-like characters) with plain substitutes.
+  *
+  * Each map entry is "replacement => character-class content"; the entries
+  * are applied one after the other with a UTF-8 aware regular expression.
+  *
+  * @param string $str UTF-8 string
+  * @return string
+  */
+ public static function removeOddStuff($str)
+ {
+     $substitutions = array(
+         "'" => '\x{0092}\x{00b4}\x{0060}\x{2018}\x{2019}',
+         'oe' => '\x{009c}',
+         '...' => '\x{0085}',
+         'Oe' => '\x{008c}',
+         ' ' => '\x{0096}',
+         '«' => '\x{0093}',
+         '»' => '\x{0094}',
+     );
+
+     foreach ($substitutions as $replacement => $class) {
+         $regex = '|[' . $class . ']|u';
+         $str = preg_replace($regex, $replacement, $str);
+     }
+
+     return $str;
+ }
+
+ /**
+  * Returns the accent folding table used by the accent-related helpers.
+  *
+  * Keys are the ASCII replacement, values are the content of a PCRE
+  * character class (to be used with the "u" modifier) listing the accented
+  * code points that map to that replacement.
+  *
+  * @return array
+  */
+ public static function getAccentsPattern()
+ {
+     return array(
+         // Upper case
+         'A' => '\x{00C0}-\x{00C5}',
+         'AE' => '\x{00C6}',
+         'C' => '\x{00C7}',
+         'D' => '\x{00D0}',
+         'E' => '\x{00C8}-\x{00CB}',
+         'I' => '\x{00CC}-\x{00CF}',
+         'N' => '\x{00D1}',
+         'O' => '\x{00D2}-\x{00D6}\x{00D8}',
+         'OE' => '\x{0152}',
+         'S' => '\x{0160}',
+         'U' => '\x{00D9}-\x{00DC}',
+         'Y' => '\x{00DD}',
+         'Z' => '\x{017D}',
+         // Lower case
+         'a' => '\x{00E0}-\x{00E5}',
+         'ae' => '\x{00E6}',
+         'c' => '\x{00E7}',
+         'd' => '\x{00F0}',
+         'e' => '\x{00E8}-\x{00EB}',
+         'i' => '\x{00EC}-\x{00EF}',
+         'n' => '\x{00F1}',
+         'o' => '\x{00F2}-\x{00F6}\x{00F8}',
+         'oe' => '\x{0153}',
+         's' => '\x{0161}',
+         'u' => '\x{00F9}-\x{00FC}',
+         'y' => '\x{00FD}\x{00FF}',
+         'z' => '\x{017E}',
+         // Sharp s
+         'ss' => '\x{00DF}',
+     );
+ }
+
+ /**
+  * Replaces accented characters by their ASCII counterpart.
+  *
+  * When $clean is true the string first goes through cleanUTF8() and a few
+  * typographic characters (curly quotes, bullet, ellipsis, euro sign, en
+  * dash) are replaced by a space. The folding itself is driven by the
+  * table returned by getAccentsPattern().
+  *
+  * @param string  $str   UTF-8 string
+  * @param boolean $clean Whether to sanitise the string before folding
+  * @return string
+  */
+ public static function removeAccents($str, $clean = true)
+ {
+     if ($clean) {
+         $str = self::cleanUTF8($str);
+         $typographic = array('’', '”', '“', '•', '…', '€', '–', '‘');
+         $str = str_replace($typographic, ' ', $str);
+     }
+
+     foreach (self::getAccentsPattern() as $ascii => $class) {
+         $str = preg_replace('/[' . $class . ']/u', $ascii, $str);
+     }
+
+     // The former trailing strtr($str, 'o', 'o') mapped "o" to itself and has
+     // been dropped: it could not change the result.
+     return $str;
+ }
+
+ /**
+  * Folds accents, turns every character that is not an ASCII letter or
+  * digit into a space, then collapses whitespace runs with condenseWhite().
+  *
+  * @param string $str UTF-8 string
+  * @return string
+  */
+ public static function keepOnlyLettersAndDigits($str)
+ {
+     $unaccented = self::removeAccents($str);
+     $spaced = preg_replace('|[^0-9A-Za-z]|ui', ' ', $unaccented);
+
+     return self::condenseWhite($spaced);
+ }
+
+ /**
+  * Builds a case-insensitive, UTF-8 regular expression matching $str
+  * regardless of accents.
+  *
+  * Every character that is a key of getAccentsPattern() becomes a character
+  * class containing the character itself plus its accented variants. All
+  * other characters are matched literally: they are escaped so that regex
+  * metacharacters (and the "|" delimiter) in the search text can no longer
+  * break or alter the generated pattern.
+  *
+  * @param string $str Plain text to search for
+  * @return string Complete PCRE pattern, delimiters and modifiers included
+  */
+ public static function makeAccentInsensiblePattern($str)
+ {
+     $patterns = self::getAccentsPattern();
+     $chars = preg_split('//ui', $str, -1, PREG_SPLIT_NO_EMPTY);
+
+     $pattern = '|';
+     foreach ($chars as $char) {
+         if (isset($patterns[$char])) {
+             $pattern .= '[' . $char . $patterns[$char] . ']';
+         } else {
+             $pattern .= preg_quote($char, '|');
+         }
+     }
+
+     return $pattern . '|iu';
+ }
+
+ /**
+  * Accent-insensitive replacement: $search is turned into a pattern by
+  * makeAccentInsensiblePattern() and handed to preg_replace().
+  *
+  * @param string $search  Text to look for
+  * @param string $replace Replacement passed to preg_replace()
+  * @param string $subject String to search in
+  * @return string|null Result of preg_replace()
+  */
+ public static function preg_areplace($search, $replace, $subject)
+ {
+     return preg_replace(
+         self::makeAccentInsensiblePattern($search),
+         $replace,
+         $subject
+     );
+ }
+
+ /**
+  * explode() with several single-character separators.
+  *
+  * Every character of $separator is a delimiter. $limit follows the
+  * semantics of explode(): a positive value caps the number of elements
+  * (the last one holds the untouched remainder), zero behaves like one and
+  * a negative value drops that many trailing elements.
+  *
+  * The string is split directly instead of rewriting the separators to a
+  * placeholder character first, so the placeholder can no longer leak into
+  * the last element when $limit is used, and an empty separator no longer
+  * produces an invalid regular expression.
+  *
+  * Matching is byte-wise (no "u" modifier), so separators are expected to
+  * be single-byte characters.
+  *
+  * @param string       $separator Characters to split on
+  * @param string       $str       String to split
+  * @param integer|null $limit     Optional explode()-style limit
+  * @return array
+  */
+ public static function multiExplode($separator, $str, $limit = null)
+ {
+     $separator = (string)$separator;
+     if ($separator === '') {
+         return array($str);
+     }
+
+     $regex = '|[' . preg_quote($separator, '|') . ']|';
+
+     if (is_null($limit)) {
+         return preg_split($regex, $str);
+     }
+
+     $limit = (int)$limit;
+     if ($limit < 0) {
+         return array_slice(preg_split($regex, $str), 0, $limit);
+     }
+
+     // explode() treats a zero limit as one.
+     return preg_split($regex, $str, max(1, $limit));
+ }
+
+ /**
+  * Counts the whitespace-separated words of a string.
+  *
+  * Runs of whitespace count as a single separator and empty tokens are
+  * ignored, so an empty string has zero words and repeated, leading or
+  * trailing spaces do not inflate the result.
+  *
+  * @param string $str
+  * @return integer
+  */
+ public static function countWords($str)
+ {
+     return count(preg_split('|\s+|', $str, -1, PREG_SPLIT_NO_EMPTY));
+ }
+
+ /**
+  * Splits a string into lines, ignoring surrounding whitespace and
+  * consecutive line breaks.
+  *
+  * The trimmed string is passed through condenseNewLines() and then split
+  * on vertical whitespace. A blank input yields an empty array.
+  *
+  * @param string $str
+  * @return array
+  */
+ public static function explodeNewLines($str)
+ {
+     $trimmed = trim($str);
+     if ('' === $trimmed) {
+         return [];
+     }
+
+     return preg_split('|\v|', self::condenseNewLines($trimmed));
+ }
+
+ /**
+  * Shortens a string to a word budget, also bounded by a character budget.
+  *
+  * The character budget is six characters per originally requested word.
+  * Starting from $words, the word budget is lowered until the candidate
+  * fits in the character budget. $end is appended only when the string was
+  * actually truncated.
+  *
+  * The search is iterative: the word count of $str is computed once and a
+  * budget that drops to zero or below ends the search with just $end,
+  * which bounds the work even for a negative character budget.
+  *
+  * NOTE(review): the candidate is built from multiExplode() with a limit
+  * of $budget followed by array_pop(), which keeps $budget - 1 words. This
+  * behaviour is preserved as is; confirm whether it is intended.
+  *
+  * @param string       $str       String to shorten
+  * @param integer      $words     Word budget
+  * @param string       $end       Suffix appended when truncation happened
+  * @param integer|null $wordsorig Budget used for the character limit
+  *                                (defaults to $words)
+  * @return string
+  */
+ public static function substrWord($str, $words, $end = '', $wordsorig = null)
+ {
+     if (is_null($wordsorig)) {
+         $wordsorig = $words;
+     }
+
+     $maxchars = $wordsorig * 6;
+     $total = self::countWords($str);
+
+     for ($budget = $words; ; $budget--) {
+         if ($total <= $budget) {
+             $candidate = $str;
+             $truncated = false;
+         } elseif ($budget <= 0) {
+             // Nothing can be kept any more: only the suffix remains.
+             return '' . $end;
+         } else {
+             $pieces = self::multiExplode(" \n", $str, $budget);
+             array_pop($pieces);
+             $candidate = implode(' ', $pieces);
+             $truncated = true;
+         }
+
+         if (mb_strlen($candidate) <= $maxchars) {
+             return $truncated ? $candidate . $end : $candidate;
+         }
+     }
+ }
+
+ /**
+  * Shortens a string to at most $chars characters without cutting a word.
+  *
+  * Lengths are measured in characters (mb_* functions) rather than bytes,
+  * so multibyte text is neither truncated too early nor cut in the middle
+  * of a character. The last, possibly partial, word of the cut is dropped
+  * and $end is appended.
+  *
+  * @param string  $str   String to shorten
+  * @param integer $chars Maximum number of characters to consider
+  * @param string  $end   Suffix appended when the string was shortened
+  * @return string
+  */
+ public static function substrWordChars($str, $chars, $end = '')
+ {
+     if (mb_strlen($str) <= $chars) {
+         return $str;
+     }
+
+     $cut = trim(mb_substr($str, 0, $chars));
+     $words = preg_split('|\s+|', $cut);
+     array_pop($words);
+
+     return implode(' ', $words) . $end;
+ }
+
+ /**
+  * Multibyte-aware ucfirst().
+  *
+  * @param string  $str   Input string
+  * @param boolean $lower Lower-case the whole string before capitalising
+  * @return string
+  */
+ public static function ucfirst($str, $lower = false)
+ {
+     $source = $lower ? mb_strtolower($str) : $str;
+     $head = mb_substr($source, 0, 1);
+     $tail = mb_substr($source, 1);
+
+     return mb_strtoupper($head) . $tail;
+ }
+
+ /**
+  * Replaces line feeds and carriage returns by spaces, then collapses
+  * whitespace runs with condenseWhite().
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function removeNl($str)
+ {
+     $flattened = str_replace(array("\n", "\r"), ' ', $str);
+
+     return self::condenseWhite($flattened);
+ }
+
+ /**
+  * Collapses every run of two or more whitespace characters into a single
+  * space.
+  *
+  * The run length is unbounded (the previous upper bound of 100 left
+  * several spaces behind for longer runs). If the input is not valid UTF-8
+  * the Unicode-aware pass fails, so a byte-wise pass is used instead of
+  * returning null.
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function condenseWhite($str)
+ {
+     $condensed = preg_replace('|\s{2,}|u', ' ', $str);
+     if ($condensed === null) {
+         $condensed = preg_replace('|\s{2,}|', ' ', $str);
+     }
+
+     return $condensed;
+ }
+
+ /**
+  * Normalises line endings with normalizeLines() and collapses consecutive
+  * line feeds into one.
+  *
+  * The run length is unbounded (the previous upper bound of 100 left
+  * several line feeds behind for longer runs).
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function condenseNewLines($str)
+ {
+     $normalized = self::normalizeLines($str);
+
+     return preg_replace('|\n{2,}|', "\n", $normalized);
+ }
+
+ /**
+  * Converts an HTML fragment to text: tags are removed with the class'
+  * strip_tags() and non-breaking spaces become regular spaces.
+  *
+  * NOTE(review): the replacement used to map a space to a space, which
+  * looks like a "&nbsp;" entity lost in transit. Both the entity and the
+  * UTF-8 encoded U+00A0 character are handled now; confirm against the
+  * upstream source.
+  *
+  * @param string $str HTML fragment
+  * @return string
+  */
+ public static function html2text($str)
+ {
+     $text = self::strip_tags($str);
+
+     return str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);
+ }
+
+ /**
+  * Regex based tag stripper.
+  *
+  * Adapted from http://www.php.net/manual/fr/function.strip-tags.php#86463
+  *
+  * Without $allowed_tags every opening, closing or self-closing tag is
+  * matched. With $allowed_tags only tags bearing one of those names are
+  * matched at all; other tags are left untouched. Each matched tag whose
+  * captured name is not found in $allowed_tags (in_array, case sensitive)
+  * is removed from the string or, when $trim is true, dropped from the
+  * returned list of tags.
+  *
+  * The default tag pattern accepts digits after the first letter so that
+  * h1..h6 are stripped too, and attributes may have empty values or names
+  * containing digits, ":" and "-".
+  *
+  * @param string       $str          HTML fragment
+  * @param array|string $allowed_tags Tag name(s) restricting the match
+  * @param boolean      $trim         Return the concatenation of the kept
+  *                                   matched tags instead of the string
+  * @return string
+  */
+ public static function strip_tags($str, $allowed_tags = array(), $trim = false)
+ {
+     if (!is_array($allowed_tags)) {
+         $allowed_tags = !empty($allowed_tags) ? array($allowed_tags) : array();
+     }
+     $tags = implode('|', $allowed_tags);
+
+     if (empty($tags)) {
+         $tags = '[a-z][a-z0-9]*';
+     }
+
+     $regex = '@</?\s*(' . $tags . ')(\s+[a-z_][a-z0-9_:-]*=(\'[^\']*\'|"[^"]*"))*\s*/?>@i';
+     preg_match_all($regex, $str, $matches);
+
+     $full_tags = $matches[0];
+     $tag_names = $matches[1];
+
+     foreach ($full_tags as $i => $full_tag) {
+         if (in_array($tag_names[$i], $allowed_tags)) {
+             continue;
+         }
+         if ($trim) {
+             unset($full_tags[$i]);
+         } else {
+             $str = str_replace($full_tag, '', $str);
+         }
+     }
+
+     return $trim ? implode('', $full_tags) : $str;
+ }
+
+ /**
+  * Turns a string (or a JSON-encoded object) into a URL-friendly slug.
+  *
+  * Colons, and slashes unless $exclude_slashs is set, become spaces;
+  * accents are folded with deaccent(); characters outside a small
+  * whitelist are dropped; the result is finished by tidyURL().
+  *
+  * NOTE(review): the first replacement used to map "&" to "&", which
+  * looks like an "&amp;" entity lost in transit; the entity is decoded
+  * again so that "a &amp; b" no longer yields "a-amp-b". $replace and
+  * $exclude_dots are accepted but not used by this method.
+  *
+  * @param string|object $str            Source text
+  * @param string        $replace        Unused
+  * @param boolean       $exclude_slashs Keep slashes instead of spacing them
+  * @param boolean       $exclude_dots   Unused
+  * @return string
+  */
+ public static function str2URL($str, $replace = '-', $exclude_slashs = false, $exclude_dots = false)
+ {
+     if (is_object($str)) {
+         $str = json_encode($str);
+     }
+
+     $str = str_replace('&amp;', '&', $str);
+     $str = str_replace(':', ' ', $str);
+     if (!$exclude_slashs) {
+         $str = str_replace('/', ' ', $str);
+     }
+
+     $str = self::deaccent($str);
+     $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);
+
+     return self::tidyURL($str, true);
+ }
+
+ /**
+  * Replaces every byte reported by utf8badFind() with $replace, one byte
+  * at a time, until no invalid byte is left.
+  *
+  * The literal four-character sequences \16 and \18 (backslash followed
+  * by two digits, as written in single quotes) are replaced as well.
+  *
+  * @param string $str     String to sanitise
+  * @param string $replace Replacement for each offending byte
+  * @return string
+  */
+ public static function cleanUTF8($str, $replace = '?')
+ {
+     $badIndex = self::utf8badFind($str);
+     while ($badIndex !== false) {
+         $str = substr_replace($str, $replace, $badIndex, 1);
+         $badIndex = self::utf8badFind($str);
+     }
+
+     return str_replace(array('\16', '\18'), $replace, $str);
+ }
+
+ /**
+  * Returns the UTF-8 character for an HTML entity name or reference.
+  *
+  * Leading/trailing "&" and ";" are optional in $code.
+  *
+  * @param string $code Entity, e.g. "eacute", "&eacute;" or "#8364"
+  * @return string
+  */
+ public static function getChar($code)
+ {
+     $name = trim($code, '&;');
+     $entity = '&' . $name . ';';
+
+     return html_entity_decode($entity, ENT_QUOTES, 'UTF-8');
+ }
+
+ /**
+  * Generates pseudo-random filler text of exactly $length characters.
+  *
+  * Characters are drawn from a small alphabet weighted towards vowels and
+  * spaces. The loop bound used to be inclusive, which produced one
+  * character more than requested.
+  *
+  * @param integer $length Number of characters to generate
+  * @return string
+  */
+ public static function randText($length = 300)
+ {
+     $alphabet = 'aeiouy azertyuiopqsdfghjklmwxcvbn eaiouaeiou ';
+     $last = strlen($alphabet) - 1;
+
+     $res = '';
+     for ($i = 0; $i < $length; $i++) {
+         $res .= $alphabet[rand(0, $last)];
+     }
+
+     return $res;
+ }
+
+ /**
+  * Case-preserving variant of splitWords().
+  *
+  * Returns the words of at least $minChar characters found in $str after
+  * it went through html::clean(). A loop that re-assigned every match to
+  * itself has been removed, and the minimum length is now a parameter, as
+  * in splitWords(); its default keeps the former fixed value of 3.
+  *
+  * @param string  $str     Text to split
+  * @param integer $minChar Minimum word length
+  * @return array
+  */
+ public static function splitWordsWithCase($str, $minChar = 3)
+ {
+     $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+     $regex = '/([^' . $non_word . ']{' . max(1, (int)$minChar) . ',})/msu';
+
+     if (preg_match_all($regex, html::clean($str), $match)) {
+         return $match[1];
+     }
+
+     return array();
+ }
+
+ /**
+  * Finds which words of $str appear in a reference list, ignoring case and
+  * accents.
+  *
+  * The words of $str are normalised (accents removed, lower-cased) and
+  * intersected with $list; the original spellings of the matching words
+  * are returned.
+  *
+  * @param string       $str  Text to scan
+  * @param array|string $list Reference words, either as an array used as
+  *                           is or as a string split with splitWords()
+  * @return array|false Original words that matched, or false when none did
+  */
+ public static function find_words_from_list($str, $list)
+ {
+     $found = array_unique(self::splitWordsWithCase($str));
+     $wanted = is_array($list) ? $list : array_unique(self::splitWords($list));
+
+     // Normalised form => every original spelling seen for it.
+     $originals = array();
+     $normalized = array();
+     foreach ($found as $word) {
+         $key = strtolower(self::removeAccents($word));
+         $originals[$key][] = $word;
+         $normalized[] = $key;
+     }
+
+     $common = array_intersect($wanted, $normalized);
+     if (!$common) {
+         return false;
+     }
+
+     $res = array();
+     foreach ($common as $key) {
+         $res = array_merge($res, $originals[$key]);
+     }
+
+     return $res;
+ }
+
+ /**
+  * Splits a string into an array of characters (multibyte aware).
+  *
+  * Uses the native mb_str_split() when the runtime provides it; the
+  * fallback extracts one character at a time with mb_substr(), which
+  * rescans the string from its start on every call.
+  *
+  * @param string $string
+  * @return array
+  */
+ public static function mb_str_split($string)
+ {
+     $string = (string)$string;
+
+     if (function_exists('mb_str_split')) {
+         return \mb_str_split($string);
+     }
+
+     $result = array();
+     $stop = mb_strlen($string);
+     for ($idx = 0; $idx < $stop; $idx++) {
+         $result[] = mb_substr($string, $idx, 1);
+     }
+
+     return $result;
+ }
+
+ /**
+  * Alias of mb_str_split(): returns the characters of $str as an array.
+  *
+  * @param string $str
+  * @return array
+  */
+ public static function strToArray($str)
+ {
+     $characters = self::mb_str_split($str);
+
+     return $characters;
+ }
+
+ /**
+  * Decodes a UTF-8 string into an array of Unicode code points.
+  *
+  * Byte order marks (U+FEFF) are skipped. The function returns false when
+  * the input is not well-formed UTF-8: stray continuation octets, 5 and 6
+  * octet sequences, non-shortest forms, surrogates, code points above
+  * U+10FFFF, and multi-octet sequences that are interrupted, including by
+  * the end of the string (a truncated trailing sequence used to be
+  * accepted silently).
+  *
+  * Octets are read with the bracket offset syntax; the curly brace form
+  * is no longer valid PHP as of version 8.
+  *
+  * @param string $str UTF-8 encoded string
+  * @return array|false List of integer code points, or false if malformed
+  */
+ public static function utf8ToUnicode($str)
+ {
+     $remaining = 0; // continuation octets still expected for the current sequence
+     $codepoint = 0; // code point being assembled
+     $seqLength = 1; // total number of octets of the current sequence
+
+     $out = array();
+
+     $len = strlen($str);
+     for ($i = 0; $i < $len; $i++) {
+         $in = ord($str[$i]);
+
+         if (0 === $remaining) {
+             // Start of a character: US-ASCII or the lead octet of a sequence.
+             if ($in < 0x80) {
+                 $out[] = $in;
+                 continue;
+             }
+
+             if (0xC0 === ($in & 0xE0)) {
+                 $codepoint = ($in & 0x1F) << 6;
+                 $remaining = 1;
+                 $seqLength = 2;
+             } elseif (0xE0 === ($in & 0xF0)) {
+                 $codepoint = ($in & 0x0F) << 12;
+                 $remaining = 2;
+                 $seqLength = 3;
+             } elseif (0xF0 === ($in & 0xF8)) {
+                 $codepoint = ($in & 0x07) << 18;
+                 $remaining = 3;
+                 $seqLength = 4;
+             } else {
+                 // Stray continuation octet, lead octet of a 5/6 octet
+                 // sequence (always illegal) or 0xFE/0xFF.
+                 return false;
+             }
+             continue;
+         }
+
+         // Inside a sequence: only continuation octets (10xxxxxx) are legal.
+         if (0x80 !== ($in & 0xC0)) {
+             return false;
+         }
+
+         $remaining--;
+         $codepoint |= ($in & 0x3F) << ($remaining * 6);
+
+         if ($remaining > 0) {
+             continue;
+         }
+
+         // Sequence complete: reject non-shortest forms (Unicode 3.1),
+         // surrogates (Unicode 3.2) and out-of-range code points.
+         if ((2 === $seqLength && $codepoint < 0x0080) ||
+             (3 === $seqLength && $codepoint < 0x0800) ||
+             (4 === $seqLength && $codepoint < 0x10000) ||
+             (($codepoint & 0xFFFFF800) === 0xD800) ||
+             ($codepoint > 0x10FFFF)
+         ) {
+             return false;
+         }
+
+         // The BOM is legal but is not part of the output.
+         if (0xFEFF !== $codepoint) {
+             $out[] = $codepoint;
+         }
+     }
+
+     // A sequence still open at the end of the input is incomplete.
+     return (0 === $remaining) ? $out : false;
+ }
+
+ /**
+  * Encodes an array of Unicode code points as a UTF-8 string.
+  *
+  * Astral planes are supported, i.e. the integers may be greater than
+  * 0xFFFF. Occurrences of the BOM (U+FEFF) are skipped.
+  *
+  * @param array $arr List of integer code points
+  * @return string|false The UTF-8 string, or false if a value is negative,
+  *                      a surrogate or above U+10FFFF
+  */
+ public static function unicodeToUtf8($arr)
+ {
+     $utf8 = '';
+
+     foreach ($arr as $cp) {
+         if ($cp < 0 || $cp > 0x10ffff) {
+             // outside the Unicode range
+             return false;
+         }
+         if ($cp >= 0xD800 && $cp <= 0xDFFF) {
+             // surrogates cannot be encoded
+             return false;
+         }
+         if ($cp == 0xFEFF) {
+             // zap the BOM
+             continue;
+         }
+
+         if ($cp <= 0x007f) {
+             $utf8 .= chr($cp);
+         } elseif ($cp <= 0x07ff) {
+             $utf8 .= chr(0xc0 | ($cp >> 6))
+                 . chr(0x80 | ($cp & 0x003f));
+         } elseif ($cp <= 0xffff) {
+             $utf8 .= chr(0xe0 | ($cp >> 12))
+                 . chr(0x80 | (($cp >> 6) & 0x003f))
+                 . chr(0x80 | ($cp & 0x003f));
+         } else {
+             $utf8 .= chr(0xf0 | ($cp >> 18))
+                 . chr(0x80 | (($cp >> 12) & 0x3f))
+                 . chr(0x80 | (($cp >> 6) & 0x3f))
+                 . chr(0x80 | ($cp & 0x3f));
+         }
+     }
+
+     return $utf8;
+ }
+
+ /**
+  * Returns the UTF-8 encoding of a single code point, as produced by
+  * unicodeToUtf8() for a one-element array.
+  *
+  * @param integer $n Unicode code point
+  * @return string|false
+  */
+ public static function uchr($n)
+ {
+     $codepoints = array($n);
+
+     return self::unicodeToUtf8($codepoints);
+ }
+
+ /**
+  * Returns the code point of the first character of a UTF-8 string.
+  *
+  * utf8ToUnicode() returns false for malformed input; that value is now
+  * passed through instead of being handed to array_shift(), which only
+  * accepts arrays.
+  *
+  * @param string $c UTF-8 string
+  * @return integer|null|false Code point, null for an empty string, false
+  *                            when the string cannot be decoded
+  */
+ public static function uord($c)
+ {
+     $codepoints = self::utf8ToUnicode($c);
+     if (!is_array($codepoints)) {
+         return false;
+     }
+
+     return array_shift($codepoints);
+ }
+
+ /**
+  * strcmp() with optional trimming, accent folding and case folding.
+  *
+  * $trim may be false/null (no trimming), true (trim default whitespace)
+  * or a string listing the characters to trim. Passing true used to be
+  * forwarded to trim() as the character list "1".
+  *
+  * @param string         $s1
+  * @param string         $s2
+  * @param boolean        $ignoreCase    Compare lower-cased strings
+  * @param boolean        $ignoreAccents Compare after removeAccents()
+  * @param boolean|string $trim          See above
+  * @return integer Result of the native strcmp() on the prepared strings
+  */
+ public static function strcmp($s1, $s2, $ignoreCase = false, $ignoreAccents = false, $trim = false)
+ {
+     if ($trim === true) {
+         $s1 = trim($s1);
+         $s2 = trim($s2);
+     } elseif ($trim !== false && $trim !== null) {
+         $s1 = trim($s1, $trim);
+         $s2 = trim($s2, $trim);
+     }
+
+     if ($ignoreAccents) {
+         $s1 = self::removeAccents($s1);
+         $s2 = self::removeAccents($s2);
+     }
+
+     if ($ignoreCase) {
+         $s1 = mb_strtolower($s1);
+         $s2 = mb_strtolower($s2);
+     }
+
+     return strcmp($s1, $s2);
+ }
+
+ /**
+  * Replaces every run of whitespace (line breaks included) by a single
+  * space.
+  *
+  * @param string $input
+  * @return string
+  */
+ public static function removeNewLines($input)
+ {
+     return preg_replace('|\s+|', ' ', $input);
+ }
+
+ /**
+  * Splits a string into lines, whatever the line ending style.
+  *
+  * @param string  $str
+  * @param boolean $compact When true, lines are trimmed and empty ones are
+  *                         dropped; when false the raw lines are returned
+  * @return array
+  */
+ public static function splitLines($str, $compact = true)
+ {
+     $unix = str_replace(array("\r\n", "\r"), "\n", $str);
+     $lines = explode("\n", $unix);
+
+     if (!$compact) {
+         return $lines;
+     }
+
+     // 'strlen' keeps every non-empty line, including the string "0".
+     return array_values(array_filter(array_map('trim', $lines), 'strlen'));
+ }
+
+ /**
+  * parse_url() with a few conveniences.
+  *
+  * Protocol-relative URLs ("//host/...") get an "http:" prefix. When
+  * $forceScheme is true and no scheme was found, the URL is parsed again
+  * with "http://" prepended. Two keys are added when applicable:
+  * "query_params" (the query string parsed with parse_str()) and
+  * "path_components" (the non-empty segments of the path).
+  *
+  * @param string  $url
+  * @param boolean $forceScheme
+  * @return array|false Result of parse_url(), augmented as described
+  */
+ public static function parseUrl($url, $forceScheme = true)
+ {
+     $candidate = trim($url);
+     if (strncmp($candidate, '//', 2) === 0) {
+         $candidate = 'http:' . $candidate;
+     }
+
+     $parts = parse_url($candidate);
+     if ($forceScheme && !isset($parts['scheme'])) {
+         $parts = parse_url('http://' . $candidate);
+     }
+
+     if (isset($parts['query'])) {
+         parse_str($parts['query'], $params);
+         $parts['query_params'] = $params;
+     }
+
+     if (isset($parts['path'])) {
+         $segments = explode('/', trim($parts['path'], '/'));
+         // 'strlen' drops the empty segments left by doubled slashes.
+         $parts['path_components'] = array_values(array_filter($segments, 'strlen'));
+     }
+
+     return $parts;
+ }
+
+ /**
+  * Picks the singular or plural label matching a quantity.
+  *
+  * @param mixed        $nb        Quantity (converted with intval())
+  * @param string       $singulier Label used when the quantity is <= 1
+  * @param string       $pluriel   Label used otherwise
+  * @param string|false $zero      Returned as is when the quantity is 0
+  *                                and this value is truthy
+  * @param boolean      $displayNb Prefix the label with the quantity and
+  *                                a space
+  * @return string
+  */
+ public static function pluriel($nb, $singulier, $pluriel, $zero = false, $displayNb = true)
+ {
+     $count = intval($nb);
+
+     if ($count == 0 && $zero) {
+         return $zero;
+     }
+
+     $prefix = $displayNb ? $count . ' ' : '';
+     $label = ($count <= 1) ? $singulier : $pluriel;
+
+     return $prefix . $label;
+ }
+
+ /**
+  * Normalises line endings to LF, or to CRLF when $os is 'win'.
+  *
+  * @param string $text
+  * @param string $os 'win' for CRLF; any other value yields LF
+  * @return string
+  */
+ public static function normalizeLines($text, $os = 'nix')
+ {
+     $unix = str_replace(array("\r\n", "\r"), "\n", $text);
+
+     return ($os == 'win') ? str_replace("\n", "\r\n", $unix) : $unix;
+ }
+
+ /**
+  * Converts an underscore_separated identifier to CamelCase using the
+  * Word_UnderscoreToCamelCase rule of Zend_Filter_Inflector.
+  *
+  * The first character is lower-cased unless $upperFirst is true. This is
+  * done with the multibyte string functions rather than the curly brace
+  * offset syntax, which is no longer valid PHP as of version 8 and only
+  * addressed a single byte. An empty result is returned as is.
+  *
+  * NOTE(review): inside this namespace the unqualified class name
+  * resolves to \Cubist\Util\Zend_Filter_Inflector; confirm that this is
+  * the intended class.
+  *
+  * @param string  $str
+  * @param boolean $upperFirst Keep the first character upper-cased
+  * @return string
+  */
+ public static function underscoreToCamelCase($str, $upperFirst = false)
+ {
+     $inflector = new Zend_Filter_Inflector(':string');
+     $inflector->addRules(array(':string' => array('Word_UnderscoreToCamelCase')));
+     $str = (string)$inflector->filter(array('string' => $str));
+
+     if (!$upperFirst && $str !== '') {
+         $str = mb_strtolower(mb_substr($str, 0, 1)) . mb_substr($str, 1);
+     }
+
+     return $str;
+ }
+
+ /**
+  * Converts a camelCase identifier to snake_case.
+  *
+  * The identifier is cut into words (runs of capitals such as acronyms
+  * stay together); all-caps words are fully lower-cased, other words only
+  * lose their leading capital. Words are joined with underscores.
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function camelCaseToUnderscore($str)
+ {
+     preg_match_all('!([A-Z][A-Z0-9]*(?=$|[A-Z][a-z0-9])|[A-Za-z][a-z0-9]+)!', $str, $matches);
+
+     $words = array_map(function ($word) {
+         if ($word == strtoupper($word)) {
+             return strtolower($word);
+         }
+         return lcfirst($word);
+     }, $matches[0]);
+
+     return implode('_', $words);
+ }
+
+ /**
+  * Stops orphans in HTML by replacing the last space of the string with a
+  * non-breaking space entity (&nbsp;).
+  *
+  * The spaces around French-style punctuation (" :", " !", " ?", "« ",
+  * " »") are made non-breaking as well.
+  *
+  * NOTE(review): the replacement used to be a regular space, which made
+  * the whole method a no-op; the entity announced by the original comment
+  * is restored. Confirm against the upstream source.
+  *
+  * @param string $str HTML fragment
+  * @return string
+  */
+ public static function preventOrphans($str)
+ {
+     $find = ' '; // What to search for
+     $replace = '&nbsp;'; // What to replace it with
+
+     $last_space = strrpos($str, $find); // Last occurrence in the string
+
+     if ($last_space !== false) {
+         $str = substr_replace($str, $replace, $last_space, strlen($find));
+     }
+
+     // Also replace punctuation that has spaces before it (eg. in French)
+     $punctuations = array(' :', ' !', ' ?', '« ', ' »');
+     $replacements = array("{$replace}:", "{$replace}!", "{$replace}?", "«{$replace}", "{$replace}»");
+
+     return str_replace($punctuations, $replacements, $str);
+ }
+
+ /**
+  * Check email address
+  *
+  * Returns true if $email matches the addr-spec grammar assembled below
+  * (atoms, quoted strings and domain literals).
+  *
+  * @copyright Cal Henderson
+  * @license http://creativecommons.org/licenses/by-sa/2.5/ CC-BY-SA
+  * @link http://www.iamcal.com/publish/articles/php/parsing_email/
+  *
+  * @param string $email Email string
+  * @return boolean
+  */
+ public static function isEmail($email)
+ {
+     // Terminals
+     $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
+     $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
+     $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
+     $quotedPair = '\\x5c[\\x00-\\x7f]';
+
+     // Composite rules
+     $domainLiteral = '\\x5b(' . $dtext . '|' . $quotedPair . ')*\\x5d';
+     $quotedString = '\\x22(' . $qtext . '|' . $quotedPair . ')*\\x22';
+     $subDomain = '(' . $atom . '|' . $domainLiteral . ')';
+     $word = '(' . $atom . '|' . $quotedString . ')';
+     $domain = $subDomain . '(\\x2e' . $subDomain . ')*';
+     $localPart = $word . '(\\x2e' . $word . ')*';
+     $addrSpec = $localPart . '\\x40' . $domain;
+
+     return (boolean)preg_match('!^' . $addrSpec . '$!', $email);
+ }
+
+ /**
+  * Accents replacement
+  *
+  * Replaces some occidental accentuated characters by their ASCII
+  * representation. The folding table comes from getAccentsPattern()
+  * instead of a second, hand-maintained copy of the same 27 entries.
+  *
+  * @param string $str String to deaccent
+  * @return string
+  */
+ public static function deaccent($str)
+ {
+     foreach (self::getAccentsPattern() as $ascii => $class) {
+         $str = preg_replace('/[' . $class . ']/u', $ascii, $str);
+     }
+
+     return $str;
+ }
+
+ /**
+  * URL cleanup
+  *
+  * Strips tags (PHP's native strip_tags()) and the characters
+  * ? & # = + < > " %, turns apostrophes into spaces, collapses
+  * whitespace, optionally converts slashes and spaces to dashes,
+  * collapses dashes, and finally removes a leading slash and any
+  * "dots followed by a slash" sequence.
+  *
+  * @param string $str URL to tidy
+  * @param boolean $keep_slashes Keep slashes in URL
+  * @param boolean $keep_spaces Keep spaces in URL
+  * @return string
+  */
+ public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
+ {
+     $forbidden = array('?', '&', '#', '=', '+', '<', '>', '"', '%');
+
+     $url = str_replace($forbidden, '', strip_tags($str));
+     $url = str_replace("'", ' ', $url);
+     $url = preg_replace('/[\s]+/u', ' ', trim($url));
+
+     // Characters converted to dashes, depending on the flags.
+     $dashed = array();
+     if (!$keep_slashes) {
+         $dashed[] = '/';
+     }
+     if (!$keep_spaces) {
+         $dashed[] = ' ';
+     }
+     if ($dashed) {
+         $url = str_replace($dashed, '-', $url);
+     }
+
+     $url = preg_replace('/[-]+/', '-', $url);
+
+     # Remove path changes in URL
+     $url = preg_replace('%^/%', '', $url);
+
+     return preg_replace('%\.+/%', '', $url);
+ }
+
+ /**
+  * Cut string
+  *
+  * Returns the string cut on a whitespace boundary so that it holds at
+  * most $l characters. If the first word alone reaches $l characters, its
+  * first $l characters are returned.
+  *
+  * @param string $str String to cut
+  * @param integer $l Length to keep
+  * @return string
+  */
+ public static function cutString($str, $l)
+ {
+     // Words and the whitespace runs between them, in order.
+     $tokens = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+     $first = $tokens[0];
+     if (mb_strlen($first) >= $l) {
+         return mb_substr($first, 0, $l);
+     }
+
+     $kept = '';
+     $used = 0;
+     foreach ($tokens as $token) {
+         $used += mb_strlen($token);
+         if ($used > $l) {
+             break;
+         }
+         $kept .= $token;
+     }
+
+     return trim($kept);
+ }
+
+ /**
+  * Split words
+  *
+  * Returns the lower-cased words of at least $minChar characters found in
+  * the given string after it went through html::clean().
+  *
+  * @param string $str Words to split
+  * @param integer $minChar Minimum word length
+  * @return array
+  */
+ public static function splitWords($str, $minChar = 3)
+ {
+     $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+     $regex = '/([^' . $non_word . ']{' . $minChar . ',})/msu';
+
+     if (!preg_match_all($regex, html::clean($str), $match)) {
+         return array();
+     }
+
+     return array_map('mb_strtolower', $match[1]);
+ }
+
+ /**
+  * Encoding detection
+  *
+  * Returns the lower-cased name of the first encoding, among UTF-8 and a
+  * list of ISO-8859 variants, that mb_detect_encoding() accepts for $str
+  * (a space is appended to the string before detection).
+  *
+  * @param string $str String
+  * @return string
+  */
+ public static function detectEncoding($str)
+ {
+     $candidates = array('UTF-8');
+     foreach (array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15) as $part) {
+         $candidates[] = 'ISO-8859-' . $part;
+     }
+
+     $detected = mb_detect_encoding($str . ' ', implode(',', $candidates));
+
+     return strtolower($detected);
+ }
+
+ /**
+  * Find bad UTF8 tokens
+  *
+  * Locates the first bad byte in a UTF-8 string, returning its byte index
+  * in the string.
+  * PCRE pattern to locate bad bytes in a UTF-8 string comes from
+  * W3 FAQ: Multilingual Forms.
+  * Note: modified to include full ASCII range including control chars
+  *
+  * The string is scanned in place with an anchored match at a moving
+  * offset; it is no longer copied with substr() after every character.
+  *
+  * @copyright Harry Fuecks
+  * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html GNU LGPL 2.1
+  * @link http://phputf8.sourceforge.net
+  *
+  * @param string $str String to search
+  * @return integer|false Byte index of the first invalid byte, or false
+  *                       when the whole string is valid
+  */
+ public static function utf8badFind($str)
+ {
+     $UTF8_BAD =
+         '([\x00-\x7F]' . # ASCII (including control chars)
+         '|[\xC2-\xDF][\x80-\xBF]' . # non-overlong 2-byte
+         '|\xE0[\xA0-\xBF][\x80-\xBF]' . # excluding overlongs
+         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte
+         '|\xED[\x80-\x9F][\x80-\xBF]' . # excluding surrogates
+         '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . # planes 1-3
+         '|[\xF1-\xF3][\x80-\xBF]{3}' . # planes 4-15
+         '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . # plane 16
+         '|(.{1}))'; # invalid byte
+
+     // \G anchors each match at the current offset.
+     $regex = '/\G' . $UTF8_BAD . '/S';
+     $length = strlen($str);
+     $pos = 0;
+
+     while ($pos < $length && preg_match($regex, $str, $matches, 0, $pos)) {
+         // Group 2 only participates when the "invalid byte" branch matched.
+         if (isset($matches[2])) {
+             return $pos;
+         }
+         $pos += strlen($matches[0]);
+     }
+
+     return false;
+ }
+
+
+ /**
+  * BOM removal
+  *
+  * Removes every occurrence of the UTF-8 byte order mark (bytes EF BB BF)
+  * from a string.
+  *
+  * The mark is spelled with explicit byte escapes: the needle was an empty
+  * string literal, which substr_count() rejects.
+  *
+  * @param string $str String to clean
+  * @return string
+  */
+ public static function removeBOM($str)
+ {
+     $bom = "\xEF\xBB\xBF";
+
+     return str_replace($bom, '', $str);
+ }
+
+ /**
+  * Quoted printable conversion
+  *
+  * Encodes the given string line by line: bytes below 32, above 126 and
+  * the equals sign become "=XX" (upper-case hexadecimal). Every line of
+  * the output, the last one included, ends with CRLF.
+  *
+  * @param string $str String to encode
+  * @return string
+  */
+ public static function QPEncode($str)
+ {
+     $encoded = '';
+
+     foreach (preg_split("/\r?\n/msu", $str) as $line) {
+         $size = strlen($line);
+         for ($i = 0; $i < $size; $i++) {
+             $byte = $line[$i];
+             $code = ord($byte);
+
+             $mustEscape = $code < 32 || $code == 61 || $code > 126;
+             $encoded .= $mustEscape ? sprintf('=%02X', $code) : $byte;
+         }
+
+         $encoded .= "\r\n";
+     }
+
+     return $encoded;
+ }
+
+}
\ No newline at end of file