<?php
namespace Cubist\Util;
+
+use Illuminate\Support\Str;
+
class Text
{
- public static function utf8_encode($text, $from = 'ISO-8859-1')
- {
- return self::toUTF8($text, $from);
- }
-
- public static function toUTF8($str, $encoding = null)
- {
- if (!$encoding) {
- $encoding = self::detectEncoding($str);
- }
-
- $str = iconv($encoding, 'UTF-8//TRANSLIT', $str);
- return self::removeOddStuff($str);
- }
-
- public static function removeOddStuff($str)
- {
- $pattern = array();
- $pattern["'"] = '\x{0092}\x{00b4}\x{0060}\x{2018}\x{2019}';
- $pattern['oe'] = '\x{009c}';
- $pattern['...'] = '\x{0085}';
- $pattern['Oe'] = '\x{008c}';
- $pattern[' '] = '\x{0096}';
- $pattern['«'] = '\x{0093}';
- $pattern['»'] = '\x{0094}';
-
- foreach ($pattern as $r => $p) {
- $str = preg_replace('|[' . $p . ']|u', $r, $str);
- }
- return $str;
- }
-
- public static function getAccentsPattern()
- {
- $pattern = array();
- $pattern['A'] = '\x{00C0}-\x{00C5}';
- $pattern['AE'] = '\x{00C6}';
- $pattern['C'] = '\x{00C7}';
- $pattern['D'] = '\x{00D0}';
- $pattern['E'] = '\x{00C8}-\x{00CB}';
- $pattern['I'] = '\x{00CC}-\x{00CF}';
- $pattern['N'] = '\x{00D1}';
- $pattern['O'] = '\x{00D2}-\x{00D6}\x{00D8}';
- $pattern['OE'] = '\x{0152}';
- $pattern['S'] = '\x{0160}';
- $pattern['U'] = '\x{00D9}-\x{00DC}';
- $pattern['Y'] = '\x{00DD}';
- $pattern['Z'] = '\x{017D}';
-
- $pattern['a'] = '\x{00E0}-\x{00E5}';
- $pattern['ae'] = '\x{00E6}';
- $pattern['c'] = '\x{00E7}';
- $pattern['d'] = '\x{00F0}';
- $pattern['e'] = '\x{00E8}-\x{00EB}';
- $pattern['i'] = '\x{00EC}-\x{00EF}';
- $pattern['n'] = '\x{00F1}';
- $pattern['o'] = '\x{00F2}-\x{00F6}\x{00F8}';
- $pattern['oe'] = '\x{0153}';
- $pattern['s'] = '\x{0161}';
- $pattern['u'] = '\x{00F9}-\x{00FC}';
- $pattern['y'] = '\x{00FD}\x{00FF}';
- $pattern['z'] = '\x{017E}';
-
- $pattern['ss'] = '\x{00DF}';
- return $pattern;
- }
-
- public static function removeAccents($str, $clean = true)
- {
- $pattern = self::getAccentsPattern();
- if ($clean) {
- $str = self::cleanUTF8($str);
- $del = array('’' => ' ', '”' => ' ', '“' => ' ', '•' => ' ', '…' => ' ', '€' => ' ',
- '–' => ' ', '‘' => ' ');
- foreach ($del as $d => $p) {
- $str = str_replace($d, $p, $str);
- }
- }
- foreach ($pattern as $r => $p) {
- $str = preg_replace('/[' . $p . ']/u', $r, $str);
- }
-
- $from = 'o';
- $to = 'o';
-
- $str = strtr($str, $from, $to);
-
- return $str;
- }
-
- public static function keepOnlyLettersAndDigits($str)
- {
- return self::condenseWhite(preg_replace('|[^0-9A-Za-z]|ui', ' ', self::removeAccents($str)));
- }
-
- public static function makeAccentInsensiblePattern($str)
- {
- $patterns = self::getAccentsPattern();
- $chars = preg_split('//ui', $str, -1, PREG_SPLIT_NO_EMPTY);
- $pattern = '|';
- foreach ($chars as $char) {
- if (isset($patterns[$char])) {
- $pattern .= '[';
- $pattern .= $char;
- $pattern .= $patterns[$char];
- $pattern .= ']{1}';
- } else {
- $pattern .= $char;
- }
- }
- $pattern .= '|iu';
- return $pattern;
- }
-
- public static function preg_areplace($search, $replace, $subject)
- {
- $pattern = self::makeAccentInsensiblePattern($search);
- return preg_replace($pattern, $replace, $subject);
- }
-
- public static function multiExplode($separator, $str, $limit = null)
- {
- $seps = array('§', '£', '¤', '#', '¨', '^', '%');
- foreach ($seps as $sep) {
- if (stristr($str, $sep)) {
- continue;
- }
- break;
- }
-
- $str = preg_replace('|[' . preg_quote($separator, '-') . ']|', $sep, $str);
- if (is_null($limit)) {
- return explode($sep, $str);
- } else {
- return explode($sep, $str, $limit);
- }
- }
-
- public static function countWords($str)
- {
- return count(preg_split('|\s|', $str));
- }
-
- public static function explodeNewLines($str)
- {
- $str = trim($str);
- if ($str === '') {
- return [];
- }
- $str = self::condenseNewLines($str);
- return preg_split('|\v|', $str);
- }
-
- public static function substrWord($str, $words, $end = '', $wordsorig = null)
- {
- if (is_null($wordsorig)) {
- $wordsorig = $words;
- }
-
- $maxchars = $wordsorig * 6;
-
- $o = self::countWords($str);
- if ($o <= $words) {
- $res = $str;
- $addend = false;
- } else {
- $e = self::multiExplode(" \n", $str, $words);
- array_pop($e);
- $res = implode(' ', $e);
- $addend = true;
- }
- if (mb_strlen($res) > $maxchars) {
- return self::substrWord($str, $words - 1, $end, $wordsorig);
- }
-
- if ($addend) {
- $res .= $end;
- }
-
- return $res;
- }
-
- public static function substrWordChars($str, $chars, $end = '')
- {
- if (strlen($str) <= $chars) {
- return $str;
- }
-
- $str = trim(substr($str, 0, $chars));
- $s = preg_split('|\s+|', $str);
- array_pop($s);
- return implode(' ', $s) . $end;
- }
-
- public static function ucfirst($str, $lower = false)
- {
- if ($lower) {
- $str = mb_strtolower($str);
- }
- $first = mb_substr($str, 0, 1);
- $suite = mb_substr($str, 1);
- return mb_strtoupper($first) . $suite;
- }
-
- public static function removeNl($str)
- {
- $trans = array("\n" => ' ', "\r" => ' ');
- $str = strtr($str, $trans);
- return self::condenseWhite($str);
- }
-
- public static function condenseWhite($str)
- {
- return preg_replace('|[\s]{2,100}|u', ' ', $str);
- }
-
- public static function condenseNewLines($str)
- {
- $str = self::normalizeLines($str);
- $str = preg_replace('|\n{2,100}|', "\n", $str);
- return $str;
- }
-
- public static function html2text($str)
- {
- $res = self::strip_tags($str);
- $res = str_replace(' ', ' ', $res);
-
- return $res;
- }
-
- public static function strip_tags($str, $allowed_tags = array(), $trim = false)
- {
- // return preg_replace('|\<.*\>|uU', '', $str);
- // http://www.php.net/manual/fr/function.strip-tags.php#86463
- if (!is_array($allowed_tags)) {
- $allowed_tags = !empty($allowed_tags) ? array($allowed_tags) : array();
- }
- $tags = implode('|', $allowed_tags);
-
- if (empty($tags)) {
- $tags = '[a-z]+';
- }
-
- preg_match_all('@</?\s*(' . $tags . ')(\s+[a-z_]+=(\'[^\']+\'|"[^"]+"))*\s*/?>@i', $str, $matches);
-
- $full_tags = $matches[0];
- $tag_names = $matches[1];
-
- foreach ($full_tags as $i => $full_tag) {
- if (!in_array($tag_names[$i], $allowed_tags)) {
- if ($trim) {
- unset($full_tags[$i]);
- } else {
- $str = str_replace($full_tag, '', $str);
- }
- }
- }
-
- return $trim ? implode('', $full_tags) : $str;
- }
-
- public static function str2URL($str, $replace = '-', $exclude_slashs = false, $exclude_dots = false)
- {
- if (is_object($str)) {
- $str = json_encode($str);
- }
- $str = str_replace('&', '&', $str);
- $str = str_replace(':', ' ', $str);
- if (!$exclude_slashs) {
- $str = str_replace('/', ' ', $str);
- }
-
- $str = self::deaccent($str);
- $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);
-
- return self::tidyURL($str, true);
-
- }
-
- public static function cleanUTF8($str, $replace = '?')
- {
- while (($bad_index = self::utf8badFind($str)) !== false) {
- $str = substr_replace($str, $replace, $bad_index, 1);
- }
- $str = str_replace('\16', $replace, $str);
- $str = str_replace('\18', $replace, $str);
- return $str;
- }
-
- public static function getChar($code)
- {
- $code = trim($code, '&;');
- return html_entity_decode('&' . $code . ';', ENT_QUOTES, 'UTF-8');
- }
-
- public static function randText($length = 300)
- {
- $str = 'aeiouy azertyuiopqsdfghjklmwxcvbn eaiouaeiou ';
- $list = str_split($str);
- $nb = strlen($str) - 1;
- $res = '';
- for ($i = 0; $i <= $length; $i++) {
- $pos = rand(0, $nb);
- $res .= $list[$pos];
- }
- return $res;
- }
-
- public static function splitWordsWithCase($str)
- {
- $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
- if (preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match)) {
- foreach ($match[1] as $i => $v) {
- $match[1][$i] = $v;
- }
- return $match[1];
- }
- return array();
- }
-
- public static function find_words_from_list($str, $list)
- {
- $words = array_unique(self::splitWordsWithCase($str));;
- if (is_array($list)) {
- $liste = $list;
- } else {
- $liste = array_unique(self::splitWords($list));
- }
-
- $l = array();
- foreach ($words as $ll) {
- $lll = self::removeAccents($ll);
- $lll = strtolower($lll);
- $liste_real[$lll][] = $ll;
- $l[] = $lll;
- }
-
- $diff = array_intersect($liste, $l);
- $res = array();
- if ($diff) {
- foreach ($diff as $d) {
- $res = array_merge($res, $liste_real[$d]);
- }
- return $res;
- }
- return false;
- }
-
- public static function mb_str_split($string)
- {
- $stop = mb_strlen($string);
- $result = array();
-
- for ($idx = 0; $idx < $stop; $idx++) {
- $result[] = mb_substr($string, $idx, 1);
- }
-
- return $result;
- }
-
- public static function strToArray($str)
- {
- return self::mb_str_split($str);
- }
-
- public static function utf8ToUnicode($str)
- {
- $mState = 0; // cached expected number of octets after the current octet
- // until the beginning of the next UTF8 character sequence
- $mUcs4 = 0; // cached Unicode character
- $mBytes = 1; // cached expected number of octets in the current sequence
-
- $out = array();
-
- $len = strlen($str);
- for ($i = 0; $i < $len; $i++) {
- $in = ord($str{$i});
- if (0 == $mState) {
- // When mState is zero we expect either a US-ASCII character or a
- // multi-octet sequence.
- if (0 == (0x80 & ($in))) {
- // US-ASCII, pass straight through.
- $out[] = $in;
- $mBytes = 1;
- } else if (0xC0 == (0xE0 & ($in))) {
- // First octet of 2 octet sequence
- $mUcs4 = ($in);
- $mUcs4 = ($mUcs4 & 0x1F) << 6;
- $mState = 1;
- $mBytes = 2;
- } else if (0xE0 == (0xF0 & ($in))) {
- // First octet of 3 octet sequence
- $mUcs4 = ($in);
- $mUcs4 = ($mUcs4 & 0x0F) << 12;
- $mState = 2;
- $mBytes = 3;
- } else if (0xF0 == (0xF8 & ($in))) {
- // First octet of 4 octet sequence
- $mUcs4 = ($in);
- $mUcs4 = ($mUcs4 & 0x07) << 18;
- $mState = 3;
- $mBytes = 4;
- } else if (0xF8 == (0xFC & ($in))) {
- /* First octet of 5 octet sequence.
- *
- * This is illegal because the encoded codepoint must be either
- * (a) not the shortest form or
- * (b) outside the Unicode range of 0-0x10FFFF.
- * Rather than trying to resynchronize, we will carry on until the end
- * of the sequence and let the later error handling code catch it.
- */
- $mUcs4 = ($in);
- $mUcs4 = ($mUcs4 & 0x03) << 24;
- $mState = 4;
- $mBytes = 5;
- } else if (0xFC == (0xFE & ($in))) {
- // First octet of 6 octet sequence, see comments for 5 octet sequence.
- $mUcs4 = ($in);
- $mUcs4 = ($mUcs4 & 1) << 30;
- $mState = 5;
- $mBytes = 6;
- } else {
- /* Current octet is neither in the US-ASCII range nor a legal first
- * octet of a multi-octet sequence.
- */
- return false;
- }
- } else {
- // When mState is non-zero, we expect a continuation of the multi-octet
- // sequence
- if (0x80 == (0xC0 & ($in))) {
- // Legal continuation.
- $shift = ($mState - 1) * 6;
- $tmp = $in;
- $tmp = ($tmp & 0x0000003F) << $shift;
- $mUcs4 |= $tmp;
-
- if (0 == --$mState) {
- /* End of the multi-octet sequence. mUcs4 now contains the final
- * Unicode codepoint to be output
- *
- * Check for illegal sequences and codepoints.
- */
- // From Unicode 3.1, non-shortest form is illegal
- if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
- ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
- ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
- (4 < $mBytes) ||
- // From Unicode 3.2, surrogate characters are illegal
- (($mUcs4 & 0xFFFFF800) == 0xD800) ||
- // Codepoints outside the Unicode range are illegal
- ($mUcs4 > 0x10FFFF)
- ) {
- return false;
- }
- if (0xFEFF != $mUcs4) {
- // BOM is legal but we don't want to output it
- $out[] = $mUcs4;
- }
- // initialize UTF8 cache
- $mState = 0;
- $mUcs4 = 0;
- $mBytes = 1;
- }
- } else {
- /* ((0xC0 & (*in) != 0x80) && (mState != 0))
- *
- * Incomplete multi-octet sequence.
- */
- return false;
- }
- }
- }
- return $out;
- }
-
- /**
- * Takes an array of ints representing the Unicode characters and returns
- * a UTF-8 string. Astral planes are supported ie. the ints in the
- * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
- * are not allowed.
- *
- * Returns false if the input array contains ints that represent
- * surrogates or are outside the Unicode range.
- */
- public static function unicodeToUtf8($arr)
- {
- $dest = '';
- foreach ($arr as $src) {
- if ($src < 0) {
- return false;
- } else if ($src <= 0x007f) {
- $dest .= chr($src);
- } else if ($src <= 0x07ff) {
- $dest .= chr(0xc0 | ($src >> 6));
- $dest .= chr(0x80 | ($src & 0x003f));
- } else if ($src == 0xFEFF) {
- // nop -- zap the BOM
- } else if ($src >= 0xD800 && $src <= 0xDFFF) {
- // found a surrogate
- return false;
- } else if ($src <= 0xffff) {
- $dest .= chr(0xe0 | ($src >> 12));
- $dest .= chr(0x80 | (($src >> 6) & 0x003f));
- $dest .= chr(0x80 | ($src & 0x003f));
- } else if ($src <= 0x10ffff) {
- $dest .= chr(0xf0 | ($src >> 18));
- $dest .= chr(0x80 | (($src >> 12) & 0x3f));
- $dest .= chr(0x80 | (($src >> 6) & 0x3f));
- $dest .= chr(0x80 | ($src & 0x3f));
- } else {
- // out of range
- return false;
- }
- }
- return $dest;
- }
-
- public static function uchr($n)
- {
- return self::unicodeToUtf8(array($n));
- }
-
- public static function uord($c)
- {
- $r = self::utf8ToUnicode($c);
- return array_shift($r);
- }
-
- public static function strcmp($s1, $s2, $ignoreCase = false, $ignoreAccents = false, $trim = false)
- {
- if ($trim !== false) {
- $s1 = trim($s1, $trim);
- $s2 = trim($s2, $trim);
- }
- if ($ignoreAccents) {
- $s1 = self::removeAccents($s1);
- $s2 = self::removeAccents($s2);
- }
- if ($ignoreCase) {
- $s1 = mb_strtolower($s1);
- $s2 = mb_strtolower($s2);
- }
-
- return strcmp($s1, $s2);
- }
-
- public static function removeNewLines($input)
- {
- $res = preg_replace("|\s+|", ' ', $input);
- return $res;
- }
-
- /**
- *
- * @param string $str
- * @param boolean $compact
- * @return array
- */
- public static function splitLines($str, $compact = true)
- {
- $str = str_replace("\r\n", "\n", $str);
- $str = str_replace("\r", "\n", $str);
- $str = explode("\n", $str);
-
- if (!$compact) {
- return $str;
- }
-
- $res = array();
- foreach ($str as $s) {
- $s = trim($s);
- if ($s == '') {
- continue;
- }
- $res[] = $s;
- }
- return $res;
- }
-
- public static function parseUrl($url, $forceScheme = true)
- {
- $url = trim($url);
- if (substr($url, 0, 2) == '//') {
- $url = 'http:' . $url;
- }
- $res = parse_url($url);
- if ($forceScheme && !isset($res['scheme'])) {
- $url = 'http://' . $url;
- $res = parse_url($url);
- }
-
- if (isset($res['query'])) {
- parse_str($res['query'], $tmp);
- $res['query_params'] = $tmp;
- }
-
- if (isset($res['path'])) {
- $components = explode('/', trim($res['path'], '/'));
- $filteredComponents = array();
- foreach ($components as $c) {
- if ($c == '') {
- continue;
- }
- $filteredComponents[] = $c;
- }
- $res['path_components'] = $filteredComponents;
- }
- return $res;
- }
-
- public static function pluriel($nb, $singulier, $pluriel, $zero = false, $displayNb = true)
- {
- $nb = intval($nb);
- $res = '';
- if ($displayNb) {
- $res .= $nb . ' ';
- }
- if ($nb == 0 && $zero) {
- return $zero;
- }
- if ($nb <= 1) {
- $res .= $singulier;
- } else {
- $res .= $pluriel;
- }
- return $res;
- }
-
- public static function normalizeLines($text, $os = 'nix')
- {
- $text = str_replace("\r\n", "\n", $text);
- $text = str_replace("\r", "\n", $text);
- if ($os == 'win') {
- return str_replace("\n", "\r\n", $text);
- }
- return $text;
- }
-
- public static function underscoreToCamelCase($str, $upperFirst = false)
- {
- $inflector = new Zend_Filter_Inflector(':string');
- $inflector->addRules(array(':string' => array('Word_UnderscoreToCamelCase')));
- $str = $inflector->filter(array('string' => $str));
- if (!$upperFirst) {
- $str{0} = mb_strtolower($str{0});
- }
- return $str;
- }
-
- public static function camelCaseToUnderscore($str)
- {
-
- preg_match_all('!([A-Z][A-Z0-9]*(?=$|[A-Z][a-z0-9])|[A-Za-z][a-z0-9]+)!', $str, $matches);
- $ret = $matches[0];
- foreach ($ret as &$match) {
- $match = $match == strtoupper($match) ? strtolower($match) : lcfirst($match);
- }
- return implode('_', $ret);
- }
-
- // Stops orphans in HTML by replacing the last space with a
- public static function preventOrphans($str)
- {
-
- $find = ' '; // What to search for
- $replace = ' '; // What to replace it with
-
- $last_space = strrpos($str, $find); // Find last occurrence in string
-
- if ($last_space !== false) {
- $str = substr_replace($str, $replace, $last_space, strlen($find));
- }
-
- // Also replace punctuation that has spaces before it (eg. in French)
- $punctuations = array(' :', ' !', ' ?', '« ', ' »');
- $replacements = array("{$replace}:", "{$replace}!", "{$replace}?", "«{$replace}", "{$replace}»");
- $str = str_replace($punctuations, $replacements, $str);
-
- return $str;
- }
-
- /**
- * Check email address
- *
- * Returns true if $email is a valid email address.
- *
- * @copyright Cal Henderson
- * @license http://creativecommons.org/licenses/by-sa/2.5/ CC-BY-SA
- * @link http://www.iamcal.com/publish/articles/php/parsing_email/
- *
- * @param string $email Email string
- * @return boolean
- */
- public static function isEmail($email)
- {
- $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
- $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
- $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
- $quoted_pair = '\\x5c[\\x00-\\x7f]';
- $domain_literal = "\\x5b($dtext|$quoted_pair)*\\x5d";
- $quoted_string = "\\x22($qtext|$quoted_pair)*\\x22";
- $domain_ref = $atom;
- $sub_domain = "($domain_ref|$domain_literal)";
- $word = "($atom|$quoted_string)";
- $domain = "$sub_domain(\\x2e$sub_domain)*";
- $local_part = "$word(\\x2e$word)*";
- $addr_spec = "$local_part\\x40$domain";
-
- return (boolean)preg_match("!^$addr_spec$!", $email);
- }
-
- /**
- * Accents replacement
- *
- * Replaces some occidental accentuated characters by their ASCII
- * representation.
- *
- * @param string $str String to deaccent
- * @return string
- */
- public static function deaccent($str)
- {
- $pattern['A'] = '\x{00C0}-\x{00C5}';
- $pattern['AE'] = '\x{00C6}';
- $pattern['C'] = '\x{00C7}';
- $pattern['D'] = '\x{00D0}';
- $pattern['E'] = '\x{00C8}-\x{00CB}';
- $pattern['I'] = '\x{00CC}-\x{00CF}';
- $pattern['N'] = '\x{00D1}';
- $pattern['O'] = '\x{00D2}-\x{00D6}\x{00D8}';
- $pattern['OE'] = '\x{0152}';
- $pattern['S'] = '\x{0160}';
- $pattern['U'] = '\x{00D9}-\x{00DC}';
- $pattern['Y'] = '\x{00DD}';
- $pattern['Z'] = '\x{017D}';
-
- $pattern['a'] = '\x{00E0}-\x{00E5}';
- $pattern['ae'] = '\x{00E6}';
- $pattern['c'] = '\x{00E7}';
- $pattern['d'] = '\x{00F0}';
- $pattern['e'] = '\x{00E8}-\x{00EB}';
- $pattern['i'] = '\x{00EC}-\x{00EF}';
- $pattern['n'] = '\x{00F1}';
- $pattern['o'] = '\x{00F2}-\x{00F6}\x{00F8}';
- $pattern['oe'] = '\x{0153}';
- $pattern['s'] = '\x{0161}';
- $pattern['u'] = '\x{00F9}-\x{00FC}';
- $pattern['y'] = '\x{00FD}\x{00FF}';
- $pattern['z'] = '\x{017E}';
-
- $pattern['ss'] = '\x{00DF}';
-
- foreach ($pattern as $r => $p) {
- $str = preg_replace('/[' . $p . ']/u', $r, $str);
- }
-
- return $str;
- }
-
- /**
- * URL cleanup
- *
- * @param string $str URL to tidy
- * @param boolean $keep_slashes Keep slashes in URL
- * @param boolean $keep_spaces Keep spaces in URL
- * @return string
- */
- public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
- {
- $str = strip_tags($str);
- $str = str_replace(array('?', '&', '#', '=', '+', '<', '>', '"', '%'), '', $str);
- $str = str_replace("'", ' ', $str);
- $str = preg_replace('/[\s]+/u', ' ', trim($str));
-
- if (!$keep_slashes) {
- $str = str_replace('/', '-', $str);
- }
-
- if (!$keep_spaces) {
- $str = str_replace(' ', '-', $str);
- }
-
- $str = preg_replace('/[-]+/', '-', $str);
-
- # Remove path changes in URL
- $str = preg_replace('%^/%', '', $str);
- $str = preg_replace('%\.+/%', '', $str);
-
- return $str;
- }
-
- /**
- * Cut string
- *
- * Returns a cuted string on spaced at given length $l.
- *
- * @param string $str String to cut
- * @param integer $l Length to keep
- * @return string
- */
- public static function cutString($str, $l)
- {
- $s = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
-
- $res = '';
- $L = 0;
-
- if (mb_strlen($s[0]) >= $l) {
- return mb_substr($s[0], 0, $l);
- }
-
- foreach ($s as $v) {
- $L = $L + mb_strlen($v);
-
- if ($L > $l) {
- break;
- } else {
- $res .= $v;
- }
- }
-
- return trim($res);
- }
-
- /**
- * Split words
- *
- * Returns an array of words from a given string.
- *
- * @param string $str Words to split
- * @return array
- */
- public static function splitWords($str, $minChar = 3)
- {
- $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
- if (preg_match_all('/([^' . $non_word . ']{' . $minChar . ',})/msu', html::clean($str), $match)) {
- foreach ($match[1] as $i => $v) {
- $match[1][$i] = mb_strtolower($v);
- }
- return $match[1];
- }
- return array();
- }
-
- /**
- * Encoding detection
- *
- * Returns the encoding (in lowercase) of given $str.
- *
- * @param string $str String
- * @return string
- */
- public static function detectEncoding($str)
- {
- return strtolower(mb_detect_encoding($str . ' ',
- 'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .
- 'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .
- 'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15'));
- }
-
- /**
- * Find bad UTF8 tokens
- *
- * Locates the first bad byte in a UTF-8 string returning it's
- * byte index in the string
- * PCRE Pattern to locate bad bytes in a UTF-8 string
- * Comes from W3 FAQ: Multilingual Forms
- * Note: modified to include full ASCII range including control chars
- *
- * @copyright Harry Fuecks
- * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html GNU LGPL 2.1
- * @link http://phputf8.sourceforge.net
- *
- * @param string $str String to search
- * @return integer|false
- */
- public static function utf8badFind($str)
- {
- $UTF8_BAD =
- '([\x00-\x7F]' . # ASCII (including control chars)
- '|[\xC2-\xDF][\x80-\xBF]' . # non-overlong 2-byte
- '|\xE0[\xA0-\xBF][\x80-\xBF]' . # excluding overlongs
- '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte
- '|\xED[\x80-\x9F][\x80-\xBF]' . # excluding surrogates
- '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . # planes 1-3
- '|[\xF1-\xF3][\x80-\xBF]{3}' . # planes 4-15
- '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . # plane 16
- '|(.{1}))'; # invalid byte
- $pos = 0;
- $badList = array();
-
- while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) {
- $bytes = strlen($matches[0]);
- if (isset($matches[2])) {
- return $pos;
- }
- $pos += $bytes;
- $str = substr($str, $bytes);
- }
- return false;
- }
-
-
- /**
- * BOM removal
- *
- * Removes BOM from the begining of a string if present.
- *
- * @param string $str String to clean
- * @return string
- */
- public static function removeBOM($str)
- {
- if (substr_count($str, '')) {
- return str_replace('', '', $str);
- }
-
- return $str;
- }
-
- /**
- * Quoted printable conversion
- *
- * Encodes given str to quoted printable
- *
- * @param string $str String to encode
- * @return string
- */
- public static function QPEncode($str)
- {
- $res = '';
-
- foreach (preg_split("/\r?\n/msu", $str) as $line) {
- $l = '';
- preg_match_all('/./', $line, $m);
-
- foreach ($m[0] as $c) {
- $a = ord($c);
-
- if ($a < 32 || $a == 61 || $a > 126) {
- $c = sprintf('=%02X', $a);
- }
-
- $l .= $c;
- }
-
- $res .= $l . "\r\n";
- }
- return $res;
- }
+ /**
+  * Converts text to UTF-8, assuming ISO-8859-1 input unless told otherwise.
+  *
+  * @param string $text Text to convert
+  * @param string $from Source encoding handed to toUTF8()
+  * @return string Whatever toUTF8() returns for these arguments
+  */
+ public static function utf8_encode($text, $from = 'ISO-8859-1')
+ {
+     $converted = self::toUTF8($text, $from);
+
+     return $converted;
+ }
+
+ /**
+  * Converts a string to UTF-8 with iconv() and then runs removeOddStuff() on it.
+  *
+  * @param string      $str      Text to convert
+  * @param string|null $encoding Source encoding; when empty it is obtained from detectEncoding()
+  * @return string Output of removeOddStuff() applied to the iconv() result
+  */
+ public static function toUTF8($str, $encoding = null)
+ {
+     $source = $encoding ? $encoding : self::detectEncoding($str);
+     $converted = iconv($source, 'UTF-8//TRANSLIT', $str);
+
+     return self::removeOddStuff($converted);
+ }
+
+ /**
+  * Replaces a fixed set of code points (C1 controls and typographic quotes)
+  * with plain substitutes.
+  *
+  * @param string $str UTF-8 text
+  * @return string Text after all substitutions have been applied in order
+  */
+ public static function removeOddStuff($str)
+ {
+     // replacement text => content of the character class it replaces
+     $substitutions = array(
+         "'" => '\x{0092}\x{00b4}\x{0060}\x{2018}\x{2019}',
+         'oe' => '\x{009c}',
+         '...' => '\x{0085}',
+         'Oe' => '\x{008c}',
+         ' ' => '\x{0096}',
+         '«' => '\x{0093}',
+         '»' => '\x{0094}',
+     );
+
+     foreach ($substitutions as $replacement => $class) {
+         $regex = '|[' . $class . ']|u';
+         $str = preg_replace($regex, $replacement, $str);
+     }
+
+     return $str;
+ }
+
+ /**
+  * Returns the accent table used by the accent-related helpers of this class.
+  *
+  * Each key is the ASCII replacement; each value is the content of a PCRE
+  * character class (without the brackets) listing the accented code points
+  * that map to it.
+  *
+  * @return array Map of ASCII replacement => character-class content
+  */
+ public static function getAccentsPattern()
+ {
+     return array(
+         // upper case
+         'A' => '\x{00C0}-\x{00C5}',
+         'AE' => '\x{00C6}',
+         'C' => '\x{00C7}',
+         'D' => '\x{00D0}',
+         'E' => '\x{00C8}-\x{00CB}',
+         'I' => '\x{00CC}-\x{00CF}',
+         'N' => '\x{00D1}',
+         'O' => '\x{00D2}-\x{00D6}\x{00D8}',
+         'OE' => '\x{0152}',
+         'S' => '\x{0160}',
+         'U' => '\x{00D9}-\x{00DC}',
+         'Y' => '\x{00DD}',
+         'Z' => '\x{017D}',
+         // lower case
+         'a' => '\x{00E0}-\x{00E5}',
+         'ae' => '\x{00E6}',
+         'c' => '\x{00E7}',
+         'd' => '\x{00F0}',
+         'e' => '\x{00E8}-\x{00EB}',
+         'i' => '\x{00EC}-\x{00EF}',
+         'n' => '\x{00F1}',
+         'o' => '\x{00F2}-\x{00F6}\x{00F8}',
+         'oe' => '\x{0153}',
+         's' => '\x{0161}',
+         'u' => '\x{00F9}-\x{00FC}',
+         'y' => '\x{00FD}\x{00FF}',
+         'z' => '\x{017E}',
+         // sharp s
+         'ss' => '\x{00DF}',
+     );
+ }
+
+ /**
+  * Replaces accented characters with the ASCII equivalents listed in
+  * getAccentsPattern().
+  *
+  * @param string  $str   UTF-8 text
+  * @param boolean $clean When true, the text first goes through cleanUTF8() and a
+  *                       fixed set of typographic symbols is replaced by spaces
+  * @return string
+  */
+ public static function removeAccents($str, $clean = true)
+ {
+     if ($clean) {
+         $str = self::cleanUTF8($str);
+         $typographic = array('’', '”', '“', '•', '…', '€', '–', '‘');
+         $str = str_replace($typographic, ' ', $str);
+     }
+
+     foreach (self::getAccentsPattern() as $ascii => $class) {
+         $str = preg_replace('/[' . $class . ']/u', $ascii, $str);
+     }
+
+     // Maps 'o' onto itself, exactly as the original did; the call is kept so the
+     // return value is still produced by strtr().
+     return strtr($str, 'o', 'o');
+ }
+
+ /**
+  * Strips accents, turns every character outside 0-9/A-Z/a-z into a space and
+  * passes the result through condenseWhite().
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function keepOnlyLettersAndDigits($str)
+ {
+     $unaccented = self::removeAccents($str);
+     $alphanumeric = preg_replace('|[^0-9A-Za-z]|ui', ' ', $unaccented);
+
+     return self::condenseWhite($alphanumeric);
+ }
+
+ /**
+  * Builds a case-insensitive, UTF-8 PCRE pattern matching $str regardless of
+  * accents.
+  *
+  * Every character that is a key of getAccentsPattern() becomes a character
+  * class containing the letter and its accented variants. All other characters
+  * are treated as literal text and escaped with preg_quote(), so characters
+  * such as "|", "." or "(" in $str can no longer break or change the pattern.
+  *
+  * @param string $str Literal text to search for
+  * @return string Complete pattern, "|" delimited, with the "iu" modifiers
+  */
+ public static function makeAccentInsensiblePattern($str)
+ {
+     $patterns = self::getAccentsPattern();
+     $chars = preg_split('//ui', $str, -1, PREG_SPLIT_NO_EMPTY);
+     $pattern = '|';
+     foreach ($chars as $char) {
+         if (isset($patterns[$char])) {
+             // plain letter: accept it or any of its accented forms
+             $pattern .= '[' . $char . $patterns[$char] . ']{1}';
+         } else {
+             // anything else is literal; "|" is the delimiter used below
+             $pattern .= preg_quote($char, '|');
+         }
+     }
+     $pattern .= '|iu';
+     return $pattern;
+ }
+
+ /**
+  * Accent-insensitive replace: builds a pattern from $search with
+  * makeAccentInsensiblePattern() and applies it with preg_replace().
+  *
+  * @param string $search  Text to look for
+  * @param string $replace Replacement handed to preg_replace()
+  * @param string $subject Text to search in
+  * @return string|null Result of preg_replace()
+  */
+ public static function preg_areplace($search, $replace, $subject)
+ {
+     return preg_replace(
+         self::makeAccentInsensiblePattern($search),
+         $replace,
+         $subject
+     );
+ }
+
+ /**
+  * Splits a string on any of several single-character separators.
+  *
+  * Every character of $separator found in $str is first replaced by a
+  * placeholder character, then the string is split on that placeholder with
+  * explode().
+  *
+  * @param string       $separator Characters to split on
+  * @param string       $str       Text to split
+  * @param integer|null $limit     Optional explode() limit
+  * @return array
+  */
+ public static function multiExplode($separator, $str, $limit = null)
+ {
+     // Use the first candidate that does not occur in $str; when all of them
+     // occur, the last candidate is used anyway.
+     $placeholder = null;
+     foreach (array('§', '£', '¤', '#', '¨', '^', '%') as $candidate) {
+         $placeholder = $candidate;
+         if (!stristr($str, $candidate)) {
+             break;
+         }
+     }
+
+     $class = '|[' . preg_quote($separator, '-') . ']|';
+     $marked = preg_replace($class, $placeholder, $str);
+
+     return is_null($limit)
+         ? explode($placeholder, $marked)
+         : explode($placeholder, $marked, $limit);
+ }
+
+ /**
+  * Counts the whitespace-separated words of a string.
+  *
+  * Runs of whitespace count as one separator and leading/trailing whitespace
+  * is ignored, so an empty or blank string has 0 words (splitting on every
+  * single whitespace character used to report 1 for '' and 3 for "a  b").
+  *
+  * @param string $str
+  * @return integer
+  */
+ public static function countWords($str)
+ {
+     $words = preg_split('|\s+|', (string)$str, -1, PREG_SPLIT_NO_EMPTY);
+
+     return count($words);
+ }
+
+ /**
+  * Splits a text into its lines, ignoring surrounding whitespace and
+  * collapsing consecutive line breaks first.
+  *
+  * @param string $str
+  * @return array Lines of the text; an empty array for an empty/blank string
+  */
+ public static function explodeNewLines($str)
+ {
+     $str = trim($str);
+     if ($str === '') {
+         return [];
+     }
+     $str = self::condenseNewLines($str);
+
+     // Split on line feed, vertical tab and form feed only. The previous "\v"
+     // class, used without the "u" modifier, also matched the raw byte 0x85,
+     // which is a continuation byte of UTF-8 characters such as "Å" (C3 85)
+     // and therefore cut those characters in half.
+     return preg_split('|[\n\x0B\f]|', $str);
+ }
+
+ /**
+  * Truncates a text to at most $words words.
+  *
+  * The result is also limited to 6 characters per originally requested word;
+  * when it is longer, the method retries with one word less.
+  *
+  * @param string       $str       Text to truncate
+  * @param integer      $words     Maximum number of words to keep
+  * @param string       $end       Suffix appended only when the text was actually cut
+  * @param integer|null $wordsorig Word count of the initial call (set by the recursion)
+  * @return string
+  */
+ public static function substrWord($str, $words, $end = '', $wordsorig = null)
+ {
+     if (is_null($wordsorig)) {
+         $wordsorig = $words;
+     }
+
+     $maxchars = $wordsorig * 6;
+
+     if (self::countWords($str) <= $words) {
+         $res = $str;
+         $addend = false;
+     } else {
+         // explode() puts the unsplit remainder in the last element, so ask for
+         // one extra element and drop it: that leaves exactly $words words
+         // (a limit of $words used to leave only $words - 1).
+         $e = self::multiExplode(" \n", $str, $words + 1);
+         array_pop($e);
+         $res = implode(' ', $e);
+         $addend = true;
+     }
+
+     if (mb_strlen($res) > $maxchars) {
+         // still too long for the character budget: try again with fewer words
+         return self::substrWord($str, $words - 1, $end, $wordsorig);
+     }
+
+     if ($addend) {
+         $res .= $end;
+     }
+
+     return $res;
+ }
+
+ /**
+  * Truncates a text to at most $chars characters without cutting a word.
+  *
+  * Lengths are counted in characters (mb_* functions), not bytes. A word
+  * that the limit would cut in two is dropped; a word that ends exactly at
+  * the limit is kept.
+  *
+  * @param string  $str   Text to truncate
+  * @param integer $chars Maximum number of characters to keep
+  * @param string  $end   Suffix appended when the text was truncated
+  * @return string The unchanged text when it already fits
+  */
+ public static function substrWordChars($str, $chars, $end = '')
+ {
+     if (mb_strlen($str) <= $chars) {
+         return $str;
+     }
+
+     $head = mb_substr($str, 0, $chars);
+     $next = mb_substr($str, $chars, 1);
+
+     // The last word is incomplete only when the cut falls between two
+     // non-whitespace characters.
+     $cutsWord = !preg_match('|\s$|', $head) && !preg_match('|^\s|', $next);
+
+     $s = preg_split('|\s+|', trim($head));
+     if ($cutsWord) {
+         array_pop($s);
+     }
+
+     return implode(' ', $s) . $end;
+ }
+
+ /**
+  * Multibyte-aware ucfirst().
+  *
+  * @param string  $str
+  * @param boolean $lower When true the whole string is lower-cased first
+  * @return string $str with its first character upper-cased
+  */
+ public static function ucfirst($str, $lower = false)
+ {
+     $text = $lower ? mb_strtolower($str) : $str;
+
+     return mb_strtoupper(mb_substr($text, 0, 1)) . mb_substr($text, 1);
+ }
+
+ /**
+  * Replaces every line feed and carriage return with a space, then passes the
+  * result through condenseWhite().
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function removeNl($str)
+ {
+     $flattened = str_replace(array("\n", "\r"), ' ', $str);
+
+     return self::condenseWhite($flattened);
+ }
+
+ /**
+  * Collapses every run of two or more whitespace characters into one space.
+  *
+  * The run length is unbounded (the former {2,100} bound left two spaces
+  * behind for runs longer than 100 characters).
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function condenseWhite($str)
+ {
+     $condensed = preg_replace('|\s{2,}|u', ' ', $str);
+     if ($condensed === null) {
+         // The "u" modifier rejects invalid UTF-8 and makes preg_replace()
+         // return null; retry byte-wise rather than losing the text.
+         $condensed = preg_replace('|\s{2,}|', ' ', $str);
+     }
+
+     return $condensed;
+ }
+
+ /**
+  * Normalizes line endings with normalizeLines() and collapses every run of
+  * consecutive line feeds into a single one.
+  *
+  * The run length is unbounded (the former {2,100} bound left an empty line
+  * behind for runs longer than 100 line feeds).
+  *
+  * @param string $str
+  * @return string
+  */
+ public static function condenseNewLines($str)
+ {
+     $str = self::normalizeLines($str);
+
+     return preg_replace('|\n{2,}|', "\n", $str);
+ }
+
+ /**
+  * Converts an HTML fragment to plain text: tags are removed with
+  * self::strip_tags() and non-breaking spaces become regular spaces.
+  *
+  * NOTE(review): the search literal of the original str_replace() call was
+  * visually indistinguishable from a plain space. Both spellings of a
+  * non-breaking space (the &nbsp; entity and the UTF-8 bytes of U+00A0) are
+  * now written out explicitly - confirm this matches the intended input.
+  *
+  * @param string $str HTML fragment
+  * @return string
+  */
+ public static function html2text($str)
+ {
+     $res = self::strip_tags($str);
+     $res = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $res);
+
+     return $res;
+ }
+
+ /**
+  * Regex-based tag stripper.
+  *
+  * Removes every HTML tag whose name is not in $allowed_tags. Tag names are
+  * compared case-insensitively. Only tags without attributes, or with
+  * quoted name="value" attributes, are recognised.
+  *
+  * Fixes over the previous version: tags outside the allow-list are now
+  * actually removed (only allowed tags used to be matched, so nothing was
+  * stripped), "<B>" is kept when "b" is allowed, and tag names containing
+  * digits (h1-h6) are recognised.
+  *
+  * @param string       $str          HTML fragment
+  * @param array|string $allowed_tags Tag name(s) to keep, without angle brackets
+  * @param boolean      $trim         When true, returns only the kept tags
+  *                                   concatenated, without any text
+  * @return string
+  */
+ public static function strip_tags($str, $allowed_tags = array(), $trim = false)
+ {
+     // http://www.php.net/manual/fr/function.strip-tags.php#86463
+     if (!is_array($allowed_tags)) {
+         $allowed_tags = !empty($allowed_tags) ? array($allowed_tags) : array();
+     }
+     // the pattern below is case-insensitive, so the allow-list must be too
+     $allowed = array_map('strtolower', $allowed_tags);
+
+     preg_match_all(
+         '@</?\s*([a-z][a-z0-9]*)(\s+[a-z_]+=(\'[^\']+\'|"[^"]+"))*\s*/?>@i',
+         $str,
+         $matches
+     );
+
+     $full_tags = $matches[0];
+     $tag_names = $matches[1];
+
+     foreach ($full_tags as $i => $full_tag) {
+         if (in_array(strtolower($tag_names[$i]), $allowed, true)) {
+             continue;
+         }
+         if ($trim) {
+             unset($full_tags[$i]);
+         } else {
+             $str = str_replace($full_tag, '', $str);
+         }
+     }
+
+     return $trim ? implode('', $full_tags) : $str;
+ }
+
+ /**
+  * Turns arbitrary text into a URL-friendly string.
+  *
+  * Objects are JSON-encoded first. "&amp;" entities are decoded, colons
+  * (and slashes unless $exclude_slashs is set) become spaces, accents are
+  * removed with deaccent(), characters outside the whitelist are dropped and
+  * the result goes through tidyURL().
+  *
+  * @param string|object $str            Text to convert
+  * @param string        $replace        Currently unused
+  * @param boolean       $exclude_slashs When true, slashes are not turned into spaces
+  * @param boolean       $exclude_dots   Currently unused
+  * @return string
+  */
+ public static function str2URL($str, $replace = '-', $exclude_slashs = false, $exclude_dots = false)
+ {
+     if (is_object($str)) {
+         $str = json_encode($str);
+     }
+
+     // Decode the entity before filtering; the search and replacement strings
+     // were identical before, so "a &amp; b" ended up as "a-amp-b".
+     $str = str_replace('&amp;', '&', $str);
+     $str = str_replace(':', ' ', $str);
+     if (!$exclude_slashs) {
+         $str = str_replace('/', ' ', $str);
+     }
+
+     $str = self::deaccent($str);
+     $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);
+
+     return self::tidyURL($str, true);
+ }
+
+ /**
+  * Replaces every byte reported by utf8badFind() with $replace, one at a time,
+  * until no bad byte is left.
+  *
+  * @param string $str     Text to clean
+  * @param string $replace Replacement for each bad byte
+  * @return string
+  */
+ public static function cleanUTF8($str, $replace = '?')
+ {
+     for (;;) {
+         $bad_index = self::utf8badFind($str);
+         if ($bad_index === false) {
+             break;
+         }
+         $str = substr_replace($str, $replace, $bad_index, 1);
+     }
+
+     // NOTE(review): single-quoted, so these are the literal three-character
+     // sequences backslash-1-6 and backslash-1-8, not control characters -
+     // confirm that this is what was meant.
+     return str_replace(array('\16', '\18'), $replace, $str);
+ }
+
+ /**
+  * Decodes one HTML entity, given with or without its "&" and ";" delimiters.
+  *
+  * @param string $code Entity name or numeric reference, e.g. "eacute" or "&#233;"
+  * @return string Result of html_entity_decode() in UTF-8
+  */
+ public static function getChar($code)
+ {
+     $entity = '&' . trim($code, '&;') . ';';
+
+     return html_entity_decode($entity, ENT_QUOTES, 'UTF-8');
+ }
+
+ /**
+  * Generates random filler text made of lower-case letters and spaces.
+  *
+  * @param integer $length Number of characters to generate
+  * @return string Exactly $length characters (the loop used to run once too
+  *                often and returned $length + 1)
+  */
+ public static function randText($length = 300)
+ {
+     // vowels and spaces are repeated to make them more frequent
+     $str = 'aeiouy azertyuiopqsdfghjklmwxcvbn eaiouaeiou ';
+     $nb = strlen($str) - 1;
+     $res = '';
+     for ($i = 0; $i < $length; $i++) {
+         $res .= $str[rand(0, $nb)];
+     }
+     return $res;
+ }
+
+ /**
+  * Extracts the words of at least 3 characters from a text, keeping their
+  * original case. The text goes through html::clean() before matching.
+  *
+  * @param string $str
+  * @return array List of words; empty when nothing matches
+  */
+ public static function splitWordsWithCase($str)
+ {
+     // code points that cannot be part of a word
+     $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+
+     $found = preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match);
+
+     return $found ? $match[1] : array();
+ }
+
+ /**
+  * Finds the words of $str that appear in a reference list.
+  *
+  * Words of $str are compared after accent removal and lower-casing; the
+  * words are returned as originally spelled in $str.
+  *
+  * @param string       $str  Text to scan
+  * @param array|string $list Reference words: an array used as is, or a string
+  *                           that is split with splitWords()
+  * @return array|false Matching words from $str, or false when there is none
+  */
+ public static function find_words_from_list($str, $list)
+ {
+     $words = array_unique(self::splitWordsWithCase($str));
+     $reference = is_array($list) ? $list : array_unique(self::splitWords($list));
+
+     $originals = array(); // normalized form => spellings found in $str
+     $normalized = array();
+     foreach ($words as $word) {
+         // mb_strtolower() for consistency with splitWords(); strtolower()
+         // works on bytes
+         $key = mb_strtolower(self::removeAccents($word));
+         $originals[$key][] = $word;
+         $normalized[] = $key;
+     }
+
+     $common = array_intersect($reference, $normalized);
+     if (!$common) {
+         return false;
+     }
+
+     $res = array();
+     foreach ($common as $key) {
+         $res = array_merge($res, $originals[$key]);
+     }
+     return $res;
+ }
+
+ /**
+  * Splits a string into an array of single characters using the mb_*
+  * functions.
+  *
+  * @param string $string
+  * @return array One element per character; empty for an empty string
+  */
+ public static function mb_str_split($string)
+ {
+     $chars = array();
+     $total = mb_strlen($string);
+     $pos = 0;
+
+     while ($pos < $total) {
+         $chars[] = mb_substr($string, $pos, 1);
+         $pos++;
+     }
+
+     return $chars;
+ }
+
+ /**
+  * Alias of mb_str_split().
+  *
+  * @param string $str
+  * @return array
+  */
+ public static function strToArray($str)
+ {
+     $chars = self::mb_str_split($str);
+
+     return $chars;
+ }
+
+    /**
+     * Decodes a UTF-8 byte string into an array of Unicode code points.
+     *
+     * The input is validated while decoding. Overlong forms, surrogates,
+     * code points above U+10FFFF, stray continuation octets, 5- and 6-octet
+     * sequences and sequences cut short by the end of the string are all
+     * rejected. A byte order mark (U+FEFF) is accepted but not emitted.
+     *
+     * @param string $str UTF-8 encoded bytes
+     * @return array|false List of integer code points, or false when $str is not valid UTF-8
+     */
+    public static function utf8ToUnicode($str)
+    {
+        $mState = 0; // continuation octets still expected for the current character
+        $mUcs4 = 0; // code point being assembled
+        $mBytes = 1; // total octets announced by the current lead octet
+
+        $out = array();
+
+        $len = strlen($str);
+        for ($i = 0; $i < $len; $i++) {
+            // Bracket offset: the `$str{$i}` form was removed in PHP 8.0.
+            $in = ord($str[$i]);
+
+            if (0 == $mState) {
+                // Expecting a US-ASCII octet or the lead octet of a sequence.
+                if (0 == (0x80 & $in)) {
+                    // US-ASCII, pass straight through.
+                    $out[] = $in;
+                    $mBytes = 1;
+                } elseif (0xC0 == (0xE0 & $in)) {
+                    // Lead octet of a 2-octet sequence
+                    $mUcs4 = ($in & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+                } elseif (0xE0 == (0xF0 & $in)) {
+                    // Lead octet of a 3-octet sequence
+                    $mUcs4 = ($in & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+                } elseif (0xF0 == (0xF8 & $in)) {
+                    // Lead octet of a 4-octet sequence
+                    $mUcs4 = ($in & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+                } else {
+                    // Stray continuation octet, 0xFE/0xFF, or the lead of a
+                    // 5-/6-octet sequence. Those long forms can never encode a
+                    // legal code point, so there is no need to read them through.
+                    return false;
+                }
+                continue;
+            }
+
+            // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
+            if (0x80 != (0xC0 & $in)) {
+                return false;
+            }
+            $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);
+
+            if (0 != --$mState) {
+                continue;
+            }
+
+            // End of the sequence: $mUcs4 holds the decoded code point.
+            if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || // non-shortest forms
+                ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
+                ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
+                (($mUcs4 & 0xFFFFF800) == 0xD800) || // surrogates
+                ($mUcs4 > 0x10FFFF) // beyond the Unicode range
+            ) {
+                return false;
+            }
+            if (0xFEFF != $mUcs4) {
+                // BOM is legal but we don't want to output it
+                $out[] = $mUcs4;
+            }
+            $mUcs4 = 0;
+            $mBytes = 1;
+        }
+
+        // The string ended in the middle of a multi-octet sequence.
+        if (0 != $mState) {
+            return false;
+        }
+        return $out;
+    }
+
+    /**
+     * Encodes an array of Unicode code points as a UTF-8 string.
+     *
+     * Astral planes are supported, i.e. the ints in the input can be
+     * greater than 0xFFFF. Occurrences of the BOM (0xFEFF) are skipped.
+     *
+     * @param array $arr Integer code points
+     * @return string|false UTF-8 string, or false when an entry is negative,
+     *                      a surrogate (0xD800-0xDFFF) or above 0x10FFFF
+     */
+    public static function unicodeToUtf8($arr)
+    {
+        $utf8 = '';
+        foreach ($arr as $cp) {
+            if ($cp < 0) {
+                return false;
+            }
+            if ($cp <= 0x007f) {
+                // 1 octet
+                $utf8 .= chr($cp);
+                continue;
+            }
+            if ($cp <= 0x07ff) {
+                // 2 octets
+                $utf8 .= chr(0xc0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x003f));
+                continue;
+            }
+            if ($cp == 0xFEFF) {
+                // zap the BOM
+                continue;
+            }
+            if ($cp >= 0xD800 && $cp <= 0xDFFF) {
+                // surrogate
+                return false;
+            }
+            if ($cp <= 0xffff) {
+                // 3 octets
+                $utf8 .= chr(0xe0 | ($cp >> 12))
+                    . chr(0x80 | (($cp >> 6) & 0x003f))
+                    . chr(0x80 | ($cp & 0x003f));
+                continue;
+            }
+            if ($cp <= 0x10ffff) {
+                // 4 octets
+                $utf8 .= chr(0xf0 | ($cp >> 18))
+                    . chr(0x80 | (($cp >> 12) & 0x3f))
+                    . chr(0x80 | (($cp >> 6) & 0x3f))
+                    . chr(0x80 | ($cp & 0x3f));
+                continue;
+            }
+            // out of range
+            return false;
+        }
+        return $utf8;
+    }
+
+    /**
+     * Returns the UTF-8 encoding of a single code point.
+     *
+     * @param int $n Unicode code point
+     * @return string|false Whatever unicodeToUtf8() returns for a one-element array
+     */
+    public static function uchr($n)
+    {
+        $codepoints = array($n);
+
+        return self::unicodeToUtf8($codepoints);
+    }
+
+    /**
+     * Returns the code point of the first character of a UTF-8 string.
+     *
+     * @param string $c UTF-8 string
+     * @return int|null First code point, or null when $c is empty or
+     *                  utf8ToUnicode() rejects it
+     */
+    public static function uord($c)
+    {
+        $codepoints = self::utf8ToUnicode($c);
+
+        // utf8ToUnicode() returns false on invalid input; array_shift(false)
+        // is a warning in PHP 7 and a TypeError in PHP 8.
+        if (!is_array($codepoints)) {
+            return null;
+        }
+        return array_shift($codepoints);
+    }
+
+    /**
+     * Compares two strings with optional normalisation.
+     *
+     * Normalisation order: trim, accent removal, lower-casing. The result
+     * is then that of the native strcmp().
+     *
+     * @param string      $s1
+     * @param string      $s2
+     * @param bool        $ignoreCase    Lower-case both strings with mb_strtolower() first
+     * @param bool        $ignoreAccents Run both strings through removeAccents() first
+     * @param bool|string $trim          false: no trimming; true: trim default whitespace;
+     *                                   string: list of characters to trim
+     * @return int Less than 0, 0 or greater than 0, as for strcmp()
+     */
+    public static function strcmp($s1, $s2, $ignoreCase = false, $ignoreAccents = false, $trim = false)
+    {
+        if ($trim === true) {
+            // A bare `true` used to be coerced to the character list "1".
+            $s1 = trim($s1);
+            $s2 = trim($s2);
+        } elseif ($trim !== false) {
+            $s1 = trim($s1, $trim);
+            $s2 = trim($s2, $trim);
+        }
+        if ($ignoreAccents) {
+            $s1 = self::removeAccents($s1);
+            $s2 = self::removeAccents($s2);
+        }
+        if ($ignoreCase) {
+            $s1 = mb_strtolower($s1);
+            $s2 = mb_strtolower($s2);
+        }
+
+        return \strcmp($s1, $s2);
+    }
+
+    /**
+     * Collapses every run of whitespace (line breaks included) into a
+     * single space.
+     *
+     * @param string $input
+     * @return string|null Result of preg_replace()
+     */
+    public static function removeNewLines($input)
+    {
+        return preg_replace('|\s+|', ' ', $input);
+    }
+
+    /**
+     * Splits a text into lines, accepting "\r\n", "\r" and "\n" as line breaks.
+     *
+     * @param string  $str     Text to split
+     * @param boolean $compact When true, each line is trimmed and empty lines are dropped
+     * @return array
+     */
+    public static function splitLines($str, $compact = true)
+    {
+        // "\r\n" is replaced before the lone "\r" so it yields one break, not two.
+        $lines = explode("\n", str_replace(array("\r\n", "\r"), "\n", $str));
+
+        if (!$compact) {
+            return $lines;
+        }
+
+        $kept = array_filter(array_map('trim', $lines), function ($line) {
+            return $line !== '';
+        });
+
+        return array_values($kept);
+    }
+
+    /**
+     * parse_url() with a few conveniences.
+     *
+     * - Protocol-relative URLs ("//host/...") are given an "http:" scheme.
+     * - With $forceScheme, a URL without scheme is re-parsed with "http://"
+     *   in front of it.
+     * - 'query_params' holds the query string decoded by parse_str().
+     * - 'path_components' holds the non-empty segments of the path.
+     *
+     * @param string $url
+     * @param bool   $forceScheme
+     * @return array|false Result of parse_url(), plus the extra keys when applicable
+     */
+    public static function parseUrl($url, $forceScheme = true)
+    {
+        $url = trim($url);
+        if (strncmp($url, '//', 2) === 0) {
+            $url = 'http:' . $url;
+        }
+
+        $parts = parse_url($url);
+        if ($forceScheme && !isset($parts['scheme'])) {
+            $parts = parse_url('http://' . $url);
+        }
+
+        if (isset($parts['query'])) {
+            parse_str($parts['query'], $params);
+            $parts['query_params'] = $params;
+        }
+
+        if (isset($parts['path'])) {
+            $segments = explode('/', trim($parts['path'], '/'));
+            $parts['path_components'] = array_values(array_filter($segments, function ($segment) {
+                return $segment !== '';
+            }));
+        }
+
+        return $parts;
+    }
+
+    /**
+     * Picks the singular or plural label for a quantity.
+     *
+     * The singular is used for any quantity of 1 or less.
+     *
+     * @param mixed        $nb        Quantity (converted with intval())
+     * @param string       $singulier Singular label
+     * @param string       $pluriel   Plural label
+     * @param string|false $zero      When truthy, returned as is for a quantity of 0
+     * @param bool         $displayNb Prefix the label with the quantity and a space
+     * @return string
+     */
+    public static function pluriel($nb, $singulier, $pluriel, $zero = false, $displayNb = true)
+    {
+        $count = intval($nb);
+
+        if ($count == 0 && $zero) {
+            return $zero;
+        }
+
+        $prefix = $displayNb ? $count . ' ' : '';
+        $label = ($count <= 1) ? $singulier : $pluriel;
+
+        return $prefix . $label;
+    }
+
+    /**
+     * Normalises line endings.
+     *
+     * Everything is first converted to "\n"; with $os set to 'win' the
+     * result is then converted to "\r\n".
+     *
+     * @param string $text
+     * @param string $os 'win' for CRLF output, anything else for LF
+     * @return string
+     */
+    public static function normalizeLines($text, $os = 'nix')
+    {
+        $unix = str_replace(array("\r\n", "\r"), "\n", $text);
+
+        return ($os == 'win') ? str_replace("\n", "\r\n", $unix) : $unix;
+    }
+
+    /**
+     * Converts an underscore-separated identifier to camel case.
+     *
+     * @param string $str        Identifier to convert
+     * @param bool   $upperFirst true for StudlyCase ("FooBar"), false for camelCase ("fooBar")
+     * @return string
+     */
+    public static function underscoreToCamelCase($str, $upperFirst = false)
+    {
+        // $upperFirst used to be ignored; Str::camel() is Str::studly() with a
+        // lower-cased first letter.
+        return $upperFirst ? Str::studly($str) : Str::camel($str);
+    }
+
+    /**
+     * Converts a camel-case identifier to its underscore form by
+     * delegating to Str::snake().
+     *
+     * @param string $str Identifier to convert
+     * @return string
+     */
+    public static function camelCaseToUnderscore($str)
+    {
+        $snake = Str::snake($str);
+
+        return $snake;
+    }
+
+ // Stops orphans in HTML: swaps the last occurrence of $find in $str for $replace, then does the same for the space next to ':', '!', '?', '«' and '»'. Returns the modified string.
+ public static function preventOrphans($str)
+ { // NOTE(review): $find and $replace render identically here; $replace was presumably meant to be a non-breaking space (or its HTML entity) - confirm the actual bytes.
+
+ $find = ' '; // What to search for
+ $replace = ' '; // What to replace it with
+
+ $last_space = strrpos($str, $find); // Byte offset of the last occurrence, or false when there is none
+
+ if ($last_space !== false) {
+ $str = substr_replace($str, $replace, $last_space, strlen($find)); // Only that single occurrence is swapped
+ }
+
+ // Also replace the space that goes with spaced punctuation (e.g. in French)
+ $punctuations = array(' :', ' !', ' ?', '« ', ' »');
+ $replacements = array("{$replace}:", "{$replace}!", "{$replace}?", "«{$replace}", "{$replace}»");
+ $str = str_replace($punctuations, $replacements, $str); // Applies to every occurrence, not just the last one
+
+ return $str;
+ }
+
+    /**
+     * Check email address
+     *
+     * Returns true if $email matches the addr-spec grammar built below
+     * (local part and domain made of dot-separated atoms, quoted strings
+     * or domain literals). The whole string must match: a trailing line
+     * break is rejected.
+     *
+     * @param string $email Email string
+     * @return boolean
+     * @link http://www.iamcal.com/publish/articles/php/parsing_email/
+     *
+     * @copyright Cal Henderson
+     * @license http://creativecommons.org/licenses/by-sa/2.5/ CC-BY-SA
+     */
+    public static function isEmail($email)
+    {
+        $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
+        $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
+        $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
+        $quoted_pair = '\\x5c[\\x00-\\x7f]';
+        $domain_literal = "\\x5b($dtext|$quoted_pair)*\\x5d";
+        $quoted_string = "\\x22($qtext|$quoted_pair)*\\x22";
+        $domain_ref = $atom;
+        $sub_domain = "($domain_ref|$domain_literal)";
+        $word = "($atom|$quoted_string)";
+        $domain = "$sub_domain(\\x2e$sub_domain)*";
+        $local_part = "$word(\\x2e$word)*";
+        $addr_spec = "$local_part\\x40$domain";
+
+        // `D` (PCRE_DOLLAR_ENDONLY): without it `$` also matches just before a
+        // final "\n", so "user@example.com\n" was accepted as valid.
+        return (bool)preg_match("!^$addr_spec$!D", $email);
+    }
+
+    /**
+     * Accents replacement
+     *
+     * Replaces the accented Latin characters listed in the table below by
+     * their ASCII representation (for instance "é" becomes "e" and "ß"
+     * becomes "ss").
+     *
+     * @param string $str String to deaccent
+     * @return string
+     */
+    public static function deaccent($str)
+    {
+        // ASCII replacement => character-class body of the code points it stands for
+        $table = array(
+            'A' => '\x{00C0}-\x{00C5}',
+            'AE' => '\x{00C6}',
+            'C' => '\x{00C7}',
+            'D' => '\x{00D0}',
+            'E' => '\x{00C8}-\x{00CB}',
+            'I' => '\x{00CC}-\x{00CF}',
+            'N' => '\x{00D1}',
+            'O' => '\x{00D2}-\x{00D6}\x{00D8}',
+            'OE' => '\x{0152}',
+            'S' => '\x{0160}',
+            'U' => '\x{00D9}-\x{00DC}',
+            'Y' => '\x{00DD}',
+            'Z' => '\x{017D}',
+
+            'a' => '\x{00E0}-\x{00E5}',
+            'ae' => '\x{00E6}',
+            'c' => '\x{00E7}',
+            'd' => '\x{00F0}',
+            'e' => '\x{00E8}-\x{00EB}',
+            'i' => '\x{00EC}-\x{00EF}',
+            'n' => '\x{00F1}',
+            'o' => '\x{00F2}-\x{00F6}\x{00F8}',
+            'oe' => '\x{0153}',
+            's' => '\x{0161}',
+            'u' => '\x{00F9}-\x{00FC}',
+            'y' => '\x{00FD}\x{00FF}',
+            'z' => '\x{017E}',
+
+            'ss' => '\x{00DF}',
+        );
+
+        foreach ($table as $ascii => $range) {
+            $str = preg_replace('/[' . $range . ']/u', $ascii, $str);
+        }
+
+        return $str;
+    }
+
+    /**
+     * URL cleanup
+     *
+     * Strips tags and the characters ? & # = + < > " %, turns apostrophes
+     * into spaces, collapses whitespace, optionally turns slashes and
+     * spaces into dashes, collapses repeated dashes and finally removes
+     * dot segments ("./", "../") and any leading slash.
+     *
+     * @param string $str URL to tidy
+     * @param boolean $keep_slashes Keep slashes in URL
+     * @param boolean $keep_spaces Keep spaces in URL
+     * @return string
+     */
+    public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
+    {
+        $str = strip_tags($str);
+        $str = str_replace(array('?', '&', '#', '=', '+', '<', '>', '"', '%'), '', $str);
+        $str = str_replace("'", ' ', $str);
+        $str = preg_replace('/[\s]+/u', ' ', trim($str));
+
+        if (!$keep_slashes) {
+            $str = str_replace('/', '-', $str);
+        }
+
+        if (!$keep_spaces) {
+            $str = str_replace(' ', '-', $str);
+        }
+
+        $str = preg_replace('/[-]+/', '-', $str);
+
+        // Remove path changes in URL. Dot segments go first: removing "../"
+        // from "..//etc" leaves a leading slash, which the second step must
+        // still see. All leading slashes are dropped, not only the first.
+        $str = preg_replace('%\.+/%', '', $str);
+        $str = preg_replace('%^/+%', '', $str);
+
+        return $str;
+    }
+
+    /**
+     * Cut string
+     *
+     * Returns the string cut on a whitespace boundary so that it is no
+     * longer than $l characters. When the first word alone reaches $l
+     * characters, that word is cut at $l characters instead.
+     *
+     * @param string $str String to cut
+     * @param integer $l Length to keep
+     * @return string
+     */
+    public static function cutString($str, $l)
+    {
+        // Words and the whitespace runs between them, in order.
+        $tokens = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+        if (mb_strlen($tokens[0]) >= $l) {
+            return mb_substr($tokens[0], 0, $l);
+        }
+
+        $kept = '';
+        $used = 0;
+        foreach ($tokens as $token) {
+            $used += mb_strlen($token);
+            if ($used > $l) {
+                break;
+            }
+            $kept .= $token;
+        }
+
+        return trim($kept);
+    }
+
+    /**
+     * Split words
+     *
+     * Returns the lower-cased words of a given string. The string is first
+     * passed through html::clean(); a word is a run of at least $minChar
+     * characters that are not listed in the separator class below.
+     *
+     * @param string $str Words to split
+     * @param integer $minChar Minimum length of a word
+     * @return array
+     */
+    public static function splitWords($str, $minChar = 3)
+    {
+        $separators = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';
+        $regex = '/([^' . $separators . ']{' . $minChar . ',})/msu';
+
+        if (!preg_match_all($regex, html::clean($str), $groups)) {
+            return array();
+        }
+
+        return array_map('mb_strtolower', $groups[1]);
+    }
+
+    /**
+     * Encoding detection
+     *
+     * Returns the encoding (in lowercase) that mb_detect_encoding() picks
+     * for $str among UTF-8 and the ISO-8859 family listed below. A space is
+     * appended to $str before detection.
+     *
+     * @param string $str String
+     * @return string
+     */
+    public static function detectEncoding($str)
+    {
+        $candidates = array(
+            'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3',
+            'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
+            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15',
+        );
+
+        $detected = mb_detect_encoding($str . ' ', implode(',', $candidates));
+
+        return strtolower($detected);
+    }
+
+    /**
+     * Find bad UTF8 tokens
+     *
+     * Locates the first bad byte in a UTF-8 string, returning its byte
+     * index in the string.
+     * PCRE pattern to locate bad bytes in a UTF-8 string.
+     * Comes from W3 FAQ: Multilingual Forms
+     * Note: modified to include full ASCII range including control chars
+     *
+     * @param string $str String to search
+     * @return integer|false Byte index of the first bad byte, false when there is none
+     * @link http://phputf8.sourceforge.net
+     *
+     * @copyright Harry Fuecks
+     * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html GNU LGPL 2.1
+     */
+    public static function utf8badFind($str)
+    {
+        // Byte-mode pattern (no `u`): one valid UTF-8 character, or, as a last
+        // resort captured in group 2, one byte that fits none of the forms.
+        $UTF8_BAD =
+            '([\x00-\x7F]' . // ASCII (including control chars)
+            '|[\xC2-\xDF][\x80-\xBF]' . // non-overlong 2-byte
+            '|\xE0[\xA0-\xBF][\x80-\xBF]' . // excluding overlongs
+            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . // straight 3-byte
+            '|\xED[\x80-\x9F][\x80-\xBF]' . // excluding surrogates
+            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . // planes 1-3
+            '|[\xF1-\xF3][\x80-\xBF]{3}' . // planes 4-15
+            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . // plane 16
+            '|(.{1}))'; // invalid byte
+
+        $len = strlen($str);
+        $pos = 0;
+
+        // Walk the string with an offset and the `A` (anchored) modifier
+        // instead of re-copying the remainder with substr() at every step,
+        // which was quadratic on long strings.
+        while ($pos < $len && preg_match('/' . $UTF8_BAD . '/AS', $str, $matches, 0, $pos)) {
+            if (isset($matches[2])) {
+                return $pos;
+            }
+            $pos += strlen($matches[0]);
+        }
+        return false;
+    }
+
+
+    /**
+     * BOM removal
+     *
+     * Removes every UTF-8 byte order mark (bytes EF BB BF) found in the
+     * string, wherever it occurs.
+     *
+     * @param string $str String to clean
+     * @return string
+     */
+    public static function removeBOM($str)
+    {
+        // The mark is spelled with escapes: an invisible literal is easily
+        // lost by editors, and an empty needle is an error for substr_count().
+        return str_replace("\xEF\xBB\xBF", '', $str);
+    }
+
+    /**
+     * Quoted printable conversion
+     *
+     * Encodes given str to quoted printable: every byte below 32, above
+     * 126 or equal to "=" becomes "=XX". The input is split on "\n" or
+     * "\r\n" and each line, the last one included, is terminated with
+     * "\r\n". No soft line breaks are inserted.
+     *
+     * @param string $str String to encode
+     * @return string
+     */
+    public static function QPEncode($str)
+    {
+        $res = '';
+
+        // Byte-mode split: with the `u` modifier, input that is not valid
+        // UTF-8 made preg_split() fail and the result came out empty.
+        foreach (preg_split("/\r?\n/", $str) as $line) {
+            $encoded = '';
+            $length = strlen($line);
+
+            for ($i = 0; $i < $length; $i++) {
+                $byte = ord($line[$i]);
+
+                if ($byte < 32 || $byte == 61 || $byte > 126) {
+                    $encoded .= sprintf('=%02X', $byte);
+                } else {
+                    $encoded .= $line[$i];
+                }
+            }
+
+            $res .= $encoded . "\r\n";
+        }
+        return $res;
+    }
}
\ No newline at end of file