* @copyright four for business AG
 * @license http://www.contenido.org/license/LIZENZ.txt
 * @link http://www.4fb.de
 * @link http://www.contenido.org
 */

defined('CON_FRAMEWORK') || die('Illegal call: Missing framework initialization - request aborted.');

/**
 * String helper class.
 *
 * Collection of static string utilities built on top of the (multi-byte
 * aware) primitives inherited from cStringMultiByteWrapper.
 *
 * @package Core
 * @subpackage Util
 */
class cString extends cStringMultiByteWrapper {

    /**
     * Replaces the first case-insensitive occurrence of a string.
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to replace
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string ($subject unchanged if $find does not occur)
     */
    public static function iReplaceOnce($find, $replace, $subject) {
        // Search on lower-cased copies so the match ignores case.
        $start = parent::findFirstPos(parent::toLowerCase($subject), parent::toLowerCase($find));

        if ($start === false) {
            return $subject;
        }

        return self::_replaceAt($find, $replace, $subject, $start);
    }

    /**
     * Replaces the last case-insensitive occurrence of a string.
     *
     * Caution: This function only takes strings as parameters, not arrays!
     *
     * @param string $find
     *         String to find
     * @param string $replace
     *         String to replace
     * @param string $subject
     *         String to process
     * @return string
     *         Processed string ($subject unchanged if $find does not occur)
     */
    public static function iReplaceOnceReverse($find, $replace, $subject) {
        // Search on lower-cased copies so the match ignores case.
        $start = self::posReverse(parent::toLowerCase($subject), parent::toLowerCase($find));

        if ($start === false) {
            return $subject;
        }

        return self::_replaceAt($find, $replace, $subject, $start);
    }

    /**
     * Replaces the part of $subject that starts at $start and is as long as
     * $find with $replace.
     *
     * @param string $find
     *         The matched string (only its length is used)
     * @param string $replace
     *         String to insert
     * @param string $subject
     *         String to process
     * @param int $start
     *         Position of the match within $subject
     * @return string
     */
    private static function _replaceAt($find, $replace, $subject, $start) {
        $end = $start + parent::getStringLength($find);

        $first = parent::getPartOfString($subject, 0, $start);
        $last = parent::getPartOfString($subject, $end, parent::getStringLength($subject) - $end);

        return $first . $replace . $last;
    }

    /**
     * Finds the position of the last (non-overlapping) occurrence of a string.
     *
     * NOTE: The native strrpos function of PHP 4 only accepted a single
     * character as needle, which is why this helper exists.
     *
     * Occurrences are collected left to right, each search continuing right
     * after the previous match; the position of the last match is returned.
     *
     * @param string $haystack
     *         String to search in
     * @param string $needle
     *         String to search for
     * @param int $start [optional]
     *         Offset at which the search starts
     * @return int|bool
     *         Position of the last occurrence, or false if the needle is
     *         empty or does not occur at/after $start
     */
    public static function posReverse($haystack, $needle, $start = 0) {
        $needleLength = parent::getStringLength($needle);

        // An empty needle can never advance the search offset; bail out
        // instead of looping/recursing forever.
        if ($needleLength == 0) {
            return false;
        }

        $lastPos = false;
        $offset = $start;

        while (($pos = parent::findFirstPos($haystack, $needle, $offset)) !== false) {
            $lastPos = $pos;
            // Continue right behind the occurrence just found.
            $offset = $pos + $needleLength;
        }

        return $lastPos;
    }

    /**
     * Adds slashes to passed variable or array.
     *
     * Arrays are processed recursively, element by element.
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function addSlashes($value) {
        if (is_array($value)) {
            return array_map(array('cString', 'addSlashes'), $value);
        }

        return addslashes($value);
    }

    /**
     * Removes slashes from passed variable or array.
     *
     * Arrays are processed recursively, element by element.
     *
     * @param string|array $value
     *         Either a string or a multi-dimensional array of values
     * @return string|array
     */
    public static function stripSlashes($value) {
        if (is_array($value)) {
            return array_map(array('cString', 'stripSlashes'), $value);
        }

        return stripslashes($value);
    }

    /**
     * Checks if the string haystack ends with needle.
     *
     * An empty needle always counts as a match.
     *
     * @param string $haystack
     *         The string to check
     * @param string $needle
     *         The string with which it should end
     * @return bool
     */
    public static function endsWith($haystack, $needle) {
        $length = parent::getStringLength($needle);
        if ($length == 0) {
            return true;
        }

        // Compare the trailing part of the haystack with the needle.
        return parent::getPartOfString($haystack, -$length) === $needle;
    }

    /**
     * Returns true if needle can be found in haystack.
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @return bool
     */
    public static function contains($haystack, $needle) {
        return parent::findFirstPos($haystack, $needle) !== false;
    }

    /**
     * Implementation of PHP 5.3's strstr with beforeNeedle.
     *
     * Uses mb_strstr if available, strstr otherwise. The needle is always
     * treated as one string (not as a set of delimiter characters).
     *
     * @param string $haystack
     *         String to be searched
     * @param string $needle
     *         String to search for
     * @param bool $beforeNeedle [optional]
     *         If true, return everything BEFORE the first occurrence of
     *         needle, otherwise everything from that occurrence on
     * @return string|bool
     *         The requested part of haystack, or false if needle is not found
     * @link http://php.net/manual/de/function.mb-strstr.php
     * @link http://php.net/manual/de/function.strstr.php
     */
    public static function strstr($haystack, $needle, $beforeNeedle = false) {
        $beforeNeedle = (bool) $beforeNeedle;

        if (self::_functionExists('mb_strstr')) {
            return mb_strstr($haystack, $needle, $beforeNeedle);
        }

        return strstr($haystack, $needle, $beforeNeedle);
    }

    /**
     * This function checks if a given format is accepted by php's date function.
     *
     * The current time is formatted with $format and the result is parsed
     * back with the same format; the format is valid if parsing succeeds.
     *
     * @param string $format
     *         format according to date function specification
     * @return bool
     *         true if format is correct, false otherwise
     */
    public static function validateDateFormat($format) {
        $formattedNow = date($format, time());

        return false !== DateTime::createFromFormat($format, $formattedNow);
    }

    /**
     * Extract a number from a string.
     *
     * Removes every character that is not an ASCII digit. Note that the
     * passed variable itself is modified as well.
     *
     * @param string $string
     *         String var by reference
     * @return string
     */
    public static function extractNumber(&$string) {
        $string = preg_replace('/[^0-9]/', '', $string);

        return $string;
    }

    /**
     * Returns whether a string is UTF-8 encoded or not.
     *
     * The check walks the string byte by byte: every lead byte must be
     * followed by the matching number of continuation bytes (10xxxxxx).
     * Lead bytes 0xC0/0xC1 and anything above 0xF4 are rejected. Overlong
     * three byte forms and surrogates are not detected by this check.
     *
     * @param string $input
     * @return bool
     */
    public static function isUtf8($input) {
        // The string is indexed byte-wise below, so the BYTE length is
        // needed here, not the multi-byte character count.
        $len = strlen($input);

        for ($i = 0; $i < $len; $i++) {
            $char = ord($input[$i]);

            if ($char < 0x80) {
                // ASCII char
                continue;
            } else if (($char & 0xE0) === 0xC0 && $char > 0xC1) {
                // 2 byte long char
                $n = 1;
            } else if (($char & 0xF0) === 0xE0) {
                // 3 byte long char
                $n = 2;
            } else if (($char & 0xF8) === 0xF0 && $char < 0xF5) {
                // 4 byte long char
                $n = 3;
            } else {
                return false;
            }

            // Consume the expected continuation bytes.
            for ($j = 0; $j < $n; $j++) {
                $i++;
                if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Checks if a value is alphanumeric.
     *
     * Accepted are the letters a-z (any case), digits and spaces, plus the
     * german umlauts and sharp s if $umlauts is set. The empty string is
     * not alphanumeric.
     *
     * @param mixed $test
     *         Value to test
     * @param bool $umlauts [optional]
     *         Use german umlauts
     * @return int|bool
     *         Result of preg_match: 1 if the value is alphanumeric, 0 if
     *         not, false if the match failed
     */
    public static function isAlphanumeric($test, $umlauts = true) {
        // The D modifier makes "$" match at the very end only; without it
        // a value with a trailing newline would be accepted.
        if ($umlauts == true) {
            $match = "/^[a-z0-9ÄäÖöÜüß ]+$/iD";
        } else {
            $match = "/^[a-z0-9 ]+$/iD";
        }

        return preg_match($match, $test);
    }

    /**
     * Trims a string to a given length and makes sure that all words up to
     * $maxlen are preserved, without exceeding $maxlen.
     *
     * Warning: Currently, this function uses a regular ASCII-Whitespace to do
     * the separation test. If you are using non-breaking spaces (e.g.
     * "&nbsp;") to create spaces, this function will fail.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimAfterWord($string, 15);
     *
     * This would output "This is a", since this function respects word
     * boundaries and doesn't operate beyond the limit given by $maxlen.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @return string
     *         The resulting string
     */
    public static function trimAfterWord($string, $maxlen) {
        // A string that already fits into $maxlen characters needs no
        // trimming at all.
        if (parent::getStringLength($string) <= $maxlen) {
            return $string;
        }

        // If the character after the $maxlen position is a space, the cut
        // falls exactly on a word boundary.
        if (parent::getPartOfString($string, $maxlen, 1) == ' ') {
            return parent::getPartOfString($string, 0, $maxlen);
        }

        // Cut the string up to $maxlen and drop the (partial) last word.
        $cutted_string = parent::getPartOfString($string, 0, $maxlen);

        // Extract the end of the last complete word
        $last_word_position = parent::findLastPos($cutted_string, ' ');

        return parent::getPartOfString($cutted_string, 0, $last_word_position);
    }

    /**
     * Trims a string to a specific length.
     *
     * If the string is longer than $maxlen, it is cut and the fillup string
     * ("..." by default) is appended, so that the result is $maxlen
     * characters long including the fillup.
     *
     * Example:
     * $string = "This is a simple test";
     * echo cString::trimHard ($string, 15);
     *
     * This would output "This is a si...", since the string is longer than
     * $maxlen and the resulting string matches 15 characters including the dots.
     *
     * @param string $string
     *         The string to operate on
     * @param int $maxlen
     *         The maximum number of characters
     * @param string $fillup [optional]
     *         String appended to a trimmed string
     * @return string
     *         The resulting string
     */
    public static function trimHard($string, $maxlen, $fillup = '...') {
        // A string that already fits into $maxlen characters needs no
        // trimming at all.
        if (parent::getStringLength($string) <= $maxlen) {
            return $string;
        }

        // Calculate the maximum text length
        $maximum_text_length = $maxlen - parent::getStringLength($fillup);

        // If text length is over zero cut it
        if ($maximum_text_length > 0) {
            // Try the UTF-8 aware patterns first, fall back to a plain cut.
            if (preg_match('/(*UTF8)^.{0,' . $maximum_text_length . '}/', $string, $result_array)) {
                $cutted_string = $result_array[0];
            } else if (preg_match('/^.{0,' . $maximum_text_length . '}/u', $string, $result_array)) {
                $cutted_string = $result_array[0];
            } else {
                $cutted_string = parent::getPartOfString($string, 0, $maximum_text_length);
            }
        } else {
            // No room for any text next to the fillup: keep the full string.
            $cutted_string = $string;
        }

        // Append the fillup string
        $cutted_string .= $fillup;

        return $cutted_string;
    }

    /**
     * Trims a string to a approximate length preserving sentence boundaries.
     *
     * A sentence boundary is a dot ("."). The function looks for the last
     * dot before $approxlen and the first dot at/after $approxlen and cuts
     * the string after whichever of the two is nearer to $approxlen (the
     * previous one wins a tie).
     *
     * Example:
     *
     * $string  = "This contains two sentences. ";
     * $string .= "Lets play around with them. ";
     *
     * echo cString::trimSentence($string, 40);
     * echo cString::trimSentence($string, 50);
     *
     * The first example would only output the first sentence, since the end
     * of the first sentence is nearer to position 40 than the end of the
     * second one. The second example outputs both sentences, since the end
     * of the second sentence is nearer to position 50.
     *
     * If you specify the boolean flag "$hard", the limit parameter creates a
     * hard limit instead of calculating the distance.
     *
     * This function ensures that at least one sentence is returned.
     *
     * @param string $string
     *         The string to operate on
     * @param int $approxlen
     *         The approximate number of characters
     * @param bool $hard [optional]
     *         If true, use a hard limit for the number of characters
     * @return string
     *         The resulting string
     */
    public static function trimSentence($string, $approxlen, $hard = false) {
        // If the string is smaller than the maximum length, it makes no sense to
        // process it any further. Return it.
        if (parent::getStringLength($string) < $approxlen) {
            return $string;
        }

        // Find out the end of the sentence running across $approxlen
        $next_sentence_start = parent::findFirstPos($string, '.', $approxlen);

        // If there's no next sentence (somebody forgot the dot?), set it to the end
        // of the string.
        if ($next_sentence_start === false) {
            $next_sentence_start = parent::getStringLength($string);
        }

        // Cut at $approxlen so the last dot before the limit can be found
        $previous_sentence_cutted = parent::getPartOfString($string, 0, $approxlen);

        // Get out the end of the previous sentence
        $previous_sentence_start = parent::findLastPos($previous_sentence_cutted, '.');

        // If the sentence doesn't contain a dot, use the text start.
        if ($previous_sentence_start === false) {
            $previous_sentence_start = 0;
        }

        // If we have a hard limit, we only want to process everything before
        // $approxlen
        if (($hard == true) && ($next_sentence_start > $approxlen)) {
            return parent::getPartOfString($string, 0, $previous_sentence_start + 1);
        }

        // Calculate next and previous sentence distances
        $distance_previous_sentence = $approxlen - $previous_sentence_start;
        $distance_next_sentence = $next_sentence_start - $approxlen;

        // Sanity: Return at least one sentence.
        $sanity = parent::getPartOfString($string, 0, $previous_sentence_start + 1);

        if (parent::findFirstPos($sanity, '.') === false) {
            return parent::getPartOfString($string, 0, $next_sentence_start + 1);
        }

        // Decide whether the next or previous sentence is nearer
        if ($distance_previous_sentence > $distance_next_sentence) {
            return parent::getPartOfString($string, 0, $next_sentence_start + 1);
        } else {
            return parent::getPartOfString($string, 0, $previous_sentence_start + 1);
        }
    }

    /**
     * Converts diacritics to english characters whenever possible.
     *
     * For german umlauts, this function converts the umlauts to their ASCII
     * equivalents (e.g. ä => ae).
     *
     * For more information about diacritics, refer to
     * http://en.wikipedia.org/wiki/Diacritic
     *
     * For the other characters in the replacement table (vowels with acute,
     * grave or circumflex accent) the diacritic mark is removed.
     *
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding [optional; default: UTF-8]
     *         The source encoding
     * @param string $targetEncoding [optional; default: UTF-8]
     *         The target encoding
     *
     * @return string
     *         The resulting string
     * @throws cInvalidArgumentException
     */
    public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
        // The replacement table below is UTF-8, so work on UTF-8 internally.
        if ($sourceEncoding != 'UTF-8') {
            $string = self::recodeString($string, $sourceEncoding, "UTF-8");
        }

        // replace regular german umlauts and other common characters with
        // diacritics (tables are built once and kept for later calls)
        static $search, $replace;
        if (!isset($search)) {
            $search = array(
                'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'ß',
                'Á', 'À', 'Â', 'á', 'à', 'â',
                'É', 'È', 'Ê', 'é', 'è', 'ê',
                'Í', 'Ì', 'Î', 'í', 'ì', 'î',
                'Ó', 'Ò', 'Ô', 'ó', 'ò', 'ô',
                'Ú', 'Ù', 'Û', 'ú', 'ù', 'û'
            );
            $replace = array(
                'Ae', 'Oe', 'Ue', 'ae', 'oe', 'ue', 'ss',
                'A', 'A', 'A', 'a', 'a', 'a',
                'E', 'E', 'E', 'e', 'e', 'e',
                'I', 'I', 'I', 'i', 'i', 'i',
                'O', 'O', 'O', 'o', 'o', 'o',
                'U', 'U', 'U', 'u', 'u', 'u'
            );
        }
        $string = str_replace($search, $replace, $string);

        // TODO: Additional converting

        return self::recodeString($string, "UTF-8", $targetEncoding);
    }

    /**
     * Converts a string to another encoding.
     *
     * This function tries to detect which function to use (either recode or
     * iconv).
     *
     * If $sourceEncoding and $targetEncoding are the same (compared case
     * insensitively), this function returns immediately.
     *
     * For more information about encodings, refer to
     * http://en.wikipedia.org/wiki/Character_encoding
     *
     * For more information about the supported encodings in recode, refer to
     * http://www.delorie.com/gnu/docs/recode/recode_toc.html
     *
     * Note: depending on whether recode or iconv is used, the supported
     * charsets differ. The following ones are commonly used and are most likely
     * supported by both converters:
     *
     * - ISO-8859-1 to ISO-8859-15
     * - ASCII
     * - UTF-8
     *
     * @todo Check if the charset names are the same for both converters
     * @todo Implement a converter and charset checker to ensure compliance.
     *
     * @param string $string
     *         The string to operate on
     * @param string $sourceEncoding
     *         The source encoding
     * @param string $targetEncoding
     *         The target encoding
     *
     * @return string
     *         The resulting string; the unconverted input if no converter
     *         is available
     * @throws cInvalidArgumentException
     */
    public static function recodeString($string, $sourceEncoding, $targetEncoding) {
        // If sourceEncoding and targetEncoding are the same, return
        if (parent::toLowerCase($sourceEncoding) == parent::toLowerCase($targetEncoding)) {
            return $string;
        }

        // Check for the "recode" support (test the function actually called)
        if (function_exists('recode_string')) {
            return recode_string("$sourceEncoding..$targetEncoding", $string);
        }

        // Check for the "iconv" support
        if (function_exists('iconv')) {
            return iconv($sourceEncoding, $targetEncoding, $string);
        }

        // No charset converters found; return with warning
        cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');

        return $string;
    }

    /**
     * Removes or converts all "evil" URL characters.
     *
     * This function removes or converts all characters which can make an URL
     * invalid.
     *
     * Diacritics are converted first, then spaces, "/", "&" and "+" are
     * turned into "-". Of the result only ASCII letters, digits and the
     * characters "_", "-" and "." are kept; every other character is
     * removed, or replaced by "_" if $replace is set.
     *
     * @param string $string
     *         The string to operate on
     * @param bool $replace [optional]
     *         If true, all "unclean" characters are replaced by "_" instead
     *         of being removed
     *
     * @return string
     *         The resulting string
     * @throws cInvalidArgumentException
     */
    public static function cleanURLCharacters($string, $replace = false) {
        $string = self::replaceDiacritics($string);
        $string = str_replace(array(' ', '/', '&', '+'), '-', $string);

        $iStrLen = parent::getStringLength($string);

        $sResultString = '';

        for ($i = 0; $i < $iStrLen; $i++) {
            $sChar = parent::getPartOfString($string, $i, 1);

            // The D modifier keeps "$" from matching in front of a newline,
            // otherwise a newline character would pass as "clean".
            $bClean = preg_match('/^[a-z0-9]$/iD', $sChar)
                || $sChar == '-' || $sChar == '_' || $sChar == '.';

            if ($bClean) {
                $sResultString .= $sChar;
            } else if ($replace == true) {
                $sResultString .= '_';
            }
        }

        return $sResultString;
    }

    /**
     * Normalizes line endings in passed string.
     *
     * All "\r\n" and "\r" line endings are unified to "\n" first and then
     * converted to the requested line ending.
     *
     * @param string $string
     * @param string $lineEnding [optional]
     *         Feasible values are "\n", "\r" or "\r\n"; any other value is
     *         treated as "\n"
     * @return string
     */
    public static function normalizeLineEndings($string, $lineEnding = "\n") {
        if ($lineEnding !== "\n" && $lineEnding !== "\r" && $lineEnding !== "\r\n") {
            $lineEnding = "\n";
        }

        // Order matters: "\r\n" has to be handled before the single "\r".
        $string = str_replace("\r\n", "\n", $string);
        $string = str_replace("\r", "\n", $string);
        if ($lineEnding !== "\n") {
            $string = str_replace("\n", $lineEnding, $string);
        }

        return $string;
    }

}