733 Zeilen
Kein EOL
23 KiB
PHP
733 Zeilen
Kein EOL
23 KiB
PHP
<?php
|
|
/**
|
|
* This file contains the string utility class.
|
|
*
|
|
* @package Core
|
|
* @subpackage Util
|
|
* @author Murat Purc <murat@purc.de>
|
|
* @copyright four for business AG <www.4fb.de>
|
|
* @license http://www.contenido.org/license/LIZENZ.txt
|
|
* @link http://www.4fb.de
|
|
* @link http://www.contenido.org
|
|
*/
|
|
|
|
// Refuse to run when the CON_FRAMEWORK constant has not been defined before
// this file is loaded.
if (!defined('CON_FRAMEWORK')) {
    die('Illegal call: Missing framework initialization - request aborted.');
}
|
|
|
|
/**
|
|
* String helper class.
|
|
*
|
|
* @package Core
|
|
* @subpackage Util
|
|
*/
|
|
class cString extends cStringMultiByteWrapper {
|
|
|
|
/**
 * Replaces the first case-insensitive occurrence of $find in $subject.
 *
 * The match position is determined on lower-cased copies of both strings;
 * the result is assembled from the untouched original $subject.
 *
 * Caution: This function only takes strings as parameters, not arrays!
 *
 * @param string $find
 *         String to search for (matched case-insensitively)
 * @param string $replace
 *         Replacement text
 * @param string $subject
 *         String to process
 * @return string
 *         $subject with the first match replaced, or the unchanged
 *         $subject when $find does not occur
 */
public static function iReplaceOnce($find, $replace, $subject) {
    $matchPos = parent::findFirstPos(parent::toLowerCase($subject), parent::toLowerCase($find));
    if ($matchPos === false) {
        // Nothing to replace
        return $subject;
    }

    // Position of the first character behind the matched text
    $tailPos = $matchPos + parent::getStringLength($find);
    $head = parent::getPartOfString($subject, 0, $matchPos);
    $tail = parent::getPartOfString($subject, $tailPos, parent::getStringLength($subject) - $tailPos);

    return $head . $replace . $tail;
}
|
|
|
|
/**
 * Replaces the last case-insensitive occurrence of $find in $subject.
 *
 * The match position is looked up with self::posReverse() on lower-cased
 * copies of both strings; the result is assembled from the untouched
 * original $subject.
 *
 * Caution: This function only takes strings as parameters, not arrays!
 *
 * @param string $find
 *         String to search for (matched case-insensitively)
 * @param string $replace
 *         Replacement text
 * @param string $subject
 *         String to process
 * @return string
 *         $subject with the last match replaced, or the unchanged
 *         $subject when $find does not occur
 */
public static function iReplaceOnceReverse($find, $replace, $subject) {
    $matchPos = self::posReverse(parent::toLowerCase($subject), parent::toLowerCase($find));
    if ($matchPos === false) {
        // Nothing to replace
        return $subject;
    }

    // Position of the first character behind the matched text
    $tailPos = $matchPos + parent::getStringLength($find);
    $head = parent::getPartOfString($subject, 0, $matchPos);
    $tail = parent::getPartOfString($subject, $tailPos, parent::getStringLength($subject) - $tailPos);

    return $head . $replace . $tail;
}
|
|
|
|
/**
 * Finds the position of the last occurrence of $needle in $haystack.
 *
 * The haystack is scanned front to back and the search resumes directly
 * behind each hit, so only non-overlapping occurrences are considered
 * (e.g. the result for "aaa" / "aa" is 0).
 *
 * NOTE: This helper exists because PHP 4's strrpos() only accepted a single
 * character as needle.
 *
 * @param string $haystack
 *         String to search in
 * @param string $needle
 *         String to search for
 * @param int $start [optional]
 *         Offset at which the search begins
 * @return int|false
 *         Position of the last occurrence at or after $start, or false if
 *         $needle is empty or does not occur there
 */
public static function posReverse($haystack, $needle, $start = 0) {
    $needleLength = parent::getStringLength($needle);

    // An empty needle can never advance the search offset; bail out instead
    // of looping forever.
    if ($needleLength == 0) {
        return false;
    }

    $lastPos = false;
    $offset = $start;

    while (true) {
        $pos = parent::findFirstPos($haystack, $needle, $offset);
        if ($pos === false) {
            // No more occurrences found
            break;
        }

        // Remember this hit and continue behind it
        $lastPos = $pos;
        $offset = $pos + $needleLength;
    }

    return $lastPos;
}
|
|
|
|
/**
 * Escapes a string, or every string inside a (nested) array, with
 * PHP's addslashes().
 *
 * Arrays are processed recursively; their keys and structure are kept.
 *
 * @param string|array $value
 *         Either a string or a multi-dimensional array of values
 * @return string|array
 *         The escaped string, or an array of the same shape with escaped
 *         values
 */
public static function addSlashes($value) {
    if (is_array($value)) {
        // Recurse into each element
        return array_map(array('cString', 'addSlashes'), $value);
    }

    return addslashes($value);
}
|
|
|
|
/**
 * Un-escapes a string, or every string inside a (nested) array, with
 * PHP's stripslashes().
 *
 * Arrays are processed recursively; their keys and structure are kept.
 *
 * @param string|array $value
 *         Either a string or a multi-dimensional array of values
 * @return string|array
 *         The un-escaped string, or an array of the same shape with
 *         un-escaped values
 */
public static function stripSlashes($value) {
    if (is_array($value)) {
        // Recurse into each element
        return array_map(array('cString', 'stripSlashes'), $value);
    }

    return stripslashes($value);
}
|
|
|
|
/**
 * Tells whether $haystack ends with $needle (case-sensitive).
 *
 * @param string $haystack
 *         The string to check
 * @param string $needle
 *         The expected suffix
 * @return bool
 *         True if $haystack ends with $needle; always true for an empty
 *         $needle
 */
public static function endsWith($haystack, $needle) {
    $needleLength = parent::getStringLength($needle);

    // An empty needle is a suffix of every string; otherwise compare the
    // last $needleLength characters of the haystack with the needle.
    return $needleLength == 0
        || parent::getPartOfString($haystack, -$needleLength) === $needle;
}
|
|
|
|
/**
 * Tells whether $needle occurs anywhere in $haystack.
 *
 * @param string $haystack
 *         String to be searched
 * @param string $needle
 *         String to search for
 * @return bool
 *         True if $needle was found, false otherwise
 */
public static function contains($haystack, $needle) {
    $position = parent::findFirstPos($haystack, $needle);

    // A position of 0 is a valid hit, so compare strictly against false.
    return $position !== false;
}
|
|
|
|
/**
 * Multibyte-aware wrapper around strstr() including the $beforeNeedle flag.
 *
 * Uses mb_strstr() when self::_functionExists() reports it as available,
 * otherwise falls back to strstr().
 *
 * The $beforeNeedle case previously relied on strtok(), which interprets
 * $needle as a list of single delimiter characters rather than as a
 * substring. It now uses the native third argument, so the result matches
 * strstr(): the part of $haystack before the first occurrence of the whole
 * $needle, or false when $needle is not found.
 *
 * @param string $haystack
 *         String to be searched
 * @param string $needle
 *         String to search for
 * @param bool $beforeNeedle [optional]
 *         If true, return everything BEFORE the first occurrence of needle
 *         (excluding the needle); otherwise return the haystack from the
 *         first occurrence onwards
 * @return string|false
 *         The requested part of $haystack, or false if $needle is not found
 * @link http://php.net/manual/de/function.mb-strstr.php
 * @link http://php.net/manual/de/function.strstr.php
 */
public static function strstr($haystack, $needle, $beforeNeedle = false) {
    $beforeNeedle = (bool) $beforeNeedle;

    if (self::_functionExists('mb_strstr')) {
        return mb_strstr($haystack, $needle, $beforeNeedle);
    }

    return strstr($haystack, $needle, $beforeNeedle);
}
|
|
|
|
/**
 * Checks whether a date() format string survives a round trip through
 * DateTime::createFromFormat().
 *
 * The current time is formatted with $format and the result is parsed
 * again using the same format; the format counts as valid if parsing
 * succeeds.
 *
 * @param string $format
 *         Format according to the date() function specification
 * @return bool
 *         True if the formatted date could be parsed back, false otherwise
 */
public static function validateDateFormat($format) {
    $formattedNow = date($format, time());
    $parsed = DateTime::createFromFormat($format, $formattedNow);

    return $parsed !== false;
}
|
|
|
|
/**
 * Strips every character that is not an ASCII digit (0-9) from a string.
 *
 * The passed variable is modified in place AND the cleaned value is
 * returned.
 *
 * @param string $string
 *         String variable, passed by reference and overwritten with the
 *         digits-only result
 * @return string
 *         The digits contained in the input, in their original order
 */
public static function extractNumber(&$string) {
    $digitsOnly = preg_replace('/[^0-9]/', '', $string);

    // Write the result back through the reference as well
    $string = $digitsOnly;

    return $digitsOnly;
}
|
|
|
|
|
|
/**
 * Returns whether a string is structurally valid UTF-8.
 *
 * The input is scanned byte by byte: every lead byte must announce a
 * 2, 3 or 4 byte sequence (0xC2-0xDF, 0xE0-0xEF, 0xF0-0xF4) and must be
 * followed by the matching number of continuation bytes (10xxxxxx).
 * Pure ASCII input and the empty string are valid.
 *
 * Only the lead/continuation byte structure is verified; the value ranges
 * of the continuation bytes are not checked any further.
 *
 * @param string $input
 *         String to check
 * @return bool
 *         True if every byte sequence is well-formed, false otherwise
 */
public static function isUtf8($input) {
    $input = (string) $input;

    // The loop below addresses single BYTES ($input[$i]), so it has to be
    // bounded by the byte length. A character count (as a multibyte-aware
    // length function returns) is smaller for multibyte input and would
    // leave the tail of the string unchecked.
    $len = strlen($input);

    for ($i = 0; $i < $len; $i++) {
        $char = ord($input[$i]);

        if ($char < 0x80) {
            // ASCII char
            continue;
        } elseif (($char & 0xE0) === 0xC0 && $char > 0xC1) {
            // 2 byte long char (0xC0 and 0xC1 would be overlong encodings)
            $n = 1;
        } elseif (($char & 0xF0) === 0xE0) {
            // 3 byte long char
            $n = 2;
        } elseif (($char & 0xF8) === 0xF0 && $char < 0xF5) {
            // 4 byte long char (lead bytes above 0xF4 are not allowed)
            $n = 3;
        } else {
            // Stray continuation byte or invalid lead byte
            return false;
        }

        // Consume the expected continuation bytes
        for ($j = 0; $j < $n; $j++) {
            $i++;

            // Fail on a truncated sequence or a byte that is not 10xxxxxx
            if ($i == $len || (ord($input[$i]) & 0xC0) !== 0x80) {
                return false;
            }
        }
    }

    return true;
}
|
|
|
|
|
|
/**
 * Checks if a value consists only of letters, digits and spaces.
 *
 * Accepted are a-z (case-insensitive), 0-9 and the space character, plus
 * the German umlauts and sharp s when $umlauts is enabled. The value must
 * contain at least one character.
 *
 * @param mixed $test
 *         Value to test; arrays are never alphanumeric, other values are
 *         converted to string
 * @param bool $umlauts [optional]
 *         Use german umlauts
 * @return bool
 *         True if the value is alphanumeric, false otherwise (including
 *         the case that the pattern could not be applied)
 */
public static function isAlphanumeric($test, $umlauts = true) {
    // preg_match() cannot handle arrays
    if (is_array($test)) {
        return false;
    }

    // The D modifier makes "$" match only at the very end; without it a
    // value with a trailing newline ("abc\n") would be accepted.
    // NOTE(review): neither pattern uses the "u" modifier, so the umlauts
    // are matched byte-wise in whatever encoding this source file is saved
    // in - confirm this matches the encoding of the tested values.
    if ($umlauts == true) {
        $match = "/^[a-z0-9ÄäÖöÜüß ]+$/iD";
    } else {
        $match = "/^[a-z0-9 ]+$/iD";
    }

    // preg_match() yields 1, 0 or false; expose the documented boolean
    return preg_match($match, (string) $test) === 1;
}
|
|
|
|
/**
 * Trims a string to a given length and makes sure that all words up to
 * $maxlen are preserved, without exceeding $maxlen.
 *
 * Warning: Only the regular ASCII space character is recognised as a word
 * separator. Text that separates words with non-breaking spaces (e.g. the
 * HTML entity &nbsp;) is not split correctly.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cString::trimAfterWord($string, 15);
 *
 * This would output "This is a", since this function respects word
 * boundaries and doesn't operate beyond the limit given by $maxlen.
 *
 * @param string $string
 *         The string to operate on
 * @param int $maxlen
 *         The maximum number of characters
 * @return string
 *         The unchanged string if it fits into $maxlen, otherwise the
 *         string cut at the last word boundary within $maxlen. An empty
 *         string is returned when the first $maxlen characters contain no
 *         space at all.
 */
public static function trimAfterWord($string, $maxlen) {
    // A string that fits into the limit (including an exact fit) needs no
    // trimming.
    if (parent::getStringLength($string) <= $maxlen) {
        return $string;
    }

    // If the character after the $maxlen position is a space, the cut
    // falls exactly on a word boundary.
    if (parent::getPartOfString($string, $maxlen, 1) == ' ') {
        return parent::getPartOfString($string, 0, $maxlen);
    }

    // Cut the string at $maxlen so the last space before the limit can be
    // located.
    $cuttedString = parent::getPartOfString($string, 0, $maxlen);
    $lastWordPosition = parent::findLastPos($cuttedString, ' ');

    // No word boundary inside the limit: no complete word fits.
    if ($lastWordPosition === false) {
        return '';
    }

    return parent::getPartOfString($cuttedString, 0, $lastWordPosition);
}
|
|
|
|
/**
 * Trims a string to a specific length.
 *
 * If the string is longer than $maxlen, it is cut and $fillup ("..." by
 * default) is appended so that the result is $maxlen characters long
 * including the fillup.
 *
 * Example:
 * $string = "This is a simple test";
 * echo cString::trimHard ($string, 15);
 *
 * This would output "This is a si...", since the string is longer than
 * $maxlen and the resulting string matches 15 characters including the dots.
 *
 * @param string $string
 *         The string to operate on
 * @param int $maxlen
 *         The maximum number of characters
 * @param string $fillup [optional]
 *         Text appended to a trimmed string
 * @return string
 *         The unchanged string if it fits into $maxlen, otherwise the
 *         trimmed string followed by $fillup
 */
public static function trimHard($string, $maxlen, $fillup = '...') {
    // A string that fits into the limit (including an exact fit) needs no
    // trimming.
    if (parent::getStringLength($string) <= $maxlen) {
        return $string;
    }

    // Number of characters of the original text that may be kept. Cast to
    // int because the value is interpolated into the patterns below.
    $maximumTextLength = (int) ($maxlen - parent::getStringLength($fillup));

    if ($maximumTextLength > 0) {
        // Try the UTF-8 aware patterns first; fall back to a plain
        // substring when neither pattern can be applied to the string.
        // Note that "." does not match a newline, so the regex variants
        // stop at the first line break.
        if (preg_match('/(*UTF8)^.{0,' . $maximumTextLength . '}/', $string, $resultArray)) {
            $cuttedString = $resultArray[0];
        } elseif (preg_match('/^.{0,' . $maximumTextLength . '}/u', $string, $resultArray)) {
            $cuttedString = $resultArray[0];
        } else {
            $cuttedString = parent::getPartOfString($string, 0, $maximumTextLength);
        }
    } else {
        // NOTE(review): when $maxlen does not even leave room for $fillup,
        // the complete string is kept and $fillup is still appended, so
        // the result exceeds $maxlen - confirm this is intended.
        $cuttedString = $string;
    }

    // Append the fillup string
    return $cuttedString . $fillup;
}
|
|
|
|
/**
 * Trims a string to an approximate length preserving sentence boundaries.
 *
 * A sentence end is a "." character. The function locates the last
 * sentence end before $approxlen and the first sentence end at or after
 * $approxlen and cuts the string at whichever of the two is closer to
 * $approxlen (the earlier one on a tie).
 *
 * Example:
 *
 * $string  = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo cString::trimSentence($string, 40);
 * echo cString::trimSentence($string, 50);
 *
 * The first call outputs only the first sentence, the second call both
 * sentences.
 *
 * Explanation:
 *
 * The sentences end at positions 27 and 55. For a length of 40 the
 * previous sentence end is 13 characters away and the next one 15, so the
 * string is cut after the first sentence. For a length of 50 the previous
 * sentence end is 23 characters away but the next one only 5, so both
 * sentences are returned.
 *
 * If you specify the boolean flag "$hard", the limit parameter creates a
 * hard limit instead of calculating the distance: the string is cut at the
 * last sentence end before $approxlen.
 *
 * This function ensures that at least one sentence is returned. If no
 * sentence ends before $approxlen, the text up to the next sentence end
 * (or the whole string, if there is none) is returned - also in hard mode.
 *
 * @param string $string
 *         The string to operate on
 * @param int $approxlen
 *         The approximate number of characters
 * @param bool $hard [optional]
 *         If true, use a hard limit for the number of characters
 * @return string
 *         The resulting string
 */
public static function trimSentence($string, $approxlen, $hard = false) {
    // If the string is shorter than the requested length there is nothing
    // to trim.
    if (parent::getStringLength($string) < $approxlen) {
        return $string;
    }

    // First sentence end at or behind $approxlen. If there is none
    // (somebody forgot the dot?), use the end of the string.
    $nextSentenceEnd = parent::findFirstPos($string, '.', $approxlen);
    if ($nextSentenceEnd === false) {
        $nextSentenceEnd = parent::getStringLength($string);
    }

    // Last sentence end in front of $approxlen
    $head = parent::getPartOfString($string, 0, $approxlen);
    $previousSentenceEnd = parent::findLastPos($head, '.');

    // No complete sentence in front of the limit: return at least one
    // sentence. This has to be decided before the hard limit is applied,
    // otherwise hard mode would cut the string after its first character.
    if ($previousSentenceEnd === false) {
        return parent::getPartOfString($string, 0, $nextSentenceEnd + 1);
    }

    // With a hard limit only text before $approxlen may be returned,
    // unless the next sentence ends exactly at the limit.
    if (($hard == true) && ($nextSentenceEnd > $approxlen)) {
        return parent::getPartOfString($string, 0, $previousSentenceEnd + 1);
    }

    // Distances from the requested length to both sentence ends
    $distancePrevious = $approxlen - $previousSentenceEnd;
    $distanceNext = $nextSentenceEnd - $approxlen;

    // Cut at the nearer sentence end; prefer the shorter text on a tie
    if ($distancePrevious > $distanceNext) {
        return parent::getPartOfString($string, 0, $nextSentenceEnd + 1);
    }

    return parent::getPartOfString($string, 0, $previousSentenceEnd + 1);
}
|
|
|
|
/**
 * Converts characters with diacritics to plain ASCII letters.
 *
 * German umlauts and sharp s are transliterated (e.g. ä => ae, ß => ss).
 * For the other characters in the replacement table (acute, grave and
 * circumflex variants of a, e, i, o and u in both cases) the diacritic mark
 * is dropped. Characters that are not part of the table stay untouched.
 *
 * The string is converted from $sourceEncoding to UTF-8 first (if
 * necessary), the replacement is done, and the result is converted to
 * $targetEncoding by self::recodeString().
 *
 * For more information about diacritics, refer to
 * http://en.wikipedia.org/wiki/Diacritic
 *
 * @param string $string
 *         The string to operate on
 * @param string $sourceEncoding [optional; default: UTF-8]
 *         The source encoding
 * @param string $targetEncoding [optional; default: UTF-8]
 *         The target encoding
 *
 * @return string
 *         The resulting string
 * @throws cInvalidArgumentException
 */
public static function replaceDiacritics($string, $sourceEncoding = 'UTF-8', $targetEncoding = 'UTF-8') {
    // The replacement table is only applied to UTF-8 text
    if ($sourceEncoding != 'UTF-8') {
        $string = self::recodeString($string, $sourceEncoding, "UTF-8");
    }

    // Search/replace lists are built once and reused on later calls
    static $search = null, $replace = null;
    if ($search === null) {
        $map = array(
            // German umlauts and sharp s
            'Ä' => 'Ae',
            'Ö' => 'Oe',
            'Ü' => 'Ue',
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
            // A / a
            'Á' => 'A',
            'À' => 'A',
            'Â' => 'A',
            'á' => 'a',
            'à' => 'a',
            'â' => 'a',
            // E / e
            'É' => 'E',
            'È' => 'E',
            'Ê' => 'E',
            'é' => 'e',
            'è' => 'e',
            'ê' => 'e',
            // I / i
            'Í' => 'I',
            'Ì' => 'I',
            'Î' => 'I',
            'í' => 'i',
            'ì' => 'i',
            'î' => 'i',
            // O / o
            'Ó' => 'O',
            'Ò' => 'O',
            'Ô' => 'O',
            'ó' => 'o',
            'ò' => 'o',
            'ô' => 'o',
            // U / u
            'Ú' => 'U',
            'Ù' => 'U',
            'Û' => 'U',
            'ú' => 'u',
            'ù' => 'u',
            'û' => 'u'
        );
        $search = array_keys($map);
        $replace = array_values($map);
    }

    $string = str_replace($search, $replace, $string);

    // TODO: Additional converting

    return self::recodeString($string, "UTF-8", $targetEncoding);
}
|
|
|
|
/**
 * Converts a string to another encoding.
 *
 * The conversion is done with recode_string() if the recode extension is
 * available, otherwise with iconv(). If neither is available, a warning is
 * issued through cWarning() and the string is returned unchanged.
 *
 * If $sourceEncoding and $targetEncoding are the same (compared
 * case-insensitively), or $targetEncoding is false, this function returns
 * the string immediately without converting it.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode, refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the supported
 * charsets differ. The following ones are commonly used and are most likely
 * supported by both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $string
 *         The string to operate on
 * @param string $sourceEncoding
 *         The source encoding
 * @param string|bool $targetEncoding
 *         The target encoding (if false, use source encoding)
 *
 * @return string
 *         The resulting string; the value returned by the converter is
 *         passed through as is, including its failure value
 * @throws cInvalidArgumentException
 */
public static function recodeString($string, $sourceEncoding, $targetEncoding) {
    // A target of false means "keep the source encoding": nothing to do
    if ($targetEncoding === false) {
        return $string;
    }

    // If sourceEncoding and targetEncoding are the same, return
    if (parent::toLowerCase($sourceEncoding) == parent::toLowerCase($targetEncoding)) {
        return $string;
    }

    // Prefer the "recode" extension; check for the function that is
    // actually called
    if (function_exists('recode_string')) {
        return recode_string("$sourceEncoding..$targetEncoding", $string);
    }

    // Fall back to "iconv"
    if (function_exists('iconv')) {
        return iconv($sourceEncoding, $targetEncoding, $string);
    }

    // No charset converters found; return with warning
    cWarning(__FILE__, __LINE__, 'cString::recodeString could not find either recode or iconv to do charset conversion.');

    return $string;
}
|
|
|
|
/**
 * Removes or converts all "evil" URL characters.
 *
 * Processing steps:
 * - characters with diacritics are transliterated by
 *   self::replaceDiacritics()
 * - space, "/", "&" and "+" are converted to "-"
 * - of the remaining characters only a-z, A-Z, 0-9, "-", "_" and "." are
 *   kept; every other character (including line breaks) is either removed
 *   or, if $replace is true, replaced by "_"
 *
 * @param string $string
 *         The string to operate on
 * @param bool $replace [optional]
 *         If true, all "unclean" characters are replaced by "_" instead of
 *         being removed
 *
 * @return string
 *         The resulting string
 * @throws cInvalidArgumentException
 */
public static function cleanURLCharacters($string, $replace = false) {
    $string = self::replaceDiacritics($string);
    $string = str_replace(array(' ', '/', '&', '+'), '-', $string);

    $iStrLen = parent::getStringLength($string);
    $sResultString = '';

    for ($i = 0; $i < $iStrLen; $i++) {
        $sChar = parent::getPartOfString($string, $i, 1);

        // Nothing to examine at this position
        if ($sChar === '' || $sChar === false) {
            continue;
        }

        // Exactly one allowed character. The D modifier keeps "$" from
        // matching in front of a trailing newline, which would otherwise
        // let a "\n" character pass as clean.
        if (preg_match('/^[a-z0-9_.\-]$/iD', $sChar)) {
            $sResultString .= $sChar;
        } elseif ($replace == true) {
            $sResultString .= '_';
        }
    }

    return $sResultString;
}
|
|
|
|
/**
 * Converts all line endings of a string to one uniform line ending.
 *
 * Windows ("\r\n") and old Mac ("\r") line endings are unified to "\n"
 * first and then converted to the requested line ending.
 *
 * @param string $string
 *         The string to normalize
 * @param string $lineEnding [optional]
 *         Feasible values are "\n", "\r" or "\r\n"; any other value falls
 *         back to "\n"
 * @return string
 *         The string with uniform line endings
 */
public static function normalizeLineEndings($string, $lineEnding = "\n") {
    $feasibleEndings = array("\n", "\r", "\r\n");
    if (!in_array($lineEnding, $feasibleEndings, true)) {
        $lineEnding = "\n";
    }

    // "\r\n" has to be handled before the single "\r", otherwise every
    // Windows line ending would turn into two line breaks.
    $normalized = str_replace(array("\r\n", "\r"), "\n", $string);

    if ($lineEnding === "\n") {
        return $normalized;
    }

    return str_replace("\n", $lineEnding, $normalized);
}
|
|
|
|
/**
 * Converts a "missing" value to an empty string.
 *
 * null, false and an empty array are turned into ''. Every other value is
 * returned unchanged - in particular the string "0" and numeric zero, which
 * are valid values and must not be discarded.
 *
 * @param string|null $string
 *         Value to convert
 * @return string
 *         Empty string for a missing value, otherwise the passed value
 */
public static function nullToString($string) {
    // Deliberately no empty() here: it would also swallow "0", 0 and 0.0
    if ($string === null || $string === false || $string === array()) {
        return '';
    }

    return $string;
}
|
|
} |