ConLite/conlite/includes/functions.api.string.php

<?php
/**
 * Project:
 * Contenido Content Management System
 *
 * Description:
 * Contenido String API functions
 *
 * Requirements:
 * @con_php_req 5.0
 *
 *
 * @package    Contenido Backend includes
 * @version    1.6.0
 * @author     Timo A. Hummel
 * @copyright  four for business AG <www.4fb.de>
 * @license    http://www.contenido.org/license/LIZENZ.txt
 * @link       http://www.4fb.de
 * @link       http://www.contenido.org
 * @since      file available since contenido release <= 4.6
 *
 * {@internal
 *   created 2003-08-08
 *   modified 2008-06-25, Frederic Schneider, add security fix
 *   modified 2008-09-15, Murat Purc, add replacement of characters with diacritics
 *   modified 2009-04-30, Ortwin Pinke, CON-252
 *   modified 2010-01-07, Ingo van Peren, CON-293
 *
 *   $Id$:
 * }}
 *
 */

// Abort immediately unless the CON_FRAMEWORK constant has been defined.
if (defined('CON_FRAMEWORK') === false) {
    exit('Illegal call');
}

/**
 * Info:
 * This file contains Contenido String API functions.
 *
 * If you are planning to add a function, please make sure that:
 * 1.) The function is in the correct place
 * 2.) The function is documented
 * 3.) The function makes sense and is generically usable
 *
 */


/**
 * capiStrTrimAfterWord: Trims a string to at most $maxlen
 * characters without cutting a word in half.
 *
 * Strings that already fit into $maxlen are returned unchanged.
 * Longer strings are cut back to the last space found within the
 * first $maxlen characters. If those characters contain no space
 * at all (the first word alone is too long), an empty string is
 * returned.
 *
 * Warning: Word boundaries are detected by looking for a regular
 * ASCII space only. Other separators, such as '&nbsp;', are not
 * recognised. Lengths are measured with strlen()/substr(), i.e.
 * in bytes.
 *
 * Example:
 * $string = "This is a simple test";
 * echo capiStrTrimAfterWord($string, 15);
 *
 * This outputs "This is a": the 15 character limit falls inside
 * the word "simple", so that word is dropped completely.
 *
 * @param string $string The string to operate on
 * @param int    $maxlen The maximum number of characters
 *
 * @return string The resulting string
 */
function capiStrTrimAfterWord ($string, $maxlen)
{
    /* A string that fits into the limit (including one that is
       exactly $maxlen long) needs no trimming. */
    if (strlen($string) <= $maxlen) {
        return $string;
    }

    /* The character right behind the limit is a space, so the
       limit coincides with the end of a word. */
    if (substr($string, $maxlen, 1) == ' ') {
        return substr($string, 0, $maxlen);
    }

    /* The limit falls inside a word: look for the last space
       within the allowed range and cut there. */
    $cutted_string = substr($string, 0, $maxlen);
    $last_word_position = strrpos($cutted_string, ' ');

    /* No space at all means not even the first word fits. */
    if ($last_word_position === false) {
        return '';
    }

    return substr($cutted_string, 0, $last_word_position);
}

/**
 * capiStrTrimHard: Trims a string to a specific length. If the
 * string is longer than $maxlen, it is cut and $fillup is appended
 * so that the result is exactly $maxlen characters long.
 *
 * Strings that are not longer than $maxlen are returned unchanged.
 * If $maxlen is too small to hold even $fillup, the result consists
 * of the first $maxlen characters of $fillup, so the limit is never
 * exceeded.
 *
 * Example:
 * $string = "This is a simple test";
 * echo capiStrTrimHard($string, 15);
 *
 * This outputs "This is a si...": the string is longer than
 * $maxlen and the result is 15 characters long including the dots.
 *
 * @param string $string The string to operate on
 * @param int    $maxlen The maximum number of characters
 * @param string $fillup The marker appended to a trimmed string (default: "...")
 *
 * @return string The resulting string
 */
function capiStrTrimHard ($string, $maxlen, $fillup = "...")
{
    /* A string that fits into the limit (including one that is
       exactly $maxlen long) needs no trimming. */
    if (strlen($string) <= $maxlen) {
        return $string;
    }

    /* Number of characters of the original text that remain
       once room for the fillup marker has been reserved. */
    $maximum_text_length = $maxlen - strlen($fillup);

    /* The marker alone is longer than the limit. A negative length
       would make substr() cut from the end of the string instead,
       so return as much of the marker as fits. */
    if ($maximum_text_length < 0) {
        return substr($fillup, 0, max(0, $maxlen));
    }

    return substr($string, 0, $maximum_text_length) . $fillup;
}

/**
 * capiStrTrimSentence: Trims a string to an approximate length
 * while preserving sentence boundaries.
 *
 * A sentence is considered to end at a full stop ('.'). The
 * function looks for the last full stop before position
 * $approxlen and for the first full stop at or after it, and cuts
 * the string behind whichever of the two is closer to $approxlen.
 * If both are equally close, the earlier one is used.
 *
 * Example:
 *
 * $string  = "This contains two sentences. ";
 * $string .= "Lets play around with them. ";
 *
 * echo capiStrTrimSentence($string, 40);
 * echo capiStrTrimSentence($string, 50);
 *
 * The first call only outputs the first sentence, the second call
 * outputs both sentences.
 *
 * Explanation:
 *
 * The sentences end at positions 27 and 55. With an approximate
 * length of 40 the previous sentence end is 13 characters away
 * and the next one 15 characters, so the string is cut after the
 * first sentence. With an approximate length of 50 the previous
 * sentence end is 23 characters away but the next one only 5, so
 * the second sentence is included.
 *
 * If the boolean flag $hard is set, the next sentence is never
 * included when it ends behind $approxlen; the string is cut after
 * the previous sentence instead.
 *
 * This function ensures that at least one sentence is returned:
 * if no sentence ends before $approxlen, the text up to and
 * including the next full stop is returned, even when $hard is
 * set. If the string contains no further full stop, the whole
 * string is returned.
 *
 * @param string  $string    The string to operate on
 * @param int     $approxlen The approximate number of characters
 * @param boolean $hard      If true, use a hard limit for the number of characters (default: false)
 * @return string The resulting string
 */
function capiStrTrimSentence ($string, $approxlen, $hard = false)
{
    /* A string that is not longer than the requested length is
       returned as it is. */
    if (strlen($string) <= $approxlen) {
        return $string;
    }

    /* Position of the first full stop at or behind $approxlen. If
       there is none (somebody forgot the dot?), the end of the
       string takes its place. */
    $next_sentence_end = strpos($string, '.', $approxlen);

    if ($next_sentence_end === false) {
        $next_sentence_end = strlen($string);
    }

    /* Position of the last full stop in front of $approxlen */
    $previous_sentence_end = strrpos(substr($string, 0, $approxlen), '.');

    /* No sentence ends in front of $approxlen. At least one
       sentence has to be returned, so the text up to the next
       full stop is used. This must be decided before the hard
       limit is applied, otherwise the hard limit would cut the
       string down to its first character. */
    if ($previous_sentence_end === false) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    /* With a hard limit the next sentence is not an option when
       it ends behind $approxlen. */
    if (($hard == true) && ($next_sentence_end > $approxlen)) {
        return substr($string, 0, $previous_sentence_end + 1);
    }

    /* Distances from $approxlen to both candidate positions */
    $distance_previous_sentence = $approxlen - $previous_sentence_end;
    $distance_next_sentence = $next_sentence_end - $approxlen;

    /* Cut behind the sentence end that is nearer to $approxlen */
    if ($distance_previous_sentence > $distance_next_sentence) {
        return substr($string, 0, $next_sentence_end + 1);
    }

    return substr($string, 0, $previous_sentence_end + 1);
}

/**
 * capiStrReplaceDiacritics: Converts diacritics
 * to English characters whenever possible.
 *
 * The string is first brought into UTF-8 and then a fixed list of
 * German umlauts, 'ß' and accented vowels (acute, grave and
 * circumflex) is replaced by plain ASCII equivalents. Characters
 * that are not in that list are left untouched.
 *
 * $sourceEncoding and $targetEncoding are not used anymore and only
 * remain for compatibility reasons; the result is always UTF-8.
 *
 * Encoding handling:
 * - With mbstring the input encoding is detected among UTF-8,
 *   ISO-8859-1 and ISO-8859-15 and converted to UTF-8.
 * - Without mbstring, input that already is valid UTF-8 is used as
 *   it is. Anything else is treated as ISO-8859-1 and converted
 *   with iconv() or, if that is missing as well, utf8_encode().
 *
 * @since CL 2.0
 *
 * @staticvar array $aSearch
 * @staticvar array $aReplace
 * @param string $sString The string to operate on
 * @param string $sourceEncoding Unused, kept for compatibility
 * @param string $targetEncoding Unused, kept for compatibility
 * @return string returns cleaned string
 */
function capiStrReplaceDiacritics($sString, $sourceEncoding = null, $targetEncoding = null) {
    static $aSearch, $aReplace;
    if (!isset($aSearch) || !isset($aReplace)) {
        $aSearch  = array('Ä','Ö','Ü','ä','ö','ü','ß','Á','À','Â','á','à','â','É','È','Ê','é','è','ê','Í','Ì','Î','í','ì','î','Ó','Ò','Ô','ó','ò','ô','Ú','Ù','Û','ú','ù','û');
        $aReplace = array('Ae','Oe','Ue','ae','oe','ue','ss','A','A','A','a','a','a','E','E','E','e','e','e','I','I','I','i','i','i','O','O','O','o','o','o','U','U','U','u','u','u');
    }

    // always use utf-8
    if (function_exists("mb_convert_encoding")) {
        $sString = mb_convert_encoding($sString, "UTF-8", mb_detect_encoding($sString, "UTF-8, ISO-8859-1, ISO-8859-15", true));
    } elseif (preg_match('//u', $sString) !== 1) {
        // The empty pattern with the u modifier only matches valid
        // UTF-8, so this branch is reached for other input only.
        // Converting valid UTF-8 again would double-encode it and
        // none of the search characters would be found afterwards.
        if (function_exists("iconv")) {
            $sConverted = iconv("ISO-8859-1", "UTF-8", $sString);
            if ($sConverted !== false) {
                $sString = $sConverted;
            }
        } elseif (function_exists("utf8_encode")) {
            // Last resort; utf8_encode() is deprecated as of PHP 8.2.
            $sString = utf8_encode($sString);
        }
    }

    return str_replace($aSearch, $aReplace, $sString);
}


/**
 * capiStrRecodeString: Converts a string from one character
 * encoding to another.
 *
 * The function picks the first converter that is available:
 * recode (recode_string) is tried first, iconv second. The return
 * value of the converter is handed back as it is.
 *
 * If $sourceEncoding and $targetEncoding are the same, the string
 * is returned immediately without calling any converter. If
 * neither converter is available, a warning is raised through
 * cWarning() and the string is returned unconverted.
 *
 * For more information about encodings, refer to
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * For more information about the supported encodings in recode,
 * refer to
 * http://www.delorie.com/gnu/docs/recode/recode_toc.html
 *
 * Note: depending on whether recode or iconv is used, the
 * supported charsets differ. The following ones are commonly used
 * and are most likely supported by both converters:
 *
 * - ISO-8859-1 to ISO-8859-15
 * - ASCII
 * - UTF-8
 *
 * @todo Check if the charset names are the same for both converters
 * @todo Implement a converter and charset checker to ensure compliance.
 *
 * @param string $sString        The string to operate on
 * @param string $sourceEncoding The encoding $sString currently has
 * @param string $targetEncoding The encoding to convert to
 * @return string The resulting string
 *
 * @author Timo A. Hummel
 * @copyright four for business AG, http://www.4fb.de
 */
function capiStrRecodeString ($sString, $sourceEncoding, $targetEncoding)
{
    /* A conversion is only needed if the encodings differ */
    if ($sourceEncoding != $targetEncoding) {
        /* First choice: the "recode" extension */
        if (function_exists("recode")) {
            return recode_string("$sourceEncoding..$targetEncoding", $sString);
        }

        /* Second choice: "iconv" */
        if (function_exists("iconv")) {
            return iconv($sourceEncoding, $targetEncoding, $sString);
        }

        /* No charset converter found; warn and fall through to
           return the string as it was passed in */
        cWarning(__FILE__, __LINE__, "capiStrRecodeString could not find either recode or iconv to do charset conversion.");
    }

    return $sString;
}

/**
 * capiStrCleanURLCharacters: Removes or converts all "evil"
 * URL characters.
 *
 * This function removes or converts all characters which can
 * make a URL invalid:
 *
 * 1. Diacritics are converted with capiStrReplaceDiacritics().
 * 2. Spaces and the characters / & + are turned into "-".
 * 3. Every remaining byte that is not an ASCII letter, a digit or
 *    one of _ - . counts as "unclean". Unclean bytes are removed
 *    or, if $bReplace is set, each replaced by "_".
 *
 * The check in step 3 works byte by byte, so a multi-byte
 * character that survived step 1 yields one "_" per byte when
 * $bReplace is set.
 *
 * @param string  $sString  The string to operate on
 * @param boolean $bReplace If true, all "unclean" characters are replaced
 *                          by "_" instead of being removed (default: false)
 * @return string The resulting string; an empty string if nothing is left
 *
 * @author Timo A. Hummel
 * @copyright four for business AG, http://www.4fb.de
 */
function capiStrCleanURLCharacters ($sString, $bReplace = false)
{
    $sString = capiStrReplaceDiacritics($sString);
    $sString = str_replace(array(" ", "/", "&", "+"), "-", $sString);

    $sReplacement = ($bReplace == true) ? "_" : "";

    /* The negated character class also matches line breaks, so
       they are treated as unclean like any other control character.
       The pattern deliberately has no "u" modifier: it has to work
       byte-wise and must not fail on input that is not valid UTF-8. */
    $sResultString = preg_replace('/[^a-z0-9._-]/i', $sReplacement, $sString);

    /* preg_replace() returns null on error; always hand back a string. */
    if ($sResultString === null) {
        return '';
    }

    return $sResultString;
}
?>