diff --git a/conlite/classes/class.article.collector.php b/conlite/classes/class.article.collector.php
index 0d9a25e..26a3bdb 100644
--- a/conlite/classes/class.article.collector.php
+++ b/conlite/classes/class.article.collector.php
@@ -34,7 +34,7 @@ class cArticleCollector implements SeekableIterator, Countable {
protected $_aStartArticles = array();
protected $_aOptions = array();
protected $_aOptionsDefault = array();
- private $_bAsObject = TRUE;
+ private $_bAsObject = true;
/**
*
@@ -65,14 +65,12 @@ class cArticleCollector implements SeekableIterator, Countable {
}
if (count($this->_aStartArticles) > 0) {
- print_r($this->_aStartArticles);
if ($this->_aOptions['start'] == false) {
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idartlang", $this->_aStartArticles, "NOTIN");
//$sqlStartArticles = "a.idartlang NOT IN ('" . implode("','", $this->_startArticles) . "') AND ";
}
if ($this->_aOptions['startonly'] == true) {
- echo "startonly";
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idartlang", $this->_aStartArticles, "IN");
//$sqlStartArticles = "a.idartlang IN ('" . implode("','", $this->_startArticles) . "') AND ";
}
@@ -89,7 +87,6 @@ class cArticleCollector implements SeekableIterator, Countable {
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idlang", $this->_aOptions['lang']);
$oArtLangColl->query();
- echo $oArtLangColl->_lastSQL;
if ($oArtLangColl->count() > 0) {
$aTable = $oArtLangColl->fetchTable();
//echo $oArtLangColl->_lastSQL;
@@ -97,7 +94,6 @@ class cArticleCollector implements SeekableIterator, Countable {
foreach ($aTable as $aItem) {
$this->_aArticles[] = $aItem['idartlang'];
}
- print_r($this->_aArticles);
}
}
@@ -162,7 +158,7 @@ class cArticleCollector implements SeekableIterator, Countable {
*
* @return cApiArticleLanguage|int returns article language object or idartlang
*/
- public function current() {
+ public function current() :cApiArticleLanguage|int{
$iIdartlang = $this->_aArticles[$this->_iCurrentPosition];
if ($this->_bAsObject) {
$oArticle = new cApiArticleLanguage($iIdartlang);
diff --git a/conlite/classes/class.search.php b/conlite/classes/class.search.php
index b2f6599..5d314b5 100644
--- a/conlite/classes/class.search.php
+++ b/conlite/classes/class.search.php
@@ -225,13 +225,13 @@ class Index extends SearchBaseAbstract {
*
* @var array
*/
- var $cms_type = array();
+ protected static $_cms_type = [];
/**
* the suffix of all available cms types
* @var array
*/
- var $cms_type_suffix = array();
+ protected static $_cms_type_suffix = [];
/**
* Constructor, set object properties
@@ -270,6 +270,8 @@ class Index extends SearchBaseAbstract {
$this->idart = $idart;
}
+ $this->_debug('Start Index for ', $this->idart);
+
$this->place = $place;
$this->keycode = $aContent;
$this->setStopwords($aStopwords);
@@ -283,7 +285,14 @@ class Index extends SearchBaseAbstract {
$old_keys = array_keys($this->keywords_old);
$this->keywords_del = array_diff($old_keys, $new_keys);
-
+ /*
+ echo '<pre>';
+ print_r($new_keys);
+ print_r($old_keys);
+ print_r($this->keywords_del);
+ echo '</pre>';
+ *
+ */
if (count($this->keywords_del) > 0) {
$this->deleteKeywords();
}
@@ -312,7 +321,7 @@ class Index extends SearchBaseAbstract {
foreach ($this->keycode as $idtype => $data) {
if ($this->checkCmsType($idtype)) {
foreach ($data as $typeid => $code) {
- $this->_debug('code', $code);
+ $this->_debug('createKeywords: raw code from data array', $code);
$code = stripslashes($code); // remove backslash
$code = str_ireplace(array('
', '
'), "\n", $code); // replace HTML line breaks with newlines
@@ -320,13 +329,18 @@ class Index extends SearchBaseAbstract {
if (strlen($code) > 0) {
$code = clHtmlEntityDecode($code);
}
- $this->_debug('code', $code);
+ $this->_debug('createKeywords: code after clean', $code);
$tmp_keys = preg_split('/[\s,]+/', trim($code)); // split content by any number of commas or space characters
- $this->_debug('tmp_keys', $tmp_keys);
+ $this->_debug('createKeywords: tmp_keys', $tmp_keys);
foreach ($tmp_keys as $value) {
$value = strtolower($value); // index terms are stored with lower case
+ $value = preg_replace('/[^\w]+/u', '', $value);
+
+ if (empty(trim($value))) {
+ continue;
+ }
if (!in_array($value, $this->stopwords)) {
// eliminate stopwords
@@ -335,6 +349,7 @@ class Index extends SearchBaseAbstract {
if (strlen($value) > 1) {
// do not index single characters
$this->keywords[$value] = $this->keywords[$value] . $idtype . '-' . $typeid . ' ';
+ $this->_debug('createKeywords: entry array keywords', $this->keywords);
}
}
}
@@ -345,7 +360,7 @@ class Index extends SearchBaseAbstract {
}
}
- $this->_debug('keywords', $this->keywords);
+ $this->_debug('createKeywords: keywords returned', $this->keywords);
}
/**
@@ -357,9 +372,10 @@ class Index extends SearchBaseAbstract {
$tmp_count = array();
foreach ($this->keywords as $keyword => $count) {
+ $bProceed = true;
+ $this->_debug('keyword', $keyword);
$tmp_count = preg_split('/[\s]/', trim($count));
$this->_debug('tmp_count', $tmp_count);
-
$occurrence = count($tmp_count);
$tmp_count = array_unique($tmp_count);
$cms_types = implode(',', $tmp_count);
@@ -376,8 +392,12 @@ class Index extends SearchBaseAbstract {
('" . Contenido_Security::escapeDB($keyword, $this->db) . "', '" . Contenido_Security::escapeDB($index_string, $this->db) . "', " . Contenido_Security::toInteger($this->lang) . ", " . Contenido_Security::toInteger($nextid) . ")";
} else {
// if keyword allready exists, create new index_string
- if (preg_match("/&$this->idart=/", $this->keywords_old[$keyword])) {
- $index_string = preg_replace("/&$this->idart=[0-9]+\([\w-,]+\)/", $index_string, $this->keywords_old[$keyword]);
+ if (preg_match("/&" . $this->idart . "=/", $this->keywords_old[$keyword])) {
+ $index_string = preg_replace("/&" . $this->idart . "=[0-9]+\([,\w-]+\)/", $index_string, $this->keywords_old[$keyword]);
+ if ($index_string === $this->keywords_old[$keyword]) {
+ $bProceed = false;
+ $this->_debug('db update', 'no update needed');
+ }
} else {
$index_string = $this->keywords_old[$keyword] . $index_string;
}
@@ -386,9 +406,11 @@ class Index extends SearchBaseAbstract {
SET " . $this->place . " = '" . $index_string . "'
WHERE idlang='" . Contenido_Security::toInteger($this->lang) . "' AND keyword='" . Contenido_Security::escapeDB($keyword, $this->db) . "'";
}
- $this->_debug('sql', $sql);
- $this->db->query($sql);
+ if ($bProceed) {
+ $this->_debug('sql', $sql);
+ $this->db->query($sql);
+ }
}
}
@@ -431,7 +453,7 @@ class Index extends SearchBaseAbstract {
idlang=" . Contenido_Security::toInteger($this->lang) . " AND
(keyword IN ('" . $keys . "') OR " . $this->place . " REGEXP '&" . Contenido_Security::toInteger($this->idart) . "=')";
- $this->_debug('sql', $sql);
+ $this->_debug('getKeywords: sql', $sql);
$this->db->query($sql);
@@ -440,6 +462,8 @@ class Index extends SearchBaseAbstract {
while ($this->db->next_record()) {
$this->keywords_old[$this->db->f('keyword')] = $this->db->f($place);
}
+
+ $this->_debug('getKeywords: array keywords_old', $this->keywords_old);
}
/**
@@ -448,6 +472,7 @@ class Index extends SearchBaseAbstract {
* @return $key
*/
function removeSpecialChars($key) {
+
$aSpecialChars = array(
"-", "_", "'", ".", "!", "\"", "#", "$", "%", "&", "(", ")", "*", "+", ",", "/",
":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "`", "{", "|", "}", "~"
@@ -461,6 +486,7 @@ class Index extends SearchBaseAbstract {
// a client and should not be treated in this method.
// modified 2007-10-01, H. Librenz - added as hotfix for encoding problems (doesn't find any words with
// umlaut vowels in it since you turn on UTF-8 as language encoding)
+
$sEncoding = getEncodingByLanguage($this->db, $this->lang, $this->cfg);
if (strtolower($sEncoding) != 'iso-8859-2') {
@@ -486,6 +512,9 @@ class Index extends SearchBaseAbstract {
$key = clHtmlEntityDecode($key);
$key = str_replace($aSpecialChars, '', $key);
+ ini_set('mbstring.substitute_character', "none"); // NOTE(review): this ini change is not reset here, so it stays active after the call - confirm no other mbstring caller relies on the default substitute character
+ $key = mb_convert_encoding($key, 'UTF-8', 'UTF-8'); // with substitute character "none" this drops byte sequences that are not valid UTF-8; NOTE(review): assumes $key is already UTF-8 here - confirm for languages with another encoding
+
return $key;
}
@@ -516,6 +545,21 @@ class Index extends SearchBaseAbstract {
return $key;
}
+ /**
+ * Returns both CMS type maps; calls setContentTypes() first when the static type map is still empty.
+ * @return array array with arrays of type and typesuffix, keyed 'cms_type' and 'cms_type_suffix'
+ */
+ public function getContentTypes(): array {
+ if (empty(self::$_cms_type)) {
+ $this->setContentTypes();
+ }
+
+ return array(
+ 'cms_type' => self::$_cms_type,
+ 'cms_type_suffix' => self::$_cms_type_suffix
+ );
+ }
+
/**
* set the array of stopwords which should not be indexed
* @param array $aStopwords
@@ -537,8 +581,8 @@ class Index extends SearchBaseAbstract {
$this->_debug('sql', $sql);
$this->db->query($sql);
while ($this->db->next_record()) {
- $this->cms_type[$this->db->f('type')] = $this->db->f('idtype');
- $this->cms_type_suffix[$this->db->f('idtype')] = substr($this->db->f('type'), 4, strlen($this->db->f('type')));
+ self::$_cms_type[$this->db->f('type')] = $this->db->f('idtype');
+ self::$_cms_type_suffix[$this->db->f('idtype')] = substr($this->db->f('type'), 4, strlen($this->db->f('type')));
}
}
@@ -554,11 +598,11 @@ class Index extends SearchBaseAbstract {
if (strlen($opt) > 0) {
if (!stristr($opt, 'cms_')) {
- if (in_array($opt, $this->cms_type_suffix)) {
+ if (in_array($opt, $this->getContentTypes()['cms_type_suffix'])) {
$this->cms_options[$opt] = 'CMS_' . $opt;
}
} else {
- if (array_key_exists($opt, $this->cms_type)) {
+ if (array_key_exists($opt, $this->getContentTypes()['cms_type'])) {
$this->cms_options[$opt] = $opt;
}
}
@@ -788,8 +832,8 @@ class Search extends SearchBaseAbstract {
$this->index = new Index($oDB);
- $this->cms_type = $this->index->cms_type;
- $this->cms_type_suffix = $this->index->cms_type_suffix;
+ $this->cms_type = $this->index->getContentTypes()['cms_type'];
+ $this->cms_type_suffix = $this->index->getContentTypes()['cms_type_suffix'];
$this->search_option = (array_key_exists('db', $options)) ? strtolower($options['db']) : 'regexp';
$this->search_combination = (array_key_exists('combine', $options)) ? strtolower($options['combine']) : 'or';
@@ -1339,11 +1383,11 @@ class SearchResult extends SearchBaseAbstract {
$cms_type = strtoupper($cms_type);
if (strlen($cms_type) > 0) {
if (!stristr($cms_type, 'cms_')) {
- if (in_array($cms_type, $this->index->cms_type_suffix)) {
+ if (in_array($cms_type, $this->index->getContentTypes()['cms_type_suffix'])) { // bare names (without the CMS_ prefix) are the values of the suffix map
$cms_type = 'CMS_' . $cms_type;
}
} else {
- if (!array_key_exists($cms_type, $this->index->cms_type)) {
+ if (!array_key_exists($cms_type, $this->index->getContentTypes()['cms_type'])) { // full type names are the keys of the type map
return array();
}
}
diff --git a/conlite/classes/contenido/class.articlelanguage.php b/conlite/classes/contenido/class.articlelanguage.php
index 3f12816..3327008 100644
--- a/conlite/classes/contenido/class.articlelanguage.php
+++ b/conlite/classes/contenido/class.articlelanguage.php
@@ -1,4 +1,5 @@
select($select);
}
}
-
+
public function getIdArtLang($iIdart, $iIdlang) {
$this->setWhere('idart', Contenido_Security::toInteger($iIdart));
$this->setWhere('idlang', Contenido_Security::toInteger($iIdlang));
- if($this->query() && $this->count() > 0) {
+ if ($this->query() && $this->count() > 0) {
return $this->next()->get('idartlang');
}
return false;
}
+
}
+class cApiArticleLanguage extends Item {
-class cApiArticleLanguage extends Item
-{
/**
- * Constructor Function
- * @param mixed $mId Specifies the ID of item to load
+ *
+ * @global type $cfg
+ * @param type $mId
*/
- public function __construct($mId = false)
- {
+ public function __construct($mId = false) {
global $cfg;
parent::__construct($cfg["tab"]["art_lang"], "idartlang");
$this->setFilters(array(), array());
@@ -63,24 +62,23 @@ class cApiArticleLanguage extends Item
$this->loadByPrimaryKey($mId);
}
}
-
+
public function loadByArticleAndLanguageId($idart, $idlang) {
$result = true;
- if (!$this->isLoaded()) {
+ if (!$this->isLoaded()) {
$idartlang = $this->_getIdArtLang($idart, $idlang);
$result = $this->loadByPrimaryKey($idartlang);
}
return $result;
}
-
-
+
protected function _getIdArtLang($idart, $idlang) {
$sql = sprintf('SELECT idartlang FROM `%s` WHERE idart = %d AND idlang = %d', cRegistry::getConfigValue('tab', 'art_lang'), $idart, $idlang);
$this->db->query($sql);
$this->db->next_record();
return $this->db->f('idartlang');
}
-
+
public function getContent($type = '', $id = NULL) {
if (NULL === $this->content) {
$this->_loadArticleContent();
@@ -108,16 +106,16 @@ class cApiArticleLanguage extends Item
// return String
return (isset($this->content[$type][$id])) ? $this->content[$type][$id] : '';
}
-
+
protected function _loadArticleContent() {
if (NULL !== $this->content) {
return;
}
- $sql = "SELECT b.type, a.typeid, a.value FROM `".cRegistry::getConfigValue('tab', 'content')
- ."` AS a, `".cRegistry::getConfigValue('tab', 'type')
- ."` AS b WHERE a.idartlang = ".$this->get('idartlang')
- ." AND b.idtype = a.idtype ORDER BY a.idtype, a.typeid";
+ $sql = "SELECT b.type, a.typeid, a.value FROM `" . cRegistry::getConfigValue('tab', 'content')
+ . "` AS a, `" . cRegistry::getConfigValue('tab', 'type')
+ . "` AS b WHERE a.idartlang = " . $this->get('idartlang')
+ . " AND b.idtype = a.idtype ORDER BY a.idtype, a.typeid";
$this->db->query($sql);
@@ -126,5 +124,7 @@ class cApiArticleLanguage extends Item
$this->content[strtolower($this->db->f('type'))][$this->db->f('typeid')] = urldecode($this->db->f('value'));
}
}
+
}
+
?>
\ No newline at end of file
diff --git a/conlite/includes/functions.con2.php b/conlite/includes/functions.con2.php
index b424656..51cf99d 100644
--- a/conlite/includes/functions.con2.php
+++ b/conlite/includes/functions.con2.php
@@ -619,48 +619,30 @@ function conSetMetaValue($idartlang, $idmetatype, $value) {
}
/**
- * (re)generate keywords for all articles of a given client (with specified language)
- * @param $client Client
- * @param $lang Language of a client
- * @return void
- *
- * @author Willi Man
- * Created : 12.05.2004
- * Modified : 13.05.2004
- * @copyright four for business AG 2003
+ * (Re)generates the search keywords for the articles the article collector returns for a client and language.
+ * @param int|null $client client id; null is handed to the collector as 0
+ * @param int|null $lang language id; null is handed to the collector as 0
*/
-function conGenerateKeywords($client, $lang) {
- global $cfg;
- $db_art = new DB_ConLite;
+function conGenerateKeywords(?int $client = null, ?int $lang = null) {
+ $aOptions = [];
+ $aOptions['start'] = true;
+ $aOptions['offline'] = true;
+ $aOptions['client'] = $client ?? 0;
+ $aOptions['lang'] = $lang ?? 0;
- $options = array("img", "link", "linktarget", "swf"); // cms types to be excluded from indexing
-
- $sql = "SELECT
- a.idart, b.idartlang
- FROM
- " . $cfg["tab"]["art"] . " AS a,
- " . $cfg["tab"]["art_lang"] . " AS b
- WHERE
- a.idart = b.idart AND
- a.idclient = " . Contenido_Security::escapeDB($client, $db) . " AND
- b.idlang = " . Contenido_Security::escapeDB($lang, $db);
-
- $db_art->query($sql);
-
- $articles = array();
- while ($db_art->next_record()) {
- $articles[$db_art->f("idart")] = $db_art->f("idartlang");
- }
-
- if (count($articles) > 0) {
- foreach ($articles as $artid => $article_lang) {
- $article_content = array();
- $article_content = conGetContentFromArticle($article_lang);
-
- if (count($article_content) > 0) {
- $art_index = new Index($db_art);
- $art_index->lang = $lang;
- $art_index->start($artid, $article_content, 'auto', $options);
+ $oArticleCollector = new cArticleCollector();
+ $oArticleCollector->setOptions($aOptions);
+ $oArticleCollector->loadArticles();
+ /* @var $oArticle cApiArticleLanguage */
+ if ($oArticleCollector->count() > 0) {
+ foreach ($oArticleCollector as $oArticle) {
+ $aArticleContent = [];
+ $aArticleContent = $oArticle->getContent();
+ if(!empty($aArticleContent)) {
+ /* @var $oIndex Index */
+ $oIndex = new Index(); // NOTE(review): the removed code passed a DB handle and set ->lang = $lang - confirm Index resolves DB and language on its own
+ //$oIndex->setDebug(true);
+ $oIndex->start($oArticle->get('idart'), $aArticleContent, 'auto', array("img", "link", "linktarget", "swf")); // last argument: cms types to be excluded from indexing
}
}
}
diff --git a/conlite/includes/functions.general.php b/conlite/includes/functions.general.php
index 8c190c5..f4cb963 100644
--- a/conlite/includes/functions.general.php
+++ b/conlite/includes/functions.general.php
@@ -2215,7 +2215,6 @@ function clHtmlEntityDecode(string $value, ?int $flags = ENT_QUOTES | ENT_SUBSTI
* @return string Returns the converted string
*/
function clHtmlEntities(string $value,?int $flags = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, string $encoding = 'UTF-8') {
- var_dump($flags);
return htmlentities($value, $flags, $encoding);
}