bugfixes search and keywords generation
This commit is contained in:
parent
434b483fac
commit
6729446154
5 changed files with 110 additions and 89 deletions
|
@ -34,7 +34,7 @@ class cArticleCollector implements SeekableIterator, Countable {
|
|||
protected $_aStartArticles = array();
|
||||
protected $_aOptions = array();
|
||||
protected $_aOptionsDefault = array();
|
||||
private $_bAsObject = TRUE;
|
||||
private $_bAsObject = true;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -65,14 +65,12 @@ class cArticleCollector implements SeekableIterator, Countable {
|
|||
}
|
||||
|
||||
if (count($this->_aStartArticles) > 0) {
|
||||
print_r($this->_aStartArticles);
|
||||
if ($this->_aOptions['start'] == false) {
|
||||
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idartlang", $this->_aStartArticles, "NOTIN");
|
||||
//$sqlStartArticles = "a.idartlang NOT IN ('" . implode("','", $this->_startArticles) . "') AND ";
|
||||
}
|
||||
|
||||
if ($this->_aOptions['startonly'] == true) {
|
||||
echo "startonly";
|
||||
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idartlang", $this->_aStartArticles, "IN");
|
||||
//$sqlStartArticles = "a.idartlang IN ('" . implode("','", $this->_startArticles) . "') AND ";
|
||||
}
|
||||
|
@ -89,7 +87,6 @@ class cArticleCollector implements SeekableIterator, Countable {
|
|||
$oArtLangColl->setWhere("cApiArticleLanguageCollection.idlang", $this->_aOptions['lang']);
|
||||
|
||||
$oArtLangColl->query();
|
||||
echo $oArtLangColl->_lastSQL;
|
||||
if ($oArtLangColl->count() > 0) {
|
||||
$aTable = $oArtLangColl->fetchTable();
|
||||
//echo $oArtLangColl->_lastSQL;
|
||||
|
@ -97,7 +94,6 @@ class cArticleCollector implements SeekableIterator, Countable {
|
|||
foreach ($aTable as $aItem) {
|
||||
$this->_aArticles[] = $aItem['idartlang'];
|
||||
}
|
||||
print_r($this->_aArticles);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,7 +158,7 @@ class cArticleCollector implements SeekableIterator, Countable {
|
|||
*
|
||||
* @return cApiArticleLanguage|int returns article language object or idartlang
|
||||
*/
|
||||
public function current() {
|
||||
public function current() :cApiArticleLanguage|int{
|
||||
$iIdartlang = $this->_aArticles[$this->_iCurrentPosition];
|
||||
if ($this->_bAsObject) {
|
||||
$oArticle = new cApiArticleLanguage($iIdartlang);
|
||||
|
|
|
@ -225,13 +225,13 @@ class Index extends SearchBaseAbstract {
|
|||
*
|
||||
* @var array
|
||||
*/
|
||||
var $cms_type = array();
|
||||
protected static $_cms_type = [];
|
||||
|
||||
/**
|
||||
* the suffix of all available cms types
|
||||
* @var array
|
||||
*/
|
||||
var $cms_type_suffix = array();
|
||||
protected static $_cms_type_suffix = [];
|
||||
|
||||
/**
|
||||
* Constructor, set object properties
|
||||
|
@ -270,6 +270,8 @@ class Index extends SearchBaseAbstract {
|
|||
$this->idart = $idart;
|
||||
}
|
||||
|
||||
$this->_debug('Start Index for ', $this->idart);
|
||||
|
||||
$this->place = $place;
|
||||
$this->keycode = $aContent;
|
||||
$this->setStopwords($aStopwords);
|
||||
|
@ -283,7 +285,14 @@ class Index extends SearchBaseAbstract {
|
|||
$old_keys = array_keys($this->keywords_old);
|
||||
|
||||
$this->keywords_del = array_diff($old_keys, $new_keys);
|
||||
|
||||
/*
|
||||
echo '<pre>';
|
||||
print_r($new_keys);
|
||||
print_r($old_keys);
|
||||
print_r($this->keywords_del);
|
||||
echo '</pre>';
|
||||
*
|
||||
*/
|
||||
if (count($this->keywords_del) > 0) {
|
||||
$this->deleteKeywords();
|
||||
}
|
||||
|
@ -312,7 +321,7 @@ class Index extends SearchBaseAbstract {
|
|||
foreach ($this->keycode as $idtype => $data) {
|
||||
if ($this->checkCmsType($idtype)) {
|
||||
foreach ($data as $typeid => $code) {
|
||||
$this->_debug('code', $code);
|
||||
$this->_debug('createKeywords: raw code from data array', $code);
|
||||
|
||||
$code = stripslashes($code); // remove backslash
|
||||
$code = str_ireplace(array('<br>', '<br />'), "\n", $code); // replace HTML line breaks with newlines
|
||||
|
@ -320,13 +329,18 @@ class Index extends SearchBaseAbstract {
|
|||
if (strlen($code) > 0) {
|
||||
$code = clHtmlEntityDecode($code);
|
||||
}
|
||||
$this->_debug('code', $code);
|
||||
$this->_debug('createKeywords: code after clean', $code);
|
||||
|
||||
$tmp_keys = preg_split('/[\s,]+/', trim($code)); // split content by any number of commas or space characters
|
||||
$this->_debug('tmp_keys', $tmp_keys);
|
||||
$this->_debug('createKeywords: tmp_keys', $tmp_keys);
|
||||
|
||||
foreach ($tmp_keys as $value) {
|
||||
$value = strtolower($value); // index terms are stored with lower case
|
||||
$value = preg_replace('/[^\w]+/u', '', $value);
|
||||
|
||||
if (empty(trim($value))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!in_array($value, $this->stopwords)) {
|
||||
// eliminate stopwords
|
||||
|
@ -335,6 +349,7 @@ class Index extends SearchBaseAbstract {
|
|||
if (strlen($value) > 1) {
|
||||
// do not index single characters
|
||||
$this->keywords[$value] = $this->keywords[$value] . $idtype . '-' . $typeid . ' ';
|
||||
$this->_debug('createKeywords: entry array keywords', $this->keywords);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -345,7 +360,7 @@ class Index extends SearchBaseAbstract {
|
|||
}
|
||||
}
|
||||
|
||||
$this->_debug('keywords', $this->keywords);
|
||||
$this->_debug('createKeywords: keywords returned', $this->keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -357,9 +372,10 @@ class Index extends SearchBaseAbstract {
|
|||
$tmp_count = array();
|
||||
|
||||
foreach ($this->keywords as $keyword => $count) {
|
||||
$bProceed = true;
|
||||
$this->_debug('keyword', $keyword);
|
||||
$tmp_count = preg_split('/[\s]/', trim($count));
|
||||
$this->_debug('tmp_count', $tmp_count);
|
||||
|
||||
$occurrence = count($tmp_count);
|
||||
$tmp_count = array_unique($tmp_count);
|
||||
$cms_types = implode(',', $tmp_count);
|
||||
|
@ -376,8 +392,12 @@ class Index extends SearchBaseAbstract {
|
|||
('" . Contenido_Security::escapeDB($keyword, $this->db) . "', '" . Contenido_Security::escapeDB($index_string, $this->db) . "', " . Contenido_Security::toInteger($this->lang) . ", " . Contenido_Security::toInteger($nextid) . ")";
|
||||
} else {
|
||||
// if keyword already exists, create new index_string
|
||||
if (preg_match("/&$this->idart=/", $this->keywords_old[$keyword])) {
|
||||
$index_string = preg_replace("/&$this->idart=[0-9]+\([\w-,]+\)/", $index_string, $this->keywords_old[$keyword]);
|
||||
if (preg_match("/&" . $this->idart . "=/", $this->keywords_old[$keyword])) {
|
||||
$index_string = preg_replace("/&" . $this->idart . "=[0-9]+\([,\w-]+\)/", $index_string, $this->keywords_old[$keyword]);
|
||||
if ($index_string === $this->keywords_old[$keyword]) {
|
||||
$bProceed = false;
|
||||
$this->_debug('db update', 'no update needed');
|
||||
}
|
||||
} else {
|
||||
$index_string = $this->keywords_old[$keyword] . $index_string;
|
||||
}
|
||||
|
@ -386,9 +406,11 @@ class Index extends SearchBaseAbstract {
|
|||
SET " . $this->place . " = '" . $index_string . "'
|
||||
WHERE idlang='" . Contenido_Security::toInteger($this->lang) . "' AND keyword='" . Contenido_Security::escapeDB($keyword, $this->db) . "'";
|
||||
}
|
||||
$this->_debug('sql', $sql);
|
||||
|
||||
$this->db->query($sql);
|
||||
if ($bProceed) {
|
||||
$this->_debug('sql', $sql);
|
||||
$this->db->query($sql);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -431,7 +453,7 @@ class Index extends SearchBaseAbstract {
|
|||
idlang=" . Contenido_Security::toInteger($this->lang) . " AND
|
||||
(keyword IN ('" . $keys . "') OR " . $this->place . " REGEXP '&" . Contenido_Security::toInteger($this->idart) . "=')";
|
||||
|
||||
$this->_debug('sql', $sql);
|
||||
$this->_debug('getKeywords: sql', $sql);
|
||||
|
||||
$this->db->query($sql);
|
||||
|
||||
|
@ -440,6 +462,8 @@ class Index extends SearchBaseAbstract {
|
|||
while ($this->db->next_record()) {
|
||||
$this->keywords_old[$this->db->f('keyword')] = $this->db->f($place);
|
||||
}
|
||||
|
||||
$this->_debug('getKeywords: array keywords_old', $this->keywords_old);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -448,6 +472,7 @@ class Index extends SearchBaseAbstract {
|
|||
* @return $key
|
||||
*/
|
||||
function removeSpecialChars($key) {
|
||||
|
||||
$aSpecialChars = array(
|
||||
"-", "_", "'", ".", "!", "\"", "#", "$", "%", "&", "(", ")", "*", "+", ",", "/",
|
||||
":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "`", "{", "|", "}", "~"
|
||||
|
@ -461,6 +486,7 @@ class Index extends SearchBaseAbstract {
|
|||
// a client and should not be treated in this method.
|
||||
// modified 2007-10-01, H. Librenz - added as hotfix for encoding problems (doesn't find any words with
|
||||
// umlaut vowels in it since you turn on UTF-8 as language encoding)
|
||||
|
||||
$sEncoding = getEncodingByLanguage($this->db, $this->lang, $this->cfg);
|
||||
|
||||
if (strtolower($sEncoding) != 'iso-8859-2') {
|
||||
|
@ -486,6 +512,9 @@ class Index extends SearchBaseAbstract {
|
|||
$key = clHtmlEntityDecode($key);
|
||||
$key = str_replace($aSpecialChars, '', $key);
|
||||
|
||||
ini_set('mbstring.substitute_character', "none");
|
||||
$key = mb_convert_encoding($key, 'UTF-8', 'UTF-8');
|
||||
|
||||
return $key;
|
||||
}
|
||||
|
||||
|
@ -516,6 +545,21 @@ class Index extends SearchBaseAbstract {
|
|||
return $key;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return array array with arrays of type and typesuffix
|
||||
*/
|
||||
public function getContentTypes(): array {
    // The type maps are static (shared by all instances); trigger loading only
    // while the cms_type map is still empty.
    // NOTE(review): setContentTypes() is presumably what fills both maps - confirm.
    $bTypesLoaded = !empty(self::$_cms_type);
    if (!$bTypesLoaded) {
        $this->setContentTypes();
    }

    // Hand both maps back under fixed keys.
    $aContentTypes = [];
    $aContentTypes['cms_type'] = self::$_cms_type;
    $aContentTypes['cms_type_suffix'] = self::$_cms_type_suffix;

    return $aContentTypes;
}
|
||||
|
||||
/**
|
||||
* set the array of stopwords which should not be indexed
|
||||
* @param array $aStopwords
|
||||
|
@ -537,8 +581,8 @@ class Index extends SearchBaseAbstract {
|
|||
$this->_debug('sql', $sql);
|
||||
$this->db->query($sql);
|
||||
while ($this->db->next_record()) {
|
||||
$this->cms_type[$this->db->f('type')] = $this->db->f('idtype');
|
||||
$this->cms_type_suffix[$this->db->f('idtype')] = substr($this->db->f('type'), 4, strlen($this->db->f('type')));
|
||||
self::$_cms_type[$this->db->f('type')] = $this->db->f('idtype');
|
||||
self::$_cms_type_suffix[$this->db->f('idtype')] = substr($this->db->f('type'), 4, strlen($this->db->f('type')));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -554,11 +598,11 @@ class Index extends SearchBaseAbstract {
|
|||
|
||||
if (strlen($opt) > 0) {
|
||||
if (!stristr($opt, 'cms_')) {
|
||||
if (in_array($opt, $this->cms_type_suffix)) {
|
||||
if (in_array($opt, $this->getContentTypes()['cms_type_suffix'])) {
|
||||
$this->cms_options[$opt] = 'CMS_' . $opt;
|
||||
}
|
||||
} else {
|
||||
if (array_key_exists($opt, $this->cms_type)) {
|
||||
if (array_key_exists($opt, $this->getContentTypes()['cms_type'])) {
|
||||
$this->cms_options[$opt] = $opt;
|
||||
}
|
||||
}
|
||||
|
@ -788,8 +832,8 @@ class Search extends SearchBaseAbstract {
|
|||
|
||||
$this->index = new Index($oDB);
|
||||
|
||||
$this->cms_type = $this->index->cms_type;
|
||||
$this->cms_type_suffix = $this->index->cms_type_suffix;
|
||||
$this->cms_type = $this->index->getContentTypes()['cms_type'];
|
||||
$this->cms_type_suffix = $this->index->getContentTypes()['cms_type_suffix'];
|
||||
|
||||
$this->search_option = (array_key_exists('db', $options)) ? strtolower($options['db']) : 'regexp';
|
||||
$this->search_combination = (array_key_exists('combine', $options)) ? strtolower($options['combine']) : 'or';
|
||||
|
@ -1339,11 +1383,11 @@ class SearchResult extends SearchBaseAbstract {
|
|||
$cms_type = strtoupper($cms_type);
|
||||
if (strlen($cms_type) > 0) {
|
||||
// Validate / normalise the requested content type against the maps provided by Index.
// 'cms_type' is keyed by the full type name (values are idtypes), while
// 'cms_type_suffix' holds the bare suffixes as values (keyed by idtype).
// So a bare suffix must be searched with in_array() in 'cms_type_suffix' and a full
// name must be looked up as a key in 'cms_type' - the two lookups were swapped.
if (!stristr($cms_type, 'cms_')) {
    // bare suffix given: prefix it when it is a known suffix
    if (in_array($cms_type, $this->index->getContentTypes()['cms_type_suffix'])) {
        $cms_type = 'CMS_' . $cms_type;
    }
} else {
    // full type name given: unknown types yield an empty result
    if (!array_key_exists($cms_type, $this->index->getContentTypes()['cms_type'])) {
        return array();
    }
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* File:
|
||||
* class.articlelanguage.php
|
||||
|
@ -18,14 +19,12 @@
|
|||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
if (!defined('CON_FRAMEWORK')) {
|
||||
die('Illegal call');
|
||||
}
|
||||
|
||||
|
||||
class cApiArticleLanguageCollection extends ItemCollection {
|
||||
|
||||
|
||||
public function __construct($select = false) {
|
||||
global $cfg;
|
||||
parent::__construct($cfg["tab"]["art_lang"], "idartlang");
|
||||
|
@ -36,26 +35,26 @@ class cApiArticleLanguageCollection extends ItemCollection {
|
|||
$this->select($select);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Looks up the idartlang belonging to an article / language pair.
 *
 * Both ids are passed through Contenido_Security::toInteger() before they
 * are used as where conditions.
 *
 * @param mixed $iIdart  article id
 * @param mixed $iIdlang language id
 * @return mixed|false idartlang of the first matching item, false if the
 *                     query fails or yields no item
 */
public function getIdArtLang($iIdart, $iIdlang) {
    $this->setWhere('idart', Contenido_Security::toInteger($iIdart));
    $this->setWhere('idlang', Contenido_Security::toInteger($iIdlang));

    // single condition only - the duplicated "if" line left an unclosed brace
    if ($this->query() && $this->count() > 0) {
        return $this->next()->get('idartlang');
    }

    return false;
}
|
||||
|
||||
}
|
||||
|
||||
class cApiArticleLanguage extends Item {
|
||||
|
||||
class cApiArticleLanguage extends Item
|
||||
{
|
||||
/**
|
||||
* Constructor Function
|
||||
* @param mixed $mId Specifies the ID of item to load
|
||||
*
|
||||
* @global type $cfg
|
||||
* @param type $mId
|
||||
*/
|
||||
public function __construct($mId = false)
|
||||
{
|
||||
public function __construct($mId = false) {
|
||||
global $cfg;
|
||||
parent::__construct($cfg["tab"]["art_lang"], "idartlang");
|
||||
$this->setFilters(array(), array());
|
||||
|
@ -63,24 +62,23 @@ class cApiArticleLanguage extends Item
|
|||
$this->loadByPrimaryKey($mId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Loads the item by article id and language id unless it is already loaded.
 *
 * @param mixed $idart  article id
 * @param mixed $idlang language id
 * @return mixed true when the item was already loaded, otherwise the
 *               result of loadByPrimaryKey()
 */
public function loadByArticleAndLanguageId($idart, $idlang) {
    // already loaded: nothing to do, report success
    // (single check only - the duplicated "if" line left an unclosed brace)
    if ($this->isLoaded()) {
        return true;
    }

    $idartlang = $this->_getIdArtLang($idart, $idlang);

    return $this->loadByPrimaryKey($idartlang);
}
|
||||
|
||||
|
||||
|
||||
/**
 * Reads the idartlang for the given article / language pair.
 *
 * The ids are inserted via %d, i.e. formatted as integers by sprintf().
 *
 * @param mixed $idart  article id
 * @param mixed $idlang language id
 * @return mixed value of field idartlang as delivered by the db object
 */
protected function _getIdArtLang($idart, $idlang) {
    $sTable = cRegistry::getConfigValue('tab', 'art_lang');
    $sQuery = sprintf('SELECT idartlang FROM `%s` WHERE idart = %d AND idlang = %d', $sTable, $idart, $idlang);

    $this->db->query($sQuery);
    // move to the first record; its result is not checked here
    $this->db->next_record();

    return $this->db->f('idartlang');
}
|
||||
|
||||
|
||||
public function getContent($type = '', $id = NULL) {
|
||||
if (NULL === $this->content) {
|
||||
$this->_loadArticleContent();
|
||||
|
@ -108,16 +106,16 @@ class cApiArticleLanguage extends Item
|
|||
// return String
|
||||
return (isset($this->content[$type][$id])) ? $this->content[$type][$id] : '';
|
||||
}
|
||||
|
||||
|
||||
protected function _loadArticleContent() {
|
||||
if (NULL !== $this->content) {
|
||||
return;
|
||||
}
|
||||
|
||||
$sql = "SELECT b.type, a.typeid, a.value FROM `".cRegistry::getConfigValue('tab', 'content')
|
||||
."` AS a, `".cRegistry::getConfigValue('tab', 'type')
|
||||
."` AS b WHERE a.idartlang = ".$this->get('idartlang')
|
||||
." AND b.idtype = a.idtype ORDER BY a.idtype, a.typeid";
|
||||
$sql = "SELECT b.type, a.typeid, a.value FROM `" . cRegistry::getConfigValue('tab', 'content')
|
||||
. "` AS a, `" . cRegistry::getConfigValue('tab', 'type')
|
||||
. "` AS b WHERE a.idartlang = " . $this->get('idartlang')
|
||||
. " AND b.idtype = a.idtype ORDER BY a.idtype, a.typeid";
|
||||
|
||||
$this->db->query($sql);
|
||||
|
||||
|
@ -126,5 +124,7 @@ class cApiArticleLanguage extends Item
|
|||
$this->content[strtolower($this->db->f('type'))][$this->db->f('typeid')] = urldecode($this->db->f('value'));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
|
@ -619,48 +619,30 @@ function conSetMetaValue($idartlang, $idmetatype, $value) {
|
|||
}
|
||||
|
||||
/**
|
||||
* (re)generate keywords for all articles of a given client (with specified language)
|
||||
* @param $client Client
|
||||
* @param $lang Language of a client
|
||||
* @return void
|
||||
*
|
||||
* @author Willi Man
|
||||
* Created : 12.05.2004
|
||||
* Modified : 13.05.2004
|
||||
* @copyright four for business AG 2003
|
||||
*
|
||||
* @param int $client
|
||||
* @param int $lang
|
||||
*/
|
||||
function conGenerateKeywords($client, $lang) {
|
||||
global $cfg;
|
||||
$db_art = new DB_ConLite;
|
||||
function conGenerateKeywords(int $client = null, int $lang = null) {
|
||||
$aOptions = [];
|
||||
$aOptions['start'] = true;
|
||||
$aOptions['offline'] = true;
|
||||
$aOptions['client'] = $client ?? 0;
|
||||
$aOptions['lang'] = $lang ?? 0;
|
||||
|
||||
$options = array("img", "link", "linktarget", "swf"); // cms types to be excluded from indexing
|
||||
|
||||
$sql = "SELECT
|
||||
a.idart, b.idartlang
|
||||
FROM
|
||||
" . $cfg["tab"]["art"] . " AS a,
|
||||
" . $cfg["tab"]["art_lang"] . " AS b
|
||||
WHERE
|
||||
a.idart = b.idart AND
|
||||
a.idclient = " . Contenido_Security::escapeDB($client, $db) . " AND
|
||||
b.idlang = " . Contenido_Security::escapeDB($lang, $db);
|
||||
|
||||
$db_art->query($sql);
|
||||
|
||||
$articles = array();
|
||||
while ($db_art->next_record()) {
|
||||
$articles[$db_art->f("idart")] = $db_art->f("idartlang");
|
||||
}
|
||||
|
||||
if (count($articles) > 0) {
|
||||
foreach ($articles as $artid => $article_lang) {
|
||||
$article_content = array();
|
||||
$article_content = conGetContentFromArticle($article_lang);
|
||||
|
||||
if (count($article_content) > 0) {
|
||||
$art_index = new Index($db_art);
|
||||
$art_index->lang = $lang;
|
||||
$art_index->start($artid, $article_content, 'auto', $options);
|
||||
$oArticleCollector = new cArticleCollector();
|
||||
$oArticleCollector->setOptions($aOptions);
|
||||
$oArticleCollector->loadArticles();
|
||||
/* @var $oArticle cApiArticleLanguage */
|
||||
if ($oArticleCollector->count() > 0) {
|
||||
foreach ($oArticleCollector as $oArticle) {
|
||||
$aArticleContent = [];
|
||||
$aArticleContent = $oArticle->getContent();
|
||||
if(!empty($aArticleContent)) {
|
||||
/* @var $oIndex Index */
|
||||
$oIndex = new Index();
|
||||
//$oIndex->setDebug(true);
|
||||
$oIndex->start($oArticle->get('idart'), $aArticleContent, 'auto', array("img", "link", "linktarget", "swf"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2215,7 +2215,6 @@ function clHtmlEntityDecode(string $value, ?int $flags = ENT_QUOTES | ENT_SUBSTI
|
|||
* @return string Returns the converted string
|
||||
*/
|
||||
function clHtmlEntities(string $value,?int $flags = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, string $encoding = 'UTF-8') {
    // Thin wrapper around htmlentities() with fixed defaults for flags and encoding.
    // The leftover var_dump($flags) debug call was removed: it wrote to the
    // output on every call.
    return htmlentities($value, $flags, $encoding);
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue