2011-11-14 14:21:22 +00:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* File: class.agents.php
|
|
|
|
*
|
|
|
|
* @package SemmelstatzR
|
|
|
|
* @version $Rev$
|
2011-12-19 18:47:48 +00:00
|
|
|
* @since 1.0.0
|
2011-11-14 14:21:22 +00:00
|
|
|
* @author SEM-Team
|
|
|
|
* @copyright (c)2011 SEM-Team
|
|
|
|
* @link http://sourceforge.net/projects/semmelstatz/
|
|
|
|
* @license http://www.gnu.org/licenses/gpl-3.0.html
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
* $Id$
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Detects crawlers/bots by looking for known signatures inside a user agent string.
 *
 * The class ships with a built-in signature list. On construction it tries to
 * replace that list with the signatures found in the bundled ini file
 * (data/uas_20140127-01.ini next to this file). If the file is missing,
 * unreadable, cannot be parsed or yields no usable signature, the built-in
 * list stays active.
 */
class Agents {

    /**
     * User agent string handed to the constructor (null when none was given).
     *
     * @var string|null
     */
    private $_sAgent;

    /**
     * Bot signatures; each one is matched case-insensitively as a substring
     * of the user agent.
     *
     * @var array
     */
    private $_aBots = array(
        'aipbot',
        'amfibibot',
        'appie',
        'ask jeeves/teoma',
        'aspseek',
        'axadine',
        'baiduspider',
        'becomebot',
        'blogcorpuscrawler',
        'blogpulse',
        'blogsnowbot',
        'boitho.com',
        'bruinbot',
        'cerberian',
        'cfnetwork',
        'check_http',
        'cipinetbot',
        'claymont',
        'cometsearch@cometsystems.com',
        'converacrawler',
        'cydralspider',
        'digger',
        'es.net_crawler',
        'eventax',
        'everyfeed-spider',
        'exabot@exava.com',
        'faxobot',
        'findlinks',
        'fireball',
        'francis',
        'gaisbot',
        'gamekitbot',
        'gazz@nttr.co.jp',
        'geonabot',
        'getrax crawler',
        'gigabot',
        'girafa.com',
        'goforitbot',
        'googlebot',
        'grub-client',
        'holmes',
        'houxoucrawler',
        'http://www.almaden.ibm.com/cs/crawler',
        'http://www.istarthere.com',
        'http://www.relevantnoise.com',
        'httrack ?',
        'ia_archiver',
        'ichiro',
        'iltrovatore-setaccio',
        'inelabot',
        'infoseek',
        'inktomi.com',
        'irlbot',
        'jetbot',
        'jobspider_ba',
        'kazoombot',
        'larbin',
        'libwww',
        'linkwalker',
        'lmspider',
        'mackster',
        'mediapartners-google',
        'microsoft url control',
        'mj12bot',
        'moreoverbot',
        'mozdex',
        'msnbot',
        'msrbot',
        'naverbot',
        'netresearchserver',
        'ng/2.0',
        'np(bot)',
        'nutch',
        'objectssearch',
        'ocelli',
        'omniexplorer_bot',
        'openbot',
        'overture',
        'patwebbot',
        'php',
        'phpdig',
        'pilgrim html-crawler',
        'pipeliner',
        'pompos',
        'psbot',
        'python-urllib',
        'quepasacreep',
        'robozilla',
        'rpt-httpclient',
        'savvybot',
        'scooter',
        'search.ch',
        'seekbot',
        'semager',
        'seznambot',
        'sherlock',
        'shelob',
        'sitesearch',
        'snapbot',
        'snappreviewbot',
        'speedy spider',
        'sphere scout',
        'stackrambler',
        'steeler',
        'surveybot',
        'szukacz',
        'technoratibot',
        'telnet',
        'themiragorobot',
        'thesubot',
        'thumbshots-de-bot',
        'topicblogs',
        'turnitinbot',
        'tutorgigbot',
        'tutorial crawler',
        'twiceler',
        'vagabondo',
        'versus',
        'voilabot',
        'w3c_css_validator',
        'w3c_validator',
        'w3c-checklink',
        'web downloader',
        'webcopier',
        'webcrawler',
        'webfilter robot',
        'west wind internet protocols',
        'wget',
        'wwweasel robot',
        'wwwster',
        'xaldon webspider',
        'xenu',
        'yahoo! slurp',
        'yahoofeedseeker',
        'yahoo-mmcrawler',
        'zao',
        'zipppbot',
        'zyborg',
    );

    /**
     * Stores the optional user agent and loads the signature list from the
     * default ini file when that file is available.
     *
     * @param string|null $sAgent user agent to remember for later isBot() calls
     */
    public function __construct($sAgent = null) {
        if ($sAgent !== null) {
            $this->_sAgent = $sAgent;
        }
        $this->_getIniFile();
    }

    /**
     * Tells whether a user agent contains one of the known bot signatures.
     *
     * @param string|null $sAgent user agent to test; when null, the agent
     *                            passed to the constructor is used
     * @return bool true on the first matching signature; false when nothing
     *              matches or when no user agent is available at all
     */
    public function isBot($sAgent = null) {
        if ($sAgent === null) {
            if (empty($this->_sAgent)) {
                return false;
            }
            $sAgent = $this->_sAgent;
        }

        foreach ($this->_aBots as $sBot) {
            // Case-insensitive substring test; only the position matters here.
            if (stripos($sAgent, $sBot) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces the signature list with the entries of an ini file.
     *
     * The file is parsed with sections enabled and the value at index 2 of
     * every entry in its "robots" section is taken as a signature.
     * NOTE(review): index 2 is presumably the robot name in the UAS ini
     * layout - confirm against the data file.
     *
     * The current list is only replaced when at least one usable signature
     * was read, so a missing, unparsable or incomplete file never disables
     * detection.
     *
     * @param string|null $sFile path of the ini file; null selects the
     *                           bundled default file
     * @return void
     */
    private function _getIniFile($sFile = null) {
        if ($sFile === null) {
            $sFile = dirname(__FILE__)."/data/uas_20140127-01.ini"; // default ini
        }

        if (!is_file($sFile) || !is_readable($sFile)) {
            return;
        }

        $aIni = parse_ini_file($sFile, true);
        // parse_ini_file() returns false on failure; the section may also be absent.
        if (!is_array($aIni) || !isset($aIni['robots']) || !is_array($aIni['robots'])) {
            return;
        }

        $aBots = array();
        foreach ($aIni['robots'] as $aRobot) {
            // Skip incomplete entries. An empty signature must never be stored:
            // an empty needle matches every user agent on PHP 8.
            if (is_array($aRobot) && isset($aRobot[2]) && is_string($aRobot[2]) && $aRobot[2] !== '') {
                $aBots[] = $aRobot[2];
            }
        }

        if (!empty($aBots)) {
            $this->_aBots = $aBots;
        }
    }
}
|
|
|
|
?>
|