1
0
Fork 0
SemmelstatzR/trunk/includes/class.agents.php

208 Zeilen
5 KiB
PHP

<?php
/**
* File: class.agents.php
*
* @package SemmelstatzR
* @version $Rev$
* @since 1.0.0
* @author SEM-Team
* @copyright (c)2011 SEM-Team
* @link http://sourceforge.net/projects/semmelstatz/
* @license http://www.gnu.org/licenses/gpl-3.0.html
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* $Id$
*
*/
/**
 * Detects robots/crawlers by case-insensitive substring matching of a
 * user-agent string against a list of known bot patterns.
 *
 * The class ships with a built-in pattern list. On construction it tries to
 * replace that list with patterns read from an ini file (see _getIniFile());
 * when the file is absent or unusable the built-in list stays in effect.
 */
class Agents {
    /**
     * User-agent string given to the constructor; isBot() falls back to it
     * when called without an argument.
     *
     * @var string|null
     */
    private $_sAgent;

    /**
     * Patterns searched for (case-insensitively) inside the user-agent string.
     * Built-in defaults; may be replaced by _getIniFile().
     *
     * @var array
     */
    private $_aBots = array(
        'aipbot',
        'amfibibot',
        'appie',
        'ask jeeves/teoma',
        'aspseek',
        'axadine',
        'baiduspider',
        'becomebot',
        'blogcorpuscrawler',
        'blogpulse',
        'blogsnowbot',
        'boitho.com',
        'bruinbot',
        'cerberian',
        'cfnetwork',
        'check_http',
        'cipinetbot',
        'claymont',
        'cometsearch@cometsystems.com',
        'converacrawler',
        'cydralspider',
        'digger',
        'es.net_crawler',
        'eventax',
        'everyfeed-spider',
        'exabot@exava.com',
        'faxobot',
        'findlinks',
        'fireball',
        'francis',
        'gaisbot',
        'gamekitbot',
        'gazz@nttr.co.jp',
        'geonabot',
        'getrax crawler',
        'gigabot',
        'girafa.com',
        'goforitbot',
        'googlebot',
        'grub-client',
        'holmes',
        'houxoucrawler',
        'http://www.almaden.ibm.com/cs/crawler',
        'http://www.istarthere.com',
        'http://www.relevantnoise.com',
        'httrack ?', // NOTE(review): trailing " ?" is matched literally - confirm this is intended
        'ia_archiver',
        'ichiro',
        'iltrovatore-setaccio',
        'inelabot',
        'infoseek',
        'inktomi.com',
        'irlbot',
        'jetbot',
        'jobspider_ba',
        'kazoombot',
        'larbin',
        'libwww',
        'linkwalker',
        'lmspider',
        'mackster',
        'mediapartners-google',
        'microsoft url control',
        'mj12bot',
        'moreoverbot',
        'mozdex',
        'msnbot',
        'msrbot',
        'naverbot',
        'netresearchserver',
        'ng/2.0',
        'np(bot)',
        'nutch',
        'objectssearch',
        'ocelli',
        'omniexplorer_bot',
        'openbot',
        'overture',
        'patwebbot',
        'php',
        'phpdig',
        'pilgrim html-crawler',
        'pipeliner',
        'pompos',
        'psbot',
        'python-urllib',
        'quepasacreep',
        'robozilla',
        'rpt-httpclient',
        'savvybot',
        'scooter',
        'search.ch',
        'seekbot',
        'semager',
        'seznambot',
        'sherlock',
        'shelob',
        'sitesearch',
        'snapbot',
        'snappreviewbot',
        'speedy spider',
        'sphere scout',
        'stackrambler',
        'steeler',
        'surveybot',
        'szukacz',
        'technoratibot',
        'telnet',
        'themiragorobot',
        'thesubot',
        'thumbshots-de-bot',
        'topicblogs',
        'turnitinbot',
        'tutorgigbot',
        'tutorial crawler',
        'twiceler',
        'vagabondo',
        'versus',
        'voilabot',
        'w3c_css_validator',
        'w3c_validator',
        'w3c-checklink',
        'web downloader',
        'webcopier',
        'webcrawler',
        'webfilter robot',
        'west wind internet protocols',
        'wget',
        'wwweasel robot',
        'wwwster',
        'xaldon webspider',
        'xenu',
        'yahoo! slurp',
        'yahoofeedseeker',
        'yahoo-mmcrawler',
        'zao',
        'zipppbot',
        'zyborg',
    );

    /**
     * Stores the optional user-agent string and loads the bot patterns from
     * the default ini file when it is available.
     *
     * @param string|null $sAgent User-agent string to remember for isBot().
     */
    public function __construct($sAgent = null) {
        if ($sAgent !== null) {
            $this->_sAgent = $sAgent;
        }
        $this->_getIniFile();
    }

    /**
     * Tells whether a user-agent string contains one of the known bot patterns.
     *
     * @param string|null $sAgent User-agent string to test; when null the
     *                            string given to the constructor is used.
     * @return bool True if any pattern occurs (case-insensitively) in the
     *              agent string; false otherwise, including when no agent
     *              string is available.
     */
    public function isBot($sAgent = null) {
        if ($sAgent === null) {
            $sAgent = $this->_sAgent;
        }

        // Only values that convert cleanly to a string can be searched.
        $bStringable = is_scalar($sAgent)
            || (is_object($sAgent) && method_exists($sAgent, '__toString'));
        if (!$bStringable) {
            return false;
        }

        $sAgent = (string) $sAgent;
        if ($sAgent === '') {
            return false;
        }

        foreach ($this->_aBots as $sBot) {
            // An empty needle matches every haystack on PHP 8 and raises a
            // warning on older versions, so such patterns are skipped.
            if (!is_string($sBot) || $sBot === '') {
                continue;
            }
            if (stripos($sAgent, $sBot) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replaces the bot pattern list with the entries of an ini file.
     *
     * The file is parsed with sections enabled. For every entry of its
     * "robots" section the element at index 2 becomes a pattern
     * (presumably the robot's name in the UAS data format - TODO confirm).
     * Entries without a non-empty string at that index are ignored.
     *
     * The current list is left untouched when the file is missing, is not
     * readable, cannot be parsed, or has no "robots" section.
     *
     * @param string|null $sFile Path of the ini file; null selects the
     *                           default file below this file's directory.
     * @return void
     */
    private function _getIniFile($sFile = null) {
        if ($sFile === null) {
            $sFile = dirname(__FILE__)."/data/uas_20140127-01.ini"; // default ini
        }

        if (!is_file($sFile) || !is_readable($sFile)) {
            return;
        }

        $aIni = parse_ini_file($sFile, true);
        if (!is_array($aIni) || !isset($aIni['robots']) || !is_array($aIni['robots'])) {
            return;
        }

        // Build the new list first so a bad file never leaves a half-filled list.
        $aBots = array();
        foreach ($aIni['robots'] as $aRobot) {
            if (is_array($aRobot)
                && isset($aRobot[2])
                && is_string($aRobot[2])
                && $aRobot[2] !== ''
            ) {
                $aBots[] = $aRobot[2];
            }
        }

        $this->_aBots = $aBots;
    }
}
?>