<?php
/**
 * File: class.agents.php
 *
 * @package SemmelstatzR
 * @version $Rev$
 * @since 1.0.0
 * @author SEM-Team
 * @copyright (c)2011 SEM-Team
 * @link http://sourceforge.net/projects/semmelstatz/
 * @license http://www.gnu.org/licenses/gpl-3.0.html
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * $Id$
 *
 */

/**
 * Decides whether a user-agent string belongs to a known robot.
 *
 * Detection is a case-insensitive substring search of the user agent against
 * a list of robot names. The list starts out as the built-in defaults below
 * and is replaced by the names read from the bundled INI file when that file
 * can be loaded and yields at least one usable name.
 */
class Agents
{
    /**
     * User agent given to the constructor; used by isBot() when it is called
     * without an argument.
     *
     * @var string|null
     */
    private $_sAgent;

    /**
     * Robot name fragments that are searched for (case-insensitively) inside
     * the user agent.
     *
     * @var array
     */
    private $_aBots = array(
        'aipbot', 'amfibibot', 'appie', 'ask jeeves/teoma', 'aspseek', 'axadine',
        'baiduspider', 'becomebot', 'blogcorpuscrawler', 'blogpulse', 'blogsnowbot',
        'boitho.com', 'bruinbot', 'cerberian', 'cfnetwork', 'check_http', 'cipinetbot',
        'claymont', 'cometsearch@cometsystems.com', 'converacrawler', 'cydralspider',
        'digger', 'es.net_crawler', 'eventax', 'everyfeed-spider', 'exabot@exava.com',
        'faxobot', 'findlinks', 'fireball', 'francis', 'gaisbot', 'gamekitbot',
        'gazz@nttr.co.jp', 'geonabot', 'getrax crawler', 'gigabot', 'girafa.com',
        'goforitbot', 'googlebot', 'grub-client', 'holmes', 'houxoucrawler',
        'http://www.almaden.ibm.com/cs/crawler', 'http://www.istarthere.com',
        'http://www.relevantnoise.com', 'httrack ?', 'ia_archiver', 'ichiro',
        'iltrovatore-setaccio', 'inelabot', 'infoseek', 'inktomi.com', 'irlbot',
        'jetbot', 'jobspider_ba', 'kazoombot', 'larbin', 'libwww', 'linkwalker',
        'lmspider', 'mackster', 'mediapartners-google', 'microsoft url control',
        'mj12bot', 'moreoverbot', 'mozdex', 'msnbot', 'msrbot', 'naverbot',
        'netresearchserver', 'ng/2.0', 'np(bot)', 'nutch', 'objectssearch', 'ocelli',
        'omniexplorer_bot', 'openbot', 'overture', 'patwebbot', 'php', 'phpdig',
        'pilgrim html-crawler', 'pipeliner', 'pompos', 'psbot', 'python-urllib',
        'quepasacreep', 'robozilla', 'rpt-httpclient', 'savvybot', 'scooter',
        'search.ch', 'seekbot', 'semager', 'seznambot', 'sherlock', 'shelob',
        'sitesearch', 'snapbot', 'snappreviewbot', 'speedy spider', 'sphere scout',
        'stackrambler', 'steeler', 'surveybot', 'szukacz', 'technoratibot', 'telnet',
        'themiragorobot', 'thesubot', 'thumbshots-de-bot', 'topicblogs', 'turnitinbot',
        'tutorgigbot', 'tutorial crawler', 'twiceler', 'vagabondo', 'versus',
        'voilabot', 'w3c_css_validator', 'w3c_validator', 'w3c-checklink',
        'web downloader', 'webcopier', 'webcrawler', 'webfilter robot',
        'west wind internet protocols', 'wget', 'wwweasel robot', 'wwwster',
        'xaldon webspider', 'xenu', 'yahoo! slurp', 'yahoofeedseeker',
        'yahoo-mmcrawler', 'zao', 'zipppbot', 'zyborg',
    );

    /**
     * Stores the optional user agent and tries to load the robot list from
     * the default INI file.
     *
     * @param string|null $sAgent User agent to remember for later isBot() calls.
     */
    public function __construct($sAgent = null)
    {
        if (!is_null($sAgent)) {
            $this->_sAgent = $sAgent;
        }

        $this->_getIniFile();
    }

    /**
     * Tells whether a user agent contains one of the known robot names.
     *
     * @param string|null $sAgent User agent to test; when null, the agent
     *                            passed to the constructor is used.
     *
     * @return bool True when a robot name occurs in the user agent, false
     *              otherwise or when there is no user agent to test.
     */
    public function isBot($sAgent = null)
    {
        if (is_null($sAgent)) {
            if (empty($this->_sAgent)) {
                return false;
            }
            $sAgent = $this->_sAgent;
        }

        foreach ($this->_aBots as $bot) {
            // An empty needle matches every haystack on PHP 8 (and raises a
            // warning on older versions), so it must never reach stripos().
            if ($bot === '' || !is_string($bot)) {
                continue;
            }
            if (stripos($sAgent, $bot) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replaces the robot list with the names found in an INI file.
     *
     * The file is parsed with sections; every entry of the "robots" section
     * is expected to be an array whose index 2 holds the string to match
     * (presumably the robot's name - confirm against the INI layout).
     * The current list is kept untouched when the file is missing, unreadable,
     * cannot be parsed, has no "robots" section or yields no usable name.
     *
     * @param string|null $sFile Path of the INI file; defaults to the file
     *                           shipped in the "data" directory next to this
     *                           class.
     *
     * @return void
     */
    private function _getIniFile($sFile = null)
    {
        if (is_null($sFile)) {
            $sFile = dirname(__FILE__)."/data/uas_20140127-01.ini"; // default ini
        }

        if (!is_file($sFile) || !is_readable($sFile)) {
            return;
        }

        $aTmpIni = parse_ini_file($sFile, true);
        if (!is_array($aTmpIni) || !isset($aTmpIni['robots']) || !is_array($aTmpIni['robots'])) {
            return;
        }

        // Collect into a local list first so a broken file cannot wipe the
        // list that is currently in use.
        $aBots = array();
        foreach ($aTmpIni['robots'] as $value) {
            if (is_array($value) && isset($value[2]) && is_string($value[2]) && $value[2] !== '') {
                $aBots[] = $value[2];
            }
        }

        if (!empty($aBots)) {
            $this->_aBots = $aBots;
        }
    }
}
?>