207 Zeilen
		
	
	
	
		
			5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			207 Zeilen
		
	
	
	
		
			5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
/**
 * File: class.agents.php
 *
 * @package     SemmelstatzR
 * @version     $Rev$
 * @since       1.0.0
 * @author      SEM-Team
 * @copyright   (c)2011 SEM-Team
 * @link        http://sourceforge.net/projects/semmelstatz/
 * @license     http://www.gnu.org/licenses/gpl-3.0.html
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * $Id$
 *
 */
 | 
						|
 | 
						|
/**
 * Detects crawlers/robots by matching a user agent string against a list
 * of known bot signatures (case-insensitive substring match).
 *
 * The built-in signature list acts as a fallback: when the bundled ini file
 * is readable and yields at least one usable entry from its [robots]
 * section, those entries replace the built-in list.
 */
class Agents {

    /**
     * User agent string passed to the constructor, or null if none was given.
     *
     * @var string|null
     */
    private $_sAgent;

    /**
     * Bot signatures that are searched for inside the user agent string.
     *
     * @var array
     */
    private $_aBots = array(
        'aipbot',
        'amfibibot',
        'appie',
        'ask jeeves/teoma',
        'aspseek',
        'axadine',
        'baiduspider',
        'becomebot',
        'blogcorpuscrawler',
        'blogpulse',
        'blogsnowbot',
        'boitho.com',
        'bruinbot',
        'cerberian',
        'cfnetwork',
        'check_http',
        'cipinetbot',
        'claymont',
        'cometsearch@cometsystems.com',
        'converacrawler',
        'cydralspider',
        'digger',
        'es.net_crawler',
        'eventax',
        'everyfeed-spider',
        'exabot@exava.com',
        'faxobot',
        'findlinks',
        'fireball',
        'francis',
        'gaisbot',
        'gamekitbot',
        'gazz@nttr.co.jp',
        'geonabot',
        'getrax crawler',
        'gigabot',
        'girafa.com',
        'goforitbot',
        'googlebot',
        'grub-client',
        'holmes',
        'houxoucrawler',
        'http://www.almaden.ibm.com/cs/crawler',
        'http://www.istarthere.com',
        'http://www.relevantnoise.com',
        'httrack ?',
        'ia_archiver',
        'ichiro',
        'iltrovatore-setaccio',
        'inelabot',
        'infoseek',
        'inktomi.com',
        'irlbot',
        'jetbot',
        'jobspider_ba',
        'kazoombot',
        'larbin',
        'libwww',
        'linkwalker',
        'lmspider',
        'mackster',
        'mediapartners-google',
        'microsoft url control',
        'mj12bot',
        'moreoverbot',
        'mozdex',
        'msnbot',
        'msrbot',
        'naverbot',
        'netresearchserver',
        'ng/2.0',
        'np(bot)',
        'nutch',
        'objectssearch',
        'ocelli',
        'omniexplorer_bot',
        'openbot',
        'overture',
        'patwebbot',
        'php',
        'phpdig',
        'pilgrim html-crawler',
        'pipeliner',
        'pompos',
        'psbot',
        'python-urllib',
        'quepasacreep',
        'robozilla',
        'rpt-httpclient',
        'savvybot',
        'scooter',
        'search.ch',
        'seekbot',
        'semager',
        'seznambot',
        'sherlock',
        'shelob',
        'sitesearch',
        'snapbot',
        'snappreviewbot',
        'speedy spider',
        'sphere scout',
        'stackrambler',
        'steeler',
        'surveybot',
        'szukacz',
        'technoratibot',
        'telnet',
        'themiragorobot',
        'thesubot',
        'thumbshots-de-bot',
        'topicblogs',
        'turnitinbot',
        'tutorgigbot',
        'tutorial crawler',
        'twiceler',
        'vagabondo',
        'versus',
        'voilabot',
        'w3c_css_validator',
        'w3c_validator',
        'w3c-checklink',
        'web downloader',
        'webcopier',
        'webcrawler',
        'webfilter robot',
        'west wind internet protocols',
        'wget',
        'wwweasel robot',
        'wwwster',
        'xaldon webspider',
        'xenu',
        'yahoo! slurp',
        'yahoofeedseeker',
        'yahoo-mmcrawler',
        'zao',
        'zipppbot',
        'zyborg',
        );

    /**
     * Stores the optional user agent and loads the bot signatures from the
     * default ini file (if that file is available).
     *
     * @param string|null $sAgent User agent string to remember for later
     *                            isBot() calls without an argument.
     */
    public function __construct($sAgent = null) {
        if ($sAgent !== null) {
            $this->_sAgent = $sAgent;
        }
        $this->_getIniFile();
    }

    /**
     * Tells whether a user agent string contains one of the known bot
     * signatures.
     *
     * @param string|null $sAgent User agent to test; when null, the agent
     *                            given to the constructor is used.
     *
     * @return bool True if a signature matches, false otherwise (also false
     *              when no agent is available at all).
     */
    public function isBot($sAgent = null) {
        if ($sAgent === null) {
            if (empty($this->_sAgent)) {
                return false;
            }
            $sAgent = $this->_sAgent;
        }

        foreach ($this->_aBots as $sBot) {
            // An empty needle matches every haystack on PHP 8+, which would
            // classify all agents as bots; ignore such entries.
            if (!is_string($sBot) || $sBot === '') {
                continue;
            }
            if (stripos($sAgent, $sBot) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces the bot signature list with the entries of an ini file.
     *
     * The file is parsed with sections enabled; for every entry of the
     * [robots] section the element at index 2 is taken as the signature
     * (presumably the robot name of the user-agent-string.info data format
     * - TODO confirm). The current list is kept untouched when the file is
     * missing, unreadable, cannot be parsed, has no [robots] section or
     * yields no usable signature.
     *
     * @param string|null $sFile Path of the ini file; null selects the
     *                           bundled default file.
     *
     * @return void
     */
    private function _getIniFile($sFile = null) {
        if ($sFile === null) {
            $sFile = dirname(__FILE__)."/data/uas_20140127-01.ini"; // default ini
        }

        if (!is_file($sFile) || !is_readable($sFile)) {
            return;
        }

        $aIni = parse_ini_file($sFile, true);
        // parse_ini_file() returns false on failure; the section may also be absent.
        if (!is_array($aIni) || !isset($aIni['robots']) || !is_array($aIni['robots'])) {
            return;
        }

        $aBots = array();
        foreach ($aIni['robots'] as $aRobot) {
            if (is_array($aRobot) && isset($aRobot[2]) && is_string($aRobot[2]) && $aRobot[2] !== '') {
                $aBots[] = $aRobot[2];
            }
        }

        // Only swap the list once we know the file delivered something usable,
        // so a broken file never disables bot detection.
        if (!empty($aBots)) {
            $this->_aBots = $aBots;
        }
    }
}
 | 
						|
?>
 |