效果
http://www.hongyun2000.com/bot.php
实现步骤:
1.添加数据库表
- -- --------------------------------------------------------
- --
- -- 蜘蛛表的结构 `ecs_bot`
- --
- CREATE TABLE IF NOT EXISTS `ecs_bot` (
- `id` int (11) NOT NULL AUTO_INCREMENT,
- `url` varchar (255) DEFAULT NULL ,
- `bot` varchar (20) DEFAULT NULL ,
- `time` varchar (30) DEFAULT NULL ,
- `ip` varchar (50) DEFAULT NULL ,
- PRIMARY KEY (`id`)
- ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
-- --------------------------------------------------------
--
-- Table structure for the spider crawl log `ecs_bot`
--
-- One row per recorded spider request:
--   id    auto-increment primary key
--   url   URL that was requested
--   bot   search-engine name
--   time  timestamp of the visit, stored as text
--   ip    remote address of the request
--
CREATE TABLE IF NOT EXISTS `ecs_bot` (
  `id`   INT(11)      NOT NULL AUTO_INCREMENT,
  `url`  VARCHAR(255) DEFAULT NULL,
  `bot`  VARCHAR(20)  DEFAULT NULL,
  `time` VARCHAR(30)  DEFAULT NULL,
  `ip`   VARCHAR(50)  DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1;
2.修改lib_main.php里的is_spider()
- /**
- * 判断是否为搜索引擎蜘蛛
- *
- * @access public
- * @return string
- */
- function is_spider( $record = true)
- {
- static $spider = NULL;
- if ( $spider !== NULL)
- {
- return $spider ;
- }
- if ( empty ( $_SERVER [ 'HTTP_USER_AGENT' ]))
- {
- $spider = '' ;
- return '' ;
- }
- $searchengine_bot = array (
- 'googlebot' ,
- 'mediapartners-google' ,
- 'baiduspider+' ,
- 'msnbot' ,
- 'yodaobot' ,
- 'yahoo! slurp;' ,
- 'yahoo! slurp china;' ,
- 'iaskspider' ,
- 'sogou web spider' ,
- 'sogou push spider'
- );
- $searchengine_name = array (
- 'GOOGLE' ,
- 'GOOGLE ADSENSE' ,
- 'BAIDU' ,
- 'MSN' ,
- 'YODAO' ,
- 'YAHOO' ,
- 'Yahoo China' ,
- 'IASK' ,
- 'SOGOU' ,
- 'SOGOU'
- );
- $spider = strtolower ( $_SERVER [ 'HTTP_USER_AGENT' ]);
- foreach ( $searchengine_bot AS $key => $value )
- {
- if ( strpos ( $spider , $value ) !== false)
- {
- $spider = $searchengine_name [ $key ];
- if ( $record === true)
- {
- $GLOBALS [ 'db' ]->autoReplace( $GLOBALS [ 'ecs' ]->table( 'searchengine' ), array ( 'date' => local_date( 'Y-m-d' ), 'searchengine' => $spider , 'count' => 1), array ( 'count' => 1));
- //蜘蛛爬行记录 author heui
- $record_count = $GLOBALS [ 'db' ]->getOne( 'SELECT COUNT(*) FROM ' . $GLOBALS [ 'ecs' ]->table( 'bot' ));
- if ( $record_count >10000)
- {
- //超过10000开始删除
- $GLOBALS [ 'db' ]->query( "DELETE FROM " . $GLOBALS [ 'ecs' ]->table( 'bot' ). " LIMIT 1" );
- }
- $server_name = $_SERVER [ "SERVER_NAME" ];
- $server_port = $_SERVER [ "SERVER_PORT" ];
- $script_name = $_SERVER [ "SCRIPT_NAME" ];
- $query_string = $_SERVER [ "QUERY_STRING" ];
- $server_ip = $_SERVER [ "REMOTE_ADDR" ];
- $url = "http://" . $server_name ;
- if ( $server_port != "80" )
- {
- $url = $url . ":" . $server_port ;
- }
- $url = $url . $script_name ;
- if ( $query_string != "" )
- {
- $url = $url . "?" . $query_string ;
- }
- $sql = "INSERT INTO " . $GLOBALS [ 'ecs' ]->table( 'bot' ) . "(" .
- "id, url, bot, time, ip) " .
- "VALUES('', '" . $url . "', '" . $spider . "','" .gmtime(). "','" . $server_ip . "')" ;
- $GLOBALS [ 'db' ]->query( $sql );
- }
- return $spider ;
- }
- }
- $spider = '' ;
- return '' ;
- }
/**
 * Identify whether the current request comes from a known search-engine spider.
 *
 * The lower-cased User-Agent header is matched against a fixed list of spider
 * signatures. The result is cached in a static variable, so only the first
 * call in a request performs matching (and, optionally, recording).
 *
 * When a spider is recognised and $record is true, autoReplace() is called on
 * the `searchengine` table with today's date and the spider name, and the
 * requested URL is appended to the `bot` table. Once the `bot` table holds
 * more than 10000 rows, the row with the lowest id is deleted before the
 * insert.
 *
 * @access  public
 * @param   boolean $record  whether to record the visit in the database
 * @return  string           search-engine name, or '' when not a known spider
 */
function is_spider($record = true)
{
    static $spider = NULL;

    if ($spider !== NULL)
    {
        return $spider;
    }

    if (empty($_SERVER['HTTP_USER_AGENT']))
    {
        $spider = '';

        return '';
    }

    /* Signatures (lower case) and the display names they map to, index for index. */
    $searchengine_bot = array(
        'googlebot',
        'mediapartners-google',
        'baiduspider+',
        'msnbot',
        'yodaobot',
        'yahoo! slurp;',
        'yahoo! slurp china;',
        'iaskspider',
        'sogou web spider',
        'sogou push spider'
    );

    $searchengine_name = array(
        'GOOGLE',
        'GOOGLE ADSENSE',
        'BAIDU',
        'MSN',
        'YODAO',
        'YAHOO',
        'Yahoo China',
        'IASK',
        'SOGOU',
        'SOGOU'
    );

    $spider = strtolower($_SERVER['HTTP_USER_AGENT']);

    foreach ($searchengine_bot AS $key => $value)
    {
        if (strpos($spider, $value) !== false)
        {
            $spider = $searchengine_name[$key];

            if ($record === true)
            {
                $GLOBALS['db']->autoReplace($GLOBALS['ecs']->table('searchengine'), array('date' => local_date('Y-m-d'), 'searchengine' => $spider, 'count' => 1), array('count' => 1));

                /* Spider crawl log (author heui) */
                $bot_table    = $GLOBALS['ecs']->table('bot');
                $record_count = $GLOBALS['db']->getOne('SELECT COUNT(*) FROM ' . $bot_table);
                if ($record_count > 10000)
                {
                    /* Over the cap: drop the oldest row. Without ORDER BY the
                       server is free to delete any row, not the oldest one. */
                    $GLOBALS['db']->query('DELETE FROM ' . $bot_table . ' ORDER BY id ASC LIMIT 1');
                }

                /* Not every SAPI populates all of these keys, so read them defensively. */
                $server_name  = isset($_SERVER['SERVER_NAME'])  ? $_SERVER['SERVER_NAME']  : '';
                $server_port  = isset($_SERVER['SERVER_PORT'])  ? $_SERVER['SERVER_PORT']  : '';
                $script_name  = isset($_SERVER['SCRIPT_NAME'])  ? $_SERVER['SCRIPT_NAME']  : '';
                $query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
                $server_ip    = isset($_SERVER['REMOTE_ADDR'])  ? $_SERVER['REMOTE_ADDR']  : '';

                /* Pick the scheme from the request instead of assuming plain HTTP. */
                $is_https     = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) != 'off';
                $default_port = $is_https ? '443' : '80';

                $url = ($is_https ? 'https://' : 'http://') . $server_name;
                if ($server_port != '' && $server_port != $default_port)
                {
                    $url .= ':' . $server_port;
                }
                $url .= $script_name;
                if ($query_string != '')
                {
                    $url .= '?' . $query_string;
                }

                /* $_SERVER values are client-controlled, so they are escaped
                   before being placed in the statement. The id column is left
                   out so that AUTO_INCREMENT assigns it. */
                $sql = 'INSERT INTO ' . $bot_table . ' (url, bot, time, ip) ' .
                       "VALUES ('" . addslashes($url) . "', '" . addslashes($spider) . "', '" .
                       gmtime() . "', '" . addslashes($server_ip) . "')";
                $GLOBALS['db']->query($sql);
            }

            return $spider;
        }
    }

    $spider = '';

    return '';
}
3.增加查看蜘蛛爬行列表页面bot.php
- <?php
- /**
- * 蜘蛛爬行记录列表
- * bot.php
- * ============================================================================
- * @author heui
- */
- define('IN_ECS' , true);
- require (dirname( __FILE__ ) . '/includes/init.php' );
- if ((DEBUG_MODE & 2) != 2)
- {
- $smarty ->caching = true;
- }
- /* 初始化分页信息 */
- $page = isset( $_REQUEST [ 'page' ]) && intval ( $_REQUEST [ 'page' ]) > 0 ? intval ( $_REQUEST [ 'page' ]) : 1;
- $size = 100;
- $cache_id = sprintf( '%X' , crc32( $page . '_' . $size ));
- if (! $smarty ->is_cached( 'bot.dwt' , $cache_id ))
- {
- $smarty ->assign( 'bot_list' , get_bot_list( $page , $size ));
- $record_count = $GLOBALS [ 'db' ]->getOne( 'SELECT COUNT(*) FROM ' . $GLOBALS [ 'ecs' ]->table( 'bot' ));
- $page_count = $record_count > 0 ? intval ( ceil ( $record_count / $size )) : 1;
- $smarty ->assign( 'pager' , get_pager( 'bot.php' , array (), $record_count , $page , $size ));
- }
- $smarty ->display( 'bot.dwt' , $cache_id );
- /**
- * 获取蜘蛛爬行记录列表
- * @author heui
- * @return array
- */
- function get_bot_list( $page , $size )
- {
- $sql = "SELECT id, url, bot, time, ip FROM " . $GLOBALS [ 'ecs' ]->table( 'bot' ). "ORDER BY time DESC" ;
- $res = $GLOBALS [ 'db' ]->selectLimit( $sql , $size , ( $page - 1) * $size );
- $arr = array ();
- while ( $row = $GLOBALS [ 'db' ]->fetchRow( $res ))
- {
- $arr [ $row [ 'id' ]][ 'id' ] = $row [ 'id' ];
- $arr [ $row [ 'id' ]][ 'url' ] = $row [ 'url' ];
- $arr [ $row [ 'id' ]][ 'bot' ] = $row [ 'bot' ];
- $arr [ $row [ 'id' ]][ 'time' ] = date ( "Y-m-j H:i:s" , $row [ 'time' ]);
- $arr [ $row [ 'id' ]][ 'ip' ] = $row [ 'ip' ];
- }
- return $arr ;
- }
<?php
/**
 * Spider crawl log listing page
 * bot.php
 * ============================================================================
 * @author heui
 */
define('IN_ECS', true);

require(dirname(__FILE__) . '/includes/init.php');

/* Turn on template caching unless the value-2 bit of DEBUG_MODE is set. */
if (!(DEBUG_MODE & 2))
{
    $smarty->caching = true;
}

/* Pagination: a missing or non-positive page number falls back to page 1. */
$size = 100;
$page = isset($_REQUEST['page']) ? intval($_REQUEST['page']) : 1;
if ($page <= 0)
{
    $page = 1;
}

/* The cache id is derived from page and page size. */
$cache_id = sprintf('%X', crc32("{$page}_{$size}"));

/* Only query the database when there is no cached copy of this page. */
if (!$smarty->is_cached('bot.dwt', $cache_id))
{
    $smarty->assign('bot_list', get_bot_list($page, $size));

    $record_count = $GLOBALS['db']->getOne('SELECT COUNT(*) FROM ' . $GLOBALS['ecs']->table('bot'));
    if ($record_count > 0)
    {
        $page_count = intval(ceil($record_count / $size));
    }
    else
    {
        $page_count = 1;
    }

    $smarty->assign('pager', get_pager('bot.php', array(), $record_count, $page, $size));
}

$smarty->display('bot.dwt', $cache_id);
/**
 * Fetch one page of spider crawl records, newest first.
 *
 * @author  heui
 * @param   integer $page  1-based page number
 * @param   integer $size  number of records per page
 * @return  array          rows keyed by record id; each row holds id, url,
 *                         bot, time (formatted as "Y-m-j H:i:s") and ip
 */
function get_bot_list($page, $size)
{
    /* The space before ORDER BY keeps the clause from being glued onto the
       table name; id DESC makes the order stable for rows that share the
       same timestamp, so records do not shift between pages. */
    $sql = 'SELECT id, url, bot, time, ip FROM ' . $GLOBALS['ecs']->table('bot') .
           ' ORDER BY time DESC, id DESC';
    $res = $GLOBALS['db']->selectLimit($sql, $size, ($page - 1) * $size);

    $arr = array();
    while ($row = $GLOBALS['db']->fetchRow($res))
    {
        $arr[$row['id']] = array(
            'id'   => $row['id'],
            'url'  => $row['url'],
            'bot'  => $row['bot'],
            /* time is a nullable text column; date() expects an integer. */
            'time' => date("Y-m-j H:i:s", intval($row['time'])),
            'ip'   => $row['ip']
        );
    }

    return $arr;
}
4.增加蜘蛛爬行列表的模板 bot.dwt
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>蜘蛛爬行记录</title>
<style>
ul, li{
  float:left;
  list-style: none;
}
ul{
  width:900px;
}
</style>
</head>
<body>
<!-- header row -->
<ul style="background-color:#FF0000;">
  <li style="width: 50px;">id</li>
  <li style="width: 300px;">url</li>
  <li style="width: 100px;">bot</li>
  <li style="width: 200px;">time</li>
  <li style="width: 200px;">ip</li>
</ul>
<!-- one row per crawl record; the url comes from the request that was logged,
     so it is HTML-escaped both in the href and in the link text -->
<!--{foreach from=$bot_list item=bot}-->
<ul>
  <li style="width: 50px;">{$bot.id}</li>
  <li style="width: 300px;"><a href="{$bot.url|escape:html}" target="_blank">{$bot.url|truncate:30|escape:html}</a></li>
  <li style="width: 100px;">{$bot.bot}</li>
  <li style="width: 200px;">{$bot.time}</li>
  <li style="width: 200px;">{$bot.ip}</li>
</ul>
<!--{/foreach}-->
<!-- pagination start -->
<ul><li>
  <span style="margin-right:10px;">{$lang.pager_1}<b>{$pager.record_count}</b>{$lang.pager_2}</span>
  <!-- {if $pager.page_first} --><a href="{$pager.page_first}">{$lang.page_first} ...</a><!-- {/if} -->
  <!-- {if $pager.page_prev} --><a class="prev" href="{$pager.page_prev}">{$lang.page_prev}</a><!-- {/if} -->
  <!-- {if $pager.page_count neq 1} -->
    <!--{foreach from=$pager.page_number key=key item=item}-->
      <!-- {if $pager.page eq $key} -->
      <span>{$key}</span>
      <!-- {else} -->
      <a href="{$item}">[{$key}]</a>
      <!-- {/if} -->
    <!--{/foreach}-->
  <!-- {/if} -->
  <!-- {if $pager.page_next} --><a href="{$pager.page_next}">{$lang.page_next}</a><!-- {/if} -->
  <!-- {if $pager.page_last} --><a href="{$pager.page_last}">...{$lang.page_last}</a><!-- {/if} -->
</li></ul>
<!-- pagination end -->
</body>
</html>