新闻采集系统的设计
1、基本设计思路:采用 fsockopen 函数抓取网页的新闻内容,根据相应的规则筛选出合适的内容,将其写入后台数据库。
2、PHP代码实现:
<?php
ini_set('display_errors', 1);
error_reporting(E_ALL);
$url = "roll.news.sina.com.cn";
$fp = fsockopen($url, 80, $errno, $errstr, 30);
if (!$fp)
{
echo "$errstr ($errno) <br />\n";
}
else
{
$out = "GET /news/gnxw/zs-pl/index_1.shtml HTTP/1.1\r\n";
$out .= "Host: roll.news.sina.com.cn\r\n";
$out .= "Connection: Close\r\n\r\n";
fwrite($fp, $out);
$html_str = "";
while (!feof($fp))
{
$html_str .= fgets($fp);
}
fclose($fp);
}
$html_str = preg_replace("/[\t]+/", "", $html_str);
$partern = '@<li><a href="([^<>]+)"\s* target="_blank">([^<>]+)\s*</a><span>\(([^<>]+)\s*\)</span></li>@';
preg_match_all($partern, $html_str, $result);
foreach ($result[2] as $news_str)
{
$pos = strpos($news_str, ":");
if (false === $pos)
{
$news_title = $news_cnts = $news_str;
}
else
{
$news_arr = explode(":", $news_str);
$news_title = trim($news_arr[0]);
$news_cnts = trim($news_arr[1]);
}
include_once("db.cls.php");
$_CFG = array(
"DB" => array(
"DEFAULT" => array(
"logfile" => "D:\wamp\log\log.txt",
"host" => "localhost:3306",
"user" => "root",
"password" => 123456,
"db_name" => "news_db",
"charset" => "gb2312",
),
),
);
$db = new Mysql();
$db->open("DEFAULT");
$tbl_name = "news_tbl";
$add_news_arr = array(
"title" => $news_title,
"content" => $news_cnts,
);
$insert_rst = $db->insert($tbl_name, $add_news_arr);
}
?>
3、数据库设计:
(1) 数据库名称: news_db
(2) 创建数据表:
create table news_tbl (
ID INT NOT NULL AUTO_INCREMENT,
title VARCHAR(100) NOT NULL,
content TEXT NOT NULL,
date_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (ID)
);
附录:db.cls.php
<?php
class Mysql
{
//传回句柄,传回结果集
var $_conn, $_q, $_r;
//数据库连接
###########################################################
# $p_dbid : 连接选择 例,$_CFG["DB"] 默认为default 连接
# $p_error : 连接错误处理机制 1 为自动处理, 否则为手工处理,默认为1
###########################################################
function open( $p_dbid="DEFAULT" , $p_error="1" )
{
global $_CFG;
if( $p_error == "1" )
{
//数据库连接错误自动处理
$this->_conn = @mysql_connect( $_CFG["DB"][$p_dbid]['host'], $_CFG["DB"][$p_dbid]['user'], $_CFG["DB"][$p_dbid]['password'] ) or die( "系维升级中.." );
}
else
{
//数据库连接错误人工处理
$this->_conn = @mysql_connect( $_CFG["DB"][$p_dbid]['host'], $_CFG["DB"][$p_dbid]['user'], $_CFG["DB"][$p_dbid]['password'] );
if(!$this->_conn)
{
return false;
}
}
mysql_select_db($_CFG["DB"][$p_dbid]['db_name'], $this->_conn );
if($_CFG["DB"][$p_dbid]['charset']!="")
{
mysql_query("SET NAMES '".$_CFG["DB"][$p_dbid]['charset']."'");
mysql_query("SET CHARACTER_SET_CLIENT=".$_CFG["DB"][$p_dbid]['charset']);
mysql_query("SET CHARACTER_SET_RESULTS=".$_CFG["DB"][$p_dbid]['charset']);
}
return true;
}
//SQL发送
function query( $p_sql )
{
global $_GET;
if( $_GET["DB_DEBUG"] == true )
{
echo $p_sql."<br>\n";
}
// echo $p_sql."<br>\n";
return mysql_query( $p_sql, $this->_conn );
}
//插入数据
###########################################################
# $p_tabName : 表名
# $p_tabVar : 一个以,下标对应字段名,值对应插入的值的数组
# $p_q : 数据库连接句柄
###########################################################
function insert( $p_tabName , $p_tabVar )
{
foreach($p_tabVar as $key => $var)
{
$sql_addsub_name.= " `".$key."` ,";
$sql_add_var_name.= " '".mysql_real_escape_string($var)."' ,";
}
$sql_addsub_name = substr($sql_addsub_name, 0, -1);
$sql_add_var_name = substr($sql_add_var_name, 0, -1);
$sqlAdd = "INSERT INTO ".$p_tabName." ( ".$sql_addsub_name.") VALUES (".$sql_add_var_name.")";
return $this->query( $sqlAdd );
}
//替换唯一数据
###########################################################
# $p_tabName : 表名
# $p_tabVar : 一个以,下标对应字段名,值对应插入的值的数组
###########################################################
function replace( $p_tabName , $p_tabVar )
{
foreach($p_tabVar as $key => $var)
{
$sql_addsub_name.= " `".$key."` ,";
$sql_add_var_name.= " '".mysql_real_escape_string($var)."' ,";
}
$sql_addsub_name = substr($sql_addsub_name, 0, -1);
$sql_add_var_name = substr($sql_add_var_name, 0, -1);
$sqlAdd = "REPLACE INTO ".$p_tabName." ( ".$sql_addsub_name.") VALUES (".$sql_add_var_name.")";
return $this->query( $sqlAdd );
}
//删除数据
###########################################################
# $tabName : 表名
# $whereStr : 需要删除的条件字串,不带WHERE
# $db : 数据库连接句柄
###########################################################
function delete( $p_tabName , $p_whereStr )
{
$sqlDel = "DELETE FROM ".$p_tabName." WHERE ".$p_whereStr;
return $this->query( $sqlDel );
}
//编辑数据
###########################################################
# $tabName : 表名
# $setVarArray :一个以,下标对应字段名,值对应插入的值的数组
# $whereStr : 需要编辑的条件字串,不带WHERE
# $db : 数据库连接句柄
###########################################################
function update( $p_tabName , $p_setVarArray , $p_whereStr )
{
foreach($p_setVarArray as $key => $var)
{
$edit_sql.= "`".$key."` = "."'".mysql_real_escape_string($var)."' ,";
}
$edit_sql = substr($edit_sql, 0, -1);
$sqlEdit = "UPDATE `".$p_tabName."` SET ".$edit_sql." WHERE ".$p_whereStr;
return $this->query( $sqlEdit );
}
//返回结果集
function fetch( $p_rs, $p_type = "array" )
{
if( $p_type == "array" )
{
return mysql_fetch_array( $p_rs , MYSQL_ASSOC);
}
else if( $p_type == "object" )
{
return mysql_fetch_object( $p_rs );
}
else
{
return mysql_fetch_row( $p_rs );
}
}
//获取最后一次记录ID
function getInsertID()
{
return mysql_insert_id($this->_conn);
}
//返回最后操作的影响列数
function getAffectedRows( )
{
return mysql_affected_rows();
}
//返回结果集列数
function getNumRows( $p_rs )
{
return mysql_num_rows($p_rs);
}
//定位结果集指针
function seek( $p_rs , $p_num )
{
return mysql_field_seek( $p_rs, $p_num );
}
//释放结果集
function free( $p_rs )
{
return mysql_free_result( $p_rs );
}
//关闭数据库连接
function close()
{
return mysql_close( $this->_conn );
}
}
?>