读RSS的PHP

最新推荐文章于 2024-11-16 18:00:00 发布

Allan8212

最新推荐文章于 2024-11-16 18:00:00 发布

阅读量1.7k

点赞数

文章标签： rss function search file xml null

本文链接：https://blog.csdn.net/Allan8212/article/details/741970

版权

PHP解析类

<?php
/*Khalid XML files parser :: class kxparse, Started in March 2002 by Khalid Al-kary*/
/**
 * Minimal string-based XML reader/editor.
 *
 * Tags are addressed with two parallel colon-separated paths: a path of tag
 * names ("rss:item:title") and a path of 1-based occurrence indexes
 * ("1:3:1"). An index of "-1" selects the last occurrence and "?" asks for
 * the number of occurrences at that level instead of selecting one.
 *
 * Known limitations of the string-scanning approach:
 *  - a tag is considered to end at the first matching closing tag, so
 *    elements nested inside an element of the same name are not supported;
 *  - tag names containing ":" cannot be addressed because ":" is the path
 *    separator;
 *  - CDATA markers are stripped on load, so "<" inside CDATA content will be
 *    seen as markup.
 */
class kxparse
{
    /** Raw XML text held by the parser (CDATA markers already stripped). */
    public $xml = "";

    /** Byte offset in $xml where the currently selected tag starts. */
    public $cursor = 0;

    /** Byte offset in $xml just past the end of the currently selected tag. */
    public $cursor2 = 0;

    /** Encoding named in the XML declaration, upper-cased; "UTF-8" when none is declared. */
    public $encoding = "UTF-8";

    /**
     * Loads an XML document.
     *
     * @param string $xmlfile Path or URL readable by file_get_contents().
     *                        When it cannot be read PHP raises its usual
     *                        warning and the parser holds an empty document.
     */
    public function __construct($xmlfile)
    {
        $this->encoding = "UTF-8";

        // file_get_contents() returns false on failure; the original
        // fopen()/feof() loop never terminated in that case.
        $contents = file_get_contents($xmlfile);
        if ($contents === false) {
            $contents = "";
        }

        // Remember the declared encoding so callers can transcode the text.
        if (preg_match('/<\?xml\b[^>]*?\bencoding\s*=\s*(["\'])(.*?)\1/i', $contents, $match)) {
            $this->encoding = strtoupper($match[2]);
        }

        // Drop the CDATA wrappers but keep their content.
        $this->xml = str_replace(array("<![CDATA[", "]]>"), "", $contents);

        // Select the whole document.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);
    }

    /**
     * PHP 4 style entry point kept for code that calls it explicitly;
     * same-named methods are no longer constructors on PHP 8.
     *
     * @param string $xmlfile See __construct().
     */
    public function kxparse($xmlfile)
    {
        $this->__construct($xmlfile);
    }

    /**
     * Returns the encoding detected while loading the document.
     *
     * @return string Upper-cased encoding name, "UTF-8" by default.
     */
    public function getencoding()
    {
        return $this->encoding;
    }

    /**
     * Finds the value of an attribute inside the opening tag of $tag.
     *
     * @param string $tag  Tag text starting at "<".
     * @param string $attr Attribute name.
     * @return array|false array(offset of the value inside $tag, value length),
     *                     or false when the attribute is absent.
     */
    private function locate_attribute($tag, $attr)
    {
        $open_end = strpos($tag, ">");
        if ($open_end === false) {
            return false;
        }

        // Only the opening tag may be searched, never the element content.
        $open_tag = substr($tag, 0, $open_end);
        $pattern = '/\s' . preg_quote($attr, '/') . '\s*=\s*(["\'])(.*?)\1/s';
        if (!preg_match($pattern, $open_tag, $match, PREG_OFFSET_CAPTURE)) {
            return false;
        }

        return array($match[2][1], strlen($match[2][0]));
    }

    /**
     * Returns the value of an attribute of a tag given as text.
     *
     * @param string $tag  The tag itself (with its start and end).
     * @param string $attr Attribute name.
     * @return string|false The attribute value, or FALSE when the attribute
     *                      is not present in the opening tag.
     */
    public function get_attribute_internal($tag, $attr)
    {
        $found = $this->locate_attribute($tag, $attr);
        if ($found === false) {
            return FALSE;
        }

        return substr($tag, $found[0], $found[1]);
    }

    /**
     * Returns the content between the opening tag and the last closing tag
     * of $tag.
     *
     * @param string $tag The tag itself (with its start and end).
     * @return string The inner text; "" for empty input or a self-closing tag.
     */
    public function get_tag_text_internal($tag)
    {
        $open_end = strpos($tag, ">");
        if ($open_end === false || $open_end === 0) {
            return "";
        }

        // "<tag/>" has no content.
        if (substr($tag, $open_end - 1, 1) === "/") {
            return "";
        }

        $close_start = strrpos($tag, "</");
        if ($close_start === false || $close_start < $open_end) {
            return "";
        }

        return substr($tag, $open_end + 1, $close_start - $open_end - 1);
    }

    /**
     * Decodes the basic XML entities in $text.
     *
     * "&amp;" is decoded last so that "&amp;lt;" yields the literal "&lt;".
     *
     * @param string $text Entity-encoded text.
     * @return string Decoded text.
     */
    public function htmldecode($text)
    {
        $text = str_replace("&lt;", "<", $text);
        $text = str_replace("&gt;", ">", $text);
        $text = str_replace("&amp;", "&", $text);
        // NOTE(review): the "&ltt;" / "&gtt;" replacements are kept exactly as
        // published; their purpose is not evident from this file - confirm
        // against the original library.
        $text = str_replace("&ltt;", "<", $text);
        $text = str_replace("&gtt;", ">", $text);
        return $text;
    }

    /**
     * Writes the current XML text to a file, overwriting it if it exists.
     *
     * @param string $file Destination path.
     * @return bool TRUE when the whole document was written, FALSE otherwise.
     */
    public function save($file)
    {
        // file_put_contents() returns the byte count or false; the original
        // compared fwrite()'s result with -1 and therefore always reported
        // success.
        $written = file_put_contents($file, $this->xml);
        return $written !== false && $written === strlen($this->xml);
    }

    /**
     * Returns a tag of the XML tree, including its opening and closing tag.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return string The tag text, or "" when the path does not resolve.
     */
    public function get_tag_in_tree($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return "";
        }

        return substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);
    }

    /**
     * Returns the text content of a tag.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return string The inner text, or "" when the path does not resolve.
     */
    public function get_tag_text($tname, $tindex)
    {
        return $this->get_tag_text_internal($this->get_tag_in_tree($tname, $tindex));
    }

    /**
     * Counts the occurrences of a tag; the level to count must use "?" as
     * its index, e.g. count_tag("rss:item", "1:?").
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes containing "?".
     * @return int|bool The count, or whatever get_work_space() returns when
     *                  no "?" level is reached.
     */
    public function count_tag($tname, $tindex)
    {
        return $this->get_work_space($tname, $tindex);
    }

    /**
     * Returns the value of an attribute of a tag in the tree.
     *
     * @param string $tname    Colon-separated tag names.
     * @param string $tindex   Colon-separated occurrence indexes.
     * @param string $attrname Attribute name.
     * @return string|false The value, or FALSE when the tag or attribute is
     *                      missing.
     */
    public function get_attribute($tname, $tindex, $attrname)
    {
        return $this->get_attribute_internal($this->get_tag_in_tree($tname, $tindex), $attrname);
    }

    /**
     * Lists where $tname opens between $cursor and $cursor2.
     *
     * @param string $tname A single tag name (no path).
     * @return array Index 0 is a placeholder (-1); indexes 1..n hold the
     *               absolute offsets in $xml of the n occurrences, so
     *               count() - 1 is the number of occurrences.
     */
    public function track_tag_cursors($tname)
    {
        $positions = array(-1);
        if ((string) $tname === "") {
            return $positions;
        }

        $needle = "<" . $tname;
        $needle_length = strlen($needle);
        $offset = $this->cursor;

        while (($pos = strpos($this->xml, $needle, $offset)) !== false && $pos < $this->cursor2) {
            // Require a delimiter after the name so "<item" does not match "<items".
            $next = (string) substr($this->xml, $pos + $needle_length, 1);
            if ($next !== "" && strpos(" \t\r\n/>", $next) !== false) {
                $positions[] = $pos;
            }
            $offset = $pos + $needle_length;
        }

        return $positions;
    }

    /**
     * Moves $cursor / $cursor2 around the tag addressed by the two paths.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated 1-based occurrence indexes;
     *                       "-1" means last, "?" returns the count.
     * @return int|bool The occurrence count when a "?" level is reached,
     *                  TRUE when the tag was selected, FALSE when the paths
     *                  have different depths, an index is out of range or
     *                  the tag is not terminated.
     */
    public function get_work_space($tname, $tindex)
    {
        $tname = (string) $tname;
        $tindex = (string) $tindex;

        // Both paths must have the same depth.
        if (substr_count($tindex, ":") != substr_count($tname, ":")) {
            return false;
        }

        $indexes = explode(":", $tindex);
        $names = explode(":", $tname);

        // Start from the whole document to erase any former selection.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);

        $depth = count($names);
        for ($level = 0; $level < $depth; $level++) {
            $positions = $this->track_tag_cursors($names[$level]);
            $available = count($positions) - 1;
            $wanted = trim($indexes[$level]);

            if ($wanted === "?") {
                return $available;
            }

            // "-1" is shorthand for the last occurrence.
            $wanted = ($wanted === "-1") ? $available : (int) $wanted;
            if ($wanted < 1 || $wanted > $available) {
                return false;
            }

            $start = $positions[$wanted];
            $open_end = strpos($this->xml, ">", $start);
            if ($open_end === false) {
                return false;
            }

            if (substr($this->xml, $open_end - 1, 1) === "/") {
                // Self-closing tag: it ends with its own ">".
                $end = $open_end + 1;
            } else {
                $closing = "</" . $names[$level] . ">";
                $close_pos = strpos($this->xml, $closing, $start);
                if ($close_pos === false) {
                    return false;
                }
                $end = $close_pos + strlen($closing);
            }

            $this->cursor = $start;
            $this->cursor2 = $end;
        }

        return true;
    }

    /**
     * Appends an empty tag as the last child of the addressed tag.
     *
     * @param string $tname  Colon-separated tag names of the parent.
     * @param string $tindex Colon-separated occurrence indexes of the parent.
     * @param string $ntname Name of the tag to create.
     * @return bool TRUE on success, FALSE when the parent cannot be found or
     *              is self-closing.
     */
    public function create_tag($tname, $tindex, $ntname)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $names = explode(":", (string) $tname);
        $closing = "</" . $names[count($names) - 1] . ">";

        $selected = substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);
        $offset = strpos($selected, $closing);
        if ($offset === false) {
            return FALSE;
        }

        // Length 0 turns the replacement into an insertion.
        $this->xml = substr_replace(
            $this->xml,
            "<" . $ntname . "></" . $ntname . ">",
            $this->cursor + $offset,
            0
        );
        return TRUE;
    }

    /**
     * Sets (or adds) an attribute on the addressed tag.
     *
     * The value is written as given; callers must entity-encode quotes.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @param string $attr   Attribute name.
     * @param string $value  New attribute value.
     * @return bool TRUE on success, FALSE when the tag cannot be found.
     */
    public function set_attribute($tname, $tindex, $attr, $value)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $selected = substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);
        $found = $this->locate_attribute($selected, $attr);

        if ($found !== false) {
            // Attribute exists: overwrite just the old value.
            $this->xml = substr_replace($this->xml, $value, $this->cursor + $found[0], $found[1]);
            return TRUE;
        }

        // Attribute is new: insert it right after the tag name, which ends at
        // the first whitespace, "/" or ">" following the "<".
        $name_end = 1 + strcspn($selected, " \t\r\n/>", 1);
        $this->xml = substr_replace(
            $this->xml,
            " " . $attr . "=\"" . $value . "\"",
            $this->cursor + $name_end,
            0
        );
        return TRUE;
    }

    /**
     * Replaces the content of the addressed tag with $text.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @param string $text   New content.
     * @return bool TRUE on success, FALSE when the tag cannot be found or is
     *              self-closing.
     */
    public function set_tag_text($tname, $tindex, $text)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $names = explode(":", (string) $tname);
        $closing = "</" . $names[count($names) - 1] . ">";

        $content_start = strpos($this->xml, ">", $this->cursor) + 1;
        $content_end = strpos($this->xml, $closing, $this->cursor);
        if ($content_end === false || $content_end < $content_start || $content_end >= $this->cursor2) {
            return FALSE;
        }

        $this->xml = substr_replace($this->xml, $text, $content_start, $content_end - $content_start);
        return TRUE;
    }

    /**
     * Removes the addressed tag together with its content.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return bool TRUE on success, FALSE when the tag cannot be found.
     */
    public function remove_tag($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $this->xml = substr_replace($this->xml, "", $this->cursor, $this->cursor2 - $this->cursor);
        return TRUE;
    }
}
?>

存入数据库

<?php
// Imports every item of an RSS feed into the RssNews table, one row per
// item, repeating the channel-level fields on each row.
include_once "kxparse.php";

/**
 * Converts feed text to UTF-8 for storage.
 *
 * @param mixed  $text     Text as returned by the parser.
 * @param string $encoding Encoding reported by the parser; empty or
 *                         "UTF-8" (any case) means no conversion is needed.
 * @return string UTF-8 text; the input is returned unchanged when iconv()
 *                cannot convert it.
 */
function rss_to_utf8($text, $encoding)
{
    $text = (string) $text;
    if ($encoding === "" || strcasecmp($encoding, "UTF-8") === 0) {
        return $text;
    }

    $converted = iconv($encoding, "UTF-8", $text);
    return ($converted === false) ? $text : $converted;
}

// Load the feed into the parser: constructor(string file). This is the
// address of the XML document to parse.
$xmlread = new kxparse("http://rss.xinhuanet.com/rss/native.xml");
$encoding = (string) $xmlread->getencoding();

// The mysql_* functions were removed in PHP 7; mysqli is the replacement.
// The connection character set is left at the server default, as before.
$db = new mysqli("localhost", "root", "", "Rss");
if ($db->connect_errno) {
    exit("Database connection failed: " . $db->connect_error);
}

// Feed content is untrusted, so it is bound as parameters instead of being
// concatenated into the statement.
$stmt = $db->prepare(
    "INSERT INTO RssNews (channeltitle,
 channellink,
 channeldescription,
 channelpubdate,
 itemtitle,
 itemlink,
 itemauthor,
 itemguid,
 itemcategory,
 itempubdate,
 itemcomments,
 itemdescription)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
);
if ($stmt === false) {
    exit("Could not prepare the insert statement: " . $db->error);
}

$channeltitle = rss_to_utf8($xmlread->get_tag_text("rss:channel:title", "1:1:1"), $encoding);
$channellink = rss_to_utf8($xmlread->get_tag_text("rss:channel:link", "1:1:1"), $encoding);
$channeldescription = rss_to_utf8($xmlread->get_tag_text("rss:channel:description", "1:1:1"), $encoding);
$channelpubdate = rss_to_utf8($xmlread->get_tag_text("rss:channel:pubDate", "1:1:1"), $encoding);

// bind_param() binds by reference: the item variables are filled in on each
// iteration before execute() is called.
$itemtitle = $itemlink = $itemauthor = $itemguid = "";
$itemcategory = $itempubdate = $itemcomments = $itemdescription = "";
$stmt->bind_param(
    "ssssssssssss",
    $channeltitle,
    $channellink,
    $channeldescription,
    $channelpubdate,
    $itemtitle,
    $itemlink,
    $itemauthor,
    $itemguid,
    $itemcategory,
    $itempubdate,
    $itemcomments,
    $itemdescription
);

// Count the items once instead of re-scanning the document on every pass.
$itemcount = (int) $xmlread->count_tag("rss:item", "1:?");

for ($i = 1; $i <= $itemcount; $i++) {
    $itemtitle = rss_to_utf8($xmlread->get_tag_text("rss:item:title", "1:$i:1"), $encoding);
    $itemlink = rss_to_utf8($xmlread->get_tag_text("rss:item:link", "1:$i:1"), $encoding);
    $itemauthor = rss_to_utf8($xmlread->get_tag_text("rss:item:author", "1:$i:1"), $encoding);
    $itemguid = rss_to_utf8($xmlread->get_tag_text("rss:item:guid", "1:$i:1"), $encoding);
    $itemcategory = rss_to_utf8($xmlread->get_tag_text("rss:item:category", "1:$i:1"), $encoding);
    $itempubdate = rss_to_utf8($xmlread->get_tag_text("rss:item:pubDate", "1:$i:1"), $encoding);
    $itemcomments = rss_to_utf8($xmlread->get_tag_text("rss:item:comments", "1:$i:1"), $encoding);
    $itemdescription = rss_to_utf8($xmlread->get_tag_text("rss:item:description", "1:$i:1"), $encoding);

    if (!$stmt->execute()) {
        // Report the failing item but keep importing the remaining ones.
        trigger_error("Insert failed for item $i: " . $stmt->error, E_USER_WARNING);
    }
}

$stmt->close();
$db->close();

从数据库里读出内容

<?php
// Prints every row of the RssNews table, converting each stored UTF-8 value
// to GBK and separating the values with a single space.

// The mysql_* functions were removed in PHP 7; mysqli is the replacement.
// The connection character set is left at the server default, as before.
$db = new mysqli("localhost", "root", "", "Rss");
if ($db->connect_errno) {
    exit("Database connection failed: " . $db->connect_error);
}

$sql = "SELECT * from RssNews";

$result = $db->query($sql);
if ($result === false) {
    // Fetching from a failed query would only produce warnings.
    exit("Query failed: " . $db->error);
}

// Columns are printed in this fixed order.
$columns = array(
    'channeltitle',
    'channellink',
    'channeldescription',
    'channelpubdate',
    'itemtitle',
    'itemlink',
    'itemauthor',
    'itemguid',
    'itemcategory',
    'itempubdate',
    'itemcomments',
    'itemdescription',
);

while ($row = $result->fetch_assoc()) {
    foreach ($columns as $column) {
        // NOTE(review): the values originate from an external feed; if this
        // output is rendered as HTML they should be passed through
        // htmlspecialchars() first - confirm the output context.
        echo iconv("UTF-8", "GBK", (string) $row[$column]) . " ";
    }
}

$result->free();
$db->close();
?>

数据库结构

--
-- Database: `rss`
--

-- --------------------------------------------------------

--
-- Table structure for `rssnews`
-- One row per feed item; the channel-level fields are repeated on each row.
--

CREATE TABLE `rssnews` (
  `channeltitle`       VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `channellink`        VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `channeldescription` VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `channelpubdate`     VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemtitle`          VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemlink`           VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemauthor`         VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemguid`           VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemcategory`       VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itempubdate`        VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemcomments`       VARCHAR(500) COLLATE utf8_bin DEFAULT NULL,
  `itemdescription`    VARCHAR(500) COLLATE utf8_bin DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;