读RSS的PHP

原创 2006年05月17日 08:12:00

PHP解析类

<?php
/*Khalid XML files parser :: class kxparse, Started in March 2002 by Khalid Al-kary*/
/**
 * Minimal string-based XML reader/editor.
 *
 * Tags are addressed with two parallel colon-separated paths: a tag-name path
 * and an index path, e.g. ("rss:channel:title", "1:1:1"). Indexes start at 1;
 * "-1" selects the last occurrence and "?" asks for the number of occurrences
 * at that level.
 *
 * Known limitations (the document is scanned as plain text, it is not parsed):
 *  - an element nested inside another element of the same name is not
 *    supported: the first matching closing tag always ends the element;
 *  - a ">" inside an attribute value is mistaken for the end of the tag;
 *  - CDATA markers are removed on load, so CDATA content is treated as markup.
 */
class kxparse
{
    /** @var string Document text (CDATA markers stripped). */
    public $xml = "";

    /** @var int Offset in $xml where the current work space starts. */
    public $cursor = 0;

    /** @var int Offset in $xml just past the end of the current work space. */
    public $cursor2 = 0;

    /** @var string|null Encoding name cached by getencoding(). */
    public $encoding;

    /**
     * Loads a document into the parser.
     *
     * @param string $xmlfile Path or URL readable by file_get_contents().
     * @throws RuntimeException When the source cannot be read.
     */
    public function __construct($xmlfile)
    {
        $contents = file_get_contents($xmlfile);
        if ($contents === FALSE) {
            // The old fopen()/feof() loop never terminated when the open failed.
            throw new RuntimeException("kxparse: unable to read XML source: ".$xmlfile);
        }

        // Drop the CDATA markers but keep what they wrap.
        $this->xml = str_replace(array("<![CDATA[", "]]>"), "", $contents);

        // The work space initially spans the whole document.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);
    }

    /**
     * Legacy PHP 4-style entry point, kept for code that calls it explicitly.
     * PHP 8 no longer treats a class-named method as the constructor.
     *
     * @param string $xmlfile Path or URL of the document.
     */
    public function kxparse($xmlfile)
    {
        $this->__construct($xmlfile);
    }

    /**
     * Reads the encoding declared in the XML prolog.
     *
     * @return string Upper-cased encoding name, or "UTF-8" when none is declared.
     */
    public function getencoding()
    {
        // "<\?" must be escaped: a bare "<?" would mean "optional <" in PCRE.
        $rx = '/<\?xml[^>]*?\sencoding\s*=\s*["\']([^"\']+)["\']/i';
        if (preg_match($rx, $this->xml, $m)) {
            $this->encoding = strtoupper($m[1]);
        } else {
            $this->encoding = "UTF-8";
        }
        return $this->encoding;
    }

    /**
     * Finds every opening tag named $tname between cursor and cursor2.
     *
     * @param string $tname Tag name (single level, no colons).
     * @return array Occurrence index => absolute offset in $xml. Element 0 is a
     *               placeholder (0) because occurrence indexes start at 1.
     */
    public function track_tag_cursors($tname)
    {
        $tag_poses = array(0);
        $needle = "<".$tname;
        $needle_length = strlen($needle);
        $limit = strlen($this->xml);

        // Clamp so a stale or foreign cursor can never produce an invalid offset.
        $offset = max(0, min((int) $this->cursor, $limit));

        while (($pos = strpos($this->xml, $needle, $offset)) !== FALSE && $pos < $this->cursor2) {
            // Only accept a whole-name match: "<item" must not match "<items>".
            $next = substr($this->xml, $pos + $needle_length, 1);
            if ($next !== FALSE && $next !== "" && strpos(" \t\r\n/>", $next) !== FALSE) {
                $tag_poses[] = $pos;
            }
            $offset = $pos + $needle_length;
        }

        return $tag_poses;
    }

    /**
     * Strips markup from a tag's source and decodes its entities.
     *
     * @param string $tname Full source of a tag (despite the parameter name).
     * @return string Plain text.
     */
    public function get_tag_text_internal($tname)
    {
        return $this->htmldecode(strip_tags($tname));
    }

    /**
     * Locates an attribute value inside an opening tag.
     *
     * @param string $opentag Source of the opening tag.
     * @param string $attr    Attribute name.
     * @return array|false array(offset of the value in $opentag, value), or
     *                     FALSE when the attribute is absent.
     */
    private function locate_attribute_value($opentag, $attr)
    {
        // Accepts both quote styles; \1 requires the matching closing quote.
        $pattern = '/\s'.preg_quote($attr, '/').'\s*=\s*(["\'])(.*?)\1/s';
        if (preg_match($pattern, $opentag, $m, PREG_OFFSET_CAPTURE)) {
            return array($m[2][1], $m[2][0]);
        }
        return FALSE;
    }

    /**
     * Returns the value of an attribute of the given tag.
     *
     * @param string $tag  The tag's source, starting at its "<".
     * @param string $attr Attribute name.
     * @return string|false The raw (undecoded) value, or FALSE when not found.
     */
    public function get_attribute_internal($tag, $attr)
    {
        $gt = strpos($tag, ">");
        if ($gt === FALSE) {
            return FALSE;
        }

        // Look only inside the opening tag, never in the element's content.
        $found = $this->locate_attribute_value(substr($tag, 0, $gt + 1), $attr);
        return ($found === FALSE) ? FALSE : $found[1];
    }

    /**
     * Decodes &lt;, &gt; and &amp;, then maps the custom escapes &ltt; / &gtt;
     * to a literal "&lt;" / "&gt;". The replacements run in this order.
     *
     * @param string $text Encoded text.
     * @return string Decoded text.
     */
    public function htmldecode($text)
    {
        return str_replace(
            array("&lt;", "&gt;", "&amp;", "&ltt;", "&gtt;"),
            array("<", ">", "&", "&lt;", "&gt;"),
            $text
        );
    }

    /**
     * Writes the current document to a file, overwriting it.
     *
     * @param string $file Destination path.
     * @return bool TRUE on success, FALSE when the file could not be written.
     */
    public function save($file)
    {
        // file_put_contents() reports failure as FALSE (0 bytes is a success).
        return file_put_contents($file, $this->xml) !== FALSE;
    }

    /**
     * Returns the full source of a tag, including its opening and closing tags.
     *
     * @param string $tname  Colon-separated tag-name path.
     * @param string $tindex Colon-separated index path.
     * @return string|false The tag source, or FALSE when it cannot be located.
     */
    public function get_tag_in_tree($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) !== TRUE) {
            return FALSE;
        }
        return substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);
    }

    /**
     * Returns the decoded text content of a tag.
     *
     * @param string $tname  Colon-separated tag-name path.
     * @param string $tindex Colon-separated index path.
     * @return string|false The text, or FALSE when the tag cannot be located.
     */
    public function get_tag_text($tname, $tindex)
    {
        $mytag = $this->get_tag_in_tree($tname, $tindex);
        if ($mytag === FALSE) {
            return FALSE;
        }
        return $this->get_tag_text_internal($mytag);
    }

    /**
     * Counts occurrences of a tag; the level to count must have index "?".
     *
     * @param string $tname  Colon-separated tag-name path.
     * @param string $tindex Colon-separated index path containing a "?".
     * @return int|false The count, or FALSE when the path is invalid or has no "?".
     */
    public function count_tag($tname, $tindex)
    {
        $found = $this->get_work_space($tname, $tindex);
        return is_int($found) ? $found : FALSE;
    }

    /**
     * Returns the value of an attribute of a tag in the tree.
     *
     * @param string $tname    Colon-separated tag-name path.
     * @param string $tindex   Colon-separated index path.
     * @param string $attrname Attribute name.
     * @return string|false The value, or FALSE when the tag or attribute is absent.
     */
    public function get_attribute($tname, $tindex, $attrname)
    {
        $mytag = $this->get_tag_in_tree($tname, $tindex);
        if ($mytag === FALSE) {
            return FALSE;
        }
        return $this->get_attribute_internal($mytag, $attrname);
    }

    /**
     * Moves cursor/cursor2 onto the tag addressed by the two paths.
     *
     * @param string $tname  Colon-separated tag-name path.
     * @param string $tindex Colon-separated index path ("-1" = last, "?" = count).
     * @return bool|int TRUE when the cursors now delimit the tag; the number of
     *                  occurrences (int) when a "?" index is reached; FALSE when
     *                  the paths differ in depth or a tag cannot be located. On
     *                  FALSE the cursors may rest on a partially resolved path.
     */
    public function get_work_space($tname, $tindex)
    {
        $names = explode(":", $tname);
        $indexes = explode(":", $tindex);

        // Both paths must have the same depth.
        if (count($names) != count($indexes)) {
            return FALSE;
        }

        // Start from the whole document to erase former work.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);

        foreach ($names as $level => $name) {
            $positions = $this->track_tag_cursors($name);
            $occurrences = count($positions) - 1;

            if ($indexes[$level] === "?") {
                return $occurrences;
            }

            $wanted = (int) $indexes[$level];
            if ($wanted === -1) {
                $wanted = $occurrences;
            }
            if ($wanted < 1 || $wanted > $occurrences) {
                return FALSE;
            }

            $start = $positions[$wanted];

            // End of the opening tag; it must lie inside the parent's range.
            $gt = strpos($this->xml, ">", $start);
            if ($gt === FALSE || $gt >= $this->cursor2) {
                return FALSE;
            }

            if ($this->xml[$gt - 1] === "/") {
                // Self-closing tag: the element ends with its own ">".
                $end = $gt + 1;
            } else {
                $closing = "</".$name.">";
                $close = strpos($this->xml, $closing, $gt);
                if ($close === FALSE) {
                    return FALSE;
                }
                $end = $close + strlen($closing);
            }

            $this->cursor = $start;
            $this->cursor2 = $end;
        }

        return TRUE;
    }

    /**
     * Computes where the content of the current work-space element lies.
     *
     * @param string $tname Tag-name path used to set the work space.
     * @return array|false array(offset of content start, offset of the closing
     *                     tag), or FALSE for a self-closing element.
     */
    private function inner_bounds($tname)
    {
        $names = explode(":", $tname);
        $closing = "</".$names[count($names) - 1].">";

        $gt = strpos($this->xml, ">", $this->cursor);
        if ($gt === FALSE) {
            return FALSE;
        }

        $start = $gt + 1;
        // cursor2 sits just past the closing tag, so step back over it.
        $end = $this->cursor2 - strlen($closing);

        // A self-closing element has no room for a closing tag.
        return ($end < $start) ? FALSE : array($start, $end);
    }

    /**
     * Appends an empty child tag at the end of a parent tag's content.
     *
     * @param string $tname  Path of the parent tag.
     * @param string $tindex Index path of the parent tag.
     * @param string $ntname Name of the tag to create.
     * @return bool TRUE on success, FALSE when the parent cannot be located or
     *              is self-closing.
     */
    public function create_tag($tname, $tindex, $ntname)
    {
        if ($this->get_work_space($tname, $tindex) !== TRUE) {
            return FALSE;
        }
        $bounds = $this->inner_bounds($tname);
        if ($bounds === FALSE) {
            return FALSE;
        }

        // A zero-length replacement is an insertion.
        $this->xml = substr_replace($this->xml, "<".$ntname."></".$ntname.">", $bounds[1], 0);
        return TRUE;
    }

    /**
     * Sets (or adds) an attribute on a tag. $value is written verbatim; the
     * caller is responsible for XML-escaping it.
     *
     * @param string $tname  Tag-name path.
     * @param string $tindex Index path.
     * @param string $attr   Attribute name.
     * @param string $value  New attribute value.
     * @return bool TRUE on success, FALSE when the tag cannot be located.
     */
    public function set_attribute($tname, $tindex, $attr, $value)
    {
        if ($this->get_work_space($tname, $tindex) !== TRUE) {
            return FALSE;
        }

        $gt = strpos($this->xml, ">", $this->cursor);
        if ($gt === FALSE) {
            return FALSE;
        }
        $opentag = substr($this->xml, $this->cursor, $gt - $this->cursor + 1);

        $found = $this->locate_attribute_value($opentag, $attr);
        if ($found !== FALSE) {
            // Overwrite the existing value in place.
            $this->xml = substr_replace($this->xml, $value, $this->cursor + $found[0], strlen($found[1]));
            return TRUE;
        }

        // Insert right after the tag name: before the first whitespace, "/" or ">".
        $name_length = strcspn($opentag, " \t\r\n/>");
        $this->xml = substr_replace($this->xml, " ".$attr."=\"".$value."\"", $this->cursor + $name_length, 0);
        return TRUE;
    }

    /**
     * Replaces everything between a tag's opening and closing tags. $text is
     * written verbatim; the caller is responsible for XML-escaping it.
     *
     * @param string $tname  Tag-name path.
     * @param string $tindex Index path.
     * @param string $text   New content.
     * @return bool TRUE on success, FALSE when the tag cannot be located or is
     *              self-closing.
     */
    public function set_tag_text($tname, $tindex, $text)
    {
        if ($this->get_work_space($tname, $tindex) !== TRUE) {
            return FALSE;
        }
        $bounds = $this->inner_bounds($tname);
        if ($bounds === FALSE) {
            return FALSE;
        }

        $this->xml = substr_replace($this->xml, $text, $bounds[0], $bounds[1] - $bounds[0]);
        return TRUE;
    }

    /**
     * Removes a tag, including its content, from the document.
     *
     * @param string $tname  Tag-name path.
     * @param string $tindex Index path.
     * @return bool TRUE on success, FALSE when the tag cannot be located.
     */
    public function remove_tag($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) !== TRUE) {
            return FALSE;
        }
        $this->xml = substr_replace($this->xml, "", $this->cursor, $this->cursor2 - $this->cursor);
        return TRUE;
    }
}
?>

存入数据库

<?PHP
include_once "kxparse.php";

/**
 * Reads the text of one feed tag and converts it to UTF-8.
 *
 * @param kxparse $xmlread  Parser holding the feed.
 * @param string  $tname    Colon-separated tag-name path.
 * @param string  $tindex   Colon-separated index path.
 * @param string  $encoding Encoding reported by the feed (upper case).
 * @return string UTF-8 text. If iconv() cannot convert the text, the
 *                unconverted bytes are returned.
 */
function rss_import_text($xmlread, $tname, $tindex, $encoding)
{
    // Cast so that a non-string result (e.g. FALSE) is stored as "".
    $text = (string) $xmlread->get_tag_text($tname, $tindex);

    if ($encoding !== "UTF-8") {
        $converted = iconv($encoding, "UTF-8", $text);
        if ($converted !== FALSE) {
            $text = $converted;
        }
    }
    return $text;
}

// Load the feed into the parser; the argument is the URL of the XML to parse.
$xmlread = new kxparse("http://rss.xinhuanet.com/rss/native.xml");

// Ask once: the declared encoding does not change while importing.
$encoding = $xmlread->getencoding();

$db = mysqli_connect("localhost", "root", "", "Rss");
if (!$db) {
    die("Unable to connect to MySQL: ".mysqli_connect_error());
}

// Feed text is untrusted, so it is passed as bound parameters, never
// concatenated into the SQL string.
$stmt = mysqli_prepare(
    $db,
    "INSERT INTO RssNews (channeltitle, channellink, channeldescription, channelpubdate,
                          itemtitle, itemlink, itemauthor, itemguid,
                          itemcategory, itempubdate, itemcomments, itemdescription)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
);
if (!$stmt) {
    die("Unable to prepare the INSERT statement: ".mysqli_error($db));
}

// Parameters are bound by reference: assigning new values to these variables
// before each execute is enough to insert a new row.
mysqli_stmt_bind_param(
    $stmt,
    "ssssssssssss",
    $channeltitle,
    $channellink,
    $channeldescription,
    $channelpubdate,
    $itemtitle,
    $itemlink,
    $itemauthor,
    $itemguid,
    $itemcategory,
    $itempubdate,
    $itemcomments,
    $itemdescription
);

// Channel-level fields are the same for every row.
$channeltitle = rss_import_text($xmlread, "rss:channel:title", "1:1:1", $encoding);
$channellink = rss_import_text($xmlread, "rss:channel:link", "1:1:1", $encoding);
$channeldescription = rss_import_text($xmlread, "rss:channel:description", "1:1:1", $encoding);
$channelpubdate = rss_import_text($xmlread, "rss:channel:pubDate", "1:1:1", $encoding);

// Count the items once instead of re-scanning the feed on every iteration.
$itemcount = (int) $xmlread->count_tag("rss:item", "1:?");

for ($i = 1; $i <= $itemcount; $i++) {
    $itemtitle = rss_import_text($xmlread, "rss:item:title", "1:$i:1", $encoding);
    $itemlink = rss_import_text($xmlread, "rss:item:link", "1:$i:1", $encoding);
    $itemauthor = rss_import_text($xmlread, "rss:item:author", "1:$i:1", $encoding);
    $itemguid = rss_import_text($xmlread, "rss:item:guid", "1:$i:1", $encoding);
    $itemcategory = rss_import_text($xmlread, "rss:item:category", "1:$i:1", $encoding);
    $itempubdate = rss_import_text($xmlread, "rss:item:pubDate", "1:$i:1", $encoding);
    $itemcomments = rss_import_text($xmlread, "rss:item:comments", "1:$i:1", $encoding);
    $itemdescription = rss_import_text($xmlread, "rss:item:description", "1:$i:1", $encoding);

    if (!mysqli_stmt_execute($stmt)) {
        // Report the row and keep going with the remaining items.
        trigger_error("Item $i was not stored: ".mysqli_stmt_error($stmt), E_USER_WARNING);
    }
}

mysqli_stmt_close($stmt);
mysqli_close($db);

?>

从数据库里读出内容

<?PHP
// Prints every stored row, one field per line, converted to GBK for output.
$db = mysqli_connect("localhost", "root", "", "Rss");
if (!$db) {
    die("Unable to connect to MySQL: ".mysqli_connect_error());
}

$sql = "SELECT * from RssNews";

$result = mysqli_query($db, $sql);
if (!$result) {
    die("Query failed: ".mysqli_error($db));
}

// Columns in display order; the last one ends the record with extra spacing.
$columns = array(
    "channeltitle",
    "channellink",
    "channeldescription",
    "channelpubdate",
    "itemtitle",
    "itemlink",
    "itemauthor",
    "itemguid",
    "itemcategory",
    "itempubdate",
    "itemcomments",
    "itemdescription",
);
$lastcolumn = count($columns) - 1;

while ($row = mysqli_fetch_assoc($result)) {
    foreach ($columns as $position => $column) {
        // The text originates from a remote feed, so escape it before it goes
        // into HTML. Escaping happens while the text is still UTF-8; the
        // inserted entities are plain ASCII and survive the GBK conversion.
        $text = htmlspecialchars((string) $row[$column], ENT_QUOTES, "UTF-8");

        echo iconv("UTF-8", "GBK", $text).($position === $lastcolumn ? "<br><br><br>" : "<br>");
    }
}

mysqli_free_result($result);
mysqli_close($db);
?>

数据库结构

--
-- Database: `Rss`
--

-- --------------------------------------------------------

--
-- Table structure for table `RssNews`
--
-- The database and table names use the same letter case as the PHP scripts
-- (`Rss`, `RssNews`); MySQL treats these names as case-sensitive on
-- case-sensitive file systems.
--

CREATE TABLE `RssNews` (
  `channeltitle` varchar(500) collate utf8_bin default NULL,
  `channellink` varchar(500) collate utf8_bin default NULL,
  `channeldescription` varchar(500) collate utf8_bin default NULL,
  `channelpubdate` varchar(500) collate utf8_bin default NULL,
  `itemtitle` varchar(500) collate utf8_bin default NULL,
  `itemlink` varchar(500) collate utf8_bin default NULL,
  `itemauthor` varchar(500) collate utf8_bin default NULL,
  `itemguid` varchar(500) collate utf8_bin default NULL,
  `itemcategory` varchar(500) collate utf8_bin default NULL,
  `itempubdate` varchar(500) collate utf8_bin default NULL,
  `itemcomments` varchar(500) collate utf8_bin default NULL,
  -- Item bodies are frequently longer than 500 characters.
  `itemdescription` text collate utf8_bin
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

相关文章推荐

RSS 2.0 php生成类,快读生成需要xml文件

在项目中用到的RSS 2.0版本的php生成类。 只需要根据数据结构把数据输入进去就能生成一个xml文件,并通过函数输出。...

php的rss订阅功能实现包

  • 2012年04月01日 13:00
  • 11KB
  • 下载

php版rss采集发布系统

  • 2008年09月13日 22:11
  • 497KB
  • 下载

php xml 动态生成 rss 源

  • aszt123
  • aszt123
  • 2013年08月24日 13:13
  • 717

国内外php主流开源cms、商城、点评、SNS、DIGG、RSS、分类信息、Wiki汇总

网上商城源代码源码之家- www.mycodes.net 酷网动力 www.aspcool.com 源码联盟 www.aspsun.com 源码天下 www.pccode.nethttp:/...
  • ztguang
  • ztguang
  • 2016年03月30日 10:36
  • 1746

PHP生成RSS聚合内容实例

PHP生成RSS聚合内容实例 最近YOOZHAN优站正在做一个新闻程序,客户要求站内要能一键自动生成RSS文件,方便用户定阅和自己用采集工具发布到更多的网站。其实这个功能已经不是新鲜事情了,...

用PHP的XML扩展库读取CSDN博客的RSS列表信息

使用PHP里的XML扩展库对xml文件解析,获得我们所需要的信息的一个小栗子,以供参考。...

PHP 和 AJAX RSS 阅读器

RSS 阅读器用于阅读 RSS Feed。 RSS 允许对新闻和更新进行快速浏览。 AJAX RSS 阅读器 在下面的 AJAX 实例中,我们将演示一个 RSS 阅读器,通过它,来自 R...

php读取rss示例

  • 2008年10月30日 16:27
  • 2KB
  • 下载

PHP RSS version 1

  • 2014年09月05日 09:21
  • 2KB
  • 下载
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:读RSS的PHP
举报原因:
原因补充:

(最多只允许输入30个字)