读RSS的PHP

原创 2006年05月17日 08:12:00

PHP解析类

<?php
/*Khalid XML files parser :: class kxparse, Started in March 2002 by Khalid Al-kary*/
/**
 * Khalid XML files parser (kxparse), started in March 2002 by Khalid Al-kary.
 *
 * A small string-scanning XML reader/editor. Elements are addressed by two
 * parallel colon-separated paths: the tag names ("rss:channel:title") and the
 * 1-based occurrence of each name inside its parent ("1:1:1"). An index of
 * "-1" means "last occurrence"; "?" (used by count_tag) asks for the number
 * of occurrences instead of selecting one.
 *
 * Known limitations of the scanning approach (unchanged from the original
 * design): an element ends at the first matching closing tag, so nested
 * elements with the same name are not supported; tag names containing ":"
 * cannot be addressed because ":" is the path separator; markup inside XML
 * comments is not skipped.
 */
class kxparse
{
    /** Raw XML text being read and edited (CDATA markers are stripped on load). */
    var $xml;

    /** Byte offset in $xml where the currently selected element starts. */
    var $cursor;

    /** Byte offset in $xml just past the end of the currently selected element. */
    var $cursor2;

    /** Upper-cased encoding name, filled in by getencoding(). */
    var $encoding;

    /**
     * Loads an XML document into the parser.
     *
     * If the document cannot be read, PHP's own warning is emitted and the
     * parser starts with an empty document instead of looping on an invalid
     * file handle.
     *
     * @param string $xmlfile Path or URL of the XML document.
     */
    function __construct($xmlfile)
    {
        $this->xml = '';

        $contents = file_get_contents($xmlfile);
        if ($contents !== false) {
            // Drop the CDATA markers so their content is handled like ordinary text.
            $this->xml = str_replace(array("<![CDATA[", "]]>"), "", $contents);
        }

        // Start with the whole document as the working area.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);
    }

    /**
     * PHP 4 style constructor, kept so that existing code calling it (and very
     * old PHP versions) keep working. Delegates to __construct().
     *
     * @param string $xmlfile Path or URL of the XML document.
     */
    function kxparse($xmlfile)
    {
        $this->__construct($xmlfile);
    }

    /**
     * Reads the encoding declared in the XML declaration.
     *
     * @return string Upper-cased encoding name; "UTF-8" when none is declared.
     */
    function getencoding()
    {
        // [^>]*? keeps the search inside the <?xml ... ?> declaration itself.
        $rx = '/<\?xml[^>]*?\sencoding\s*=\s*["\']([^"\']*)["\']/';
        if (preg_match($rx, $this->xml, $m) && $m[1] !== '') {
            $this->encoding = strtoupper($m[1]);
        } else {
            $this->encoding = "UTF-8";
        }
        return $this->encoding;
    }

    /**
     * Finds every opening tag called $tname between cursor and cursor2.
     *
     * @param string $tname Tag name (a single name, not a colon path).
     * @return array Element 0 is always 0 (indexes start at 1); element i is
     *               the absolute byte offset in $xml of the i-th occurrence.
     */
    function track_tag_cursors($tname)
    {
        $start = (int)$this->cursor;

        // substr() takes a length, not an end offset, so the working area is
        // cursor2 - cursor bytes long.
        $length = max(0, (int)$this->cursor2 - $start);
        $currxml = (string)substr($this->xml, $start, $length);

        // Slot 0 is a placeholder because occurrence indexes start at 1.
        $tag_poses = array(0);

        if ((string)$tname === '') {
            return $tag_poses;
        }

        // The look-ahead requires the name to end right there, so "<item"
        // does not also match "<items>".
        $pattern = '/<' . preg_quote($tname, '/') . '(?=[\s\/>])/';
        if (preg_match_all($pattern, $currxml, $found, PREG_OFFSET_CAPTURE)) {
            foreach ($found[0] as $hit) {
                // Offsets are relative to the cut area; make them absolute.
                $tag_poses[] = $hit[1] + $start;
            }
        }

        return $tag_poses;
    }

    /**
     * Turns an element (tags included) into its decoded text.
     *
     * @param string $tname The element markup, despite the parameter name.
     * @return string Text with all tags stripped and entities decoded.
     */
    function get_tag_text_internal($tname)
    {
        return $this->htmldecode(strip_tags($tname));
    }

    /**
     * Locates an attribute inside the opening tag of $tag.
     *
     * @param string $tag  Element markup starting at its "<".
     * @param string $attr Attribute name.
     * @return array|false array(offset of the value inside $tag, value), or
     *                     false when the opening tag has no such attribute.
     */
    function _locate_attribute($tag, $attr)
    {
        $end = strpos($tag, ">");
        if ($end === false) {
            return false;
        }

        // Only the opening tag is searched, never the element's content.
        $head = substr($tag, 0, $end);
        $rx = '/\s' . preg_quote($attr, '/') . '\s*=\s*(["\'])(.*?)\1/s';
        if (!preg_match($rx, $head, $m, PREG_OFFSET_CAPTURE)) {
            return false;
        }

        return array($m[2][1], $m[2][0]);
    }

    /**
     * Returns the value of an attribute of the given element.
     *
     * @param string $tag  The element itself (with its start and end tags).
     * @param string $attr Attribute name.
     * @return string|false The raw attribute value, or FALSE if it is absent.
     */
    function get_attribute_internal($tag, $attr)
    {
        $hit = $this->_locate_attribute($tag, $attr);
        if ($hit === false) {
            return FALSE;
        }
        return $hit[1];
    }

    /**
     * Decodes the entities kxparse understands.
     *
     * "&amp;" is decoded after "&lt;"/"&gt;" so that "&amp;lt;" yields the
     * literal text "&lt;". "&ltt;" and "&gtt;" are kxparse's own escapes for a
     * literal "&lt;" / "&gt;".
     *
     * @param string $text Text to decode.
     * @return string Decoded text.
     */
    function htmldecode($text)
    {
        // With array arguments str_replace applies the pairs one after another,
        // each on the result of the previous one.
        return str_replace(
            array("&lt;", "&gt;", "&amp;", "&ltt;", "&gtt;"),
            array("<", ">", "&", "&lt;", "&gt;"),
            $text
        );
    }

    /**
     * Writes the current document to a file, overwriting it if it exists.
     *
     * @param string $file Destination path.
     * @return bool TRUE when the whole document was written, FALSE otherwise.
     */
    function save($file)
    {
        $my_file = fopen($file, "wb");
        if ($my_file === false) {
            return FALSE;
        }

        // fwrite() reports failure with false (never -1) and may write less
        // than requested.
        $written = fwrite($my_file, $this->xml);
        fclose($my_file);

        return $written !== false && $written === strlen($this->xml);
    }

    /**
     * Returns an element of the XML tree, including its start and end tags.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return string The element markup, or "" when it cannot be found.
     */
    function get_tag_in_tree($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) === false) {
            return "";
        }
        return substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);
    }

    /**
     * Returns the decoded text of an element.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return string The text, or "" when the element cannot be found.
     */
    function get_tag_text($tname, $tindex)
    {
        $mytag = $this->get_tag_in_tree($tname, $tindex);
        return $this->get_tag_text_internal($mytag);
    }

    /**
     * Counts the occurrences of a tag; put "?" in the index path at the level
     * to count, e.g. count_tag("rss:item", "1:?").
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated indexes containing a "?".
     * @return int|false Number of occurrences, or false when the paths are
     *                   invalid or contain no "?".
     */
    function count_tag($tname, $tindex)
    {
        $result = $this->get_work_space($tname, $tindex);

        // Never leak the boolean success flag: "1 <= true" would make a
        // caller's counting loop run forever.
        return is_int($result) ? $result : false;
    }

    /**
     * Returns the value of an attribute of an element in the tree.
     *
     * @param string $tname    Colon-separated tag names.
     * @param string $tindex   Colon-separated occurrence indexes.
     * @param string $attrname Attribute name.
     * @return string|false The attribute value, or FALSE when the element or
     *                      the attribute does not exist.
     */
    function get_attribute($tname, $tindex, $attrname)
    {
        $mytag = $this->get_tag_in_tree($tname, $tindex);
        return $this->get_attribute_internal($mytag, $attrname);
    }

    /**
     * Moves cursor/cursor2 onto the element addressed by the two paths.
     *
     * On failure the cursors are left on the last ancestor that was resolved.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated indexes ("-1" = last, "?" = count).
     * @return bool|int true when the element was selected; the number of
     *                  occurrences when a "?" index is reached; false when the
     *                  paths differ in depth or the element does not exist.
     */
    function get_work_space($tname, $tindex)
    {
        $names = explode(":", (string)$tname);
        $indexes = explode(":", (string)$tindex);

        // Both paths must have the same depth.
        if (count($names) !== count($indexes)) {
            return false;
        }

        // Forget any previous selection: start from the whole document.
        $this->cursor = 0;
        $this->cursor2 = strlen($this->xml);

        foreach ($names as $depth => $name) {
            if ($name === '') {
                return false;
            }

            $positions = $this->track_tag_cursors($name);
            $found = count($positions) - 1;
            $wanted = $indexes[$depth];

            // "?" asks how many times the tag occurs at this level.
            if ($wanted === "?") {
                return $found;
            }

            // "-1" selects the last occurrence.
            $position = (int)$wanted;
            if ($position === -1) {
                $position = $found;
            }

            if ($position < 1 || $position > $found) {
                return false;
            }

            $this->cursor = $positions[$position];

            $open_end = strpos($this->xml, ">", $this->cursor);
            if ($open_end === false) {
                return false;
            }

            // A self-closing element (<tag ... />) ends with its own tag.
            if ($this->xml[$open_end - 1] === "/") {
                $this->cursor2 = $open_end + 1;
                continue;
            }

            $closing = "</" . $name . ">";
            $close_at = strpos($this->xml, $closing, $this->cursor);
            if ($close_at === false) {
                return false;
            }
            $this->cursor2 = $close_at + strlen($closing);
        }

        return true;
    }

    /**
     * Returns the offset of the closing tag of the currently selected element.
     *
     * @param string $tname Colon-separated tag names used for the selection.
     * @return int|false Absolute offset of "</name>", or false when the
     *                   selected element is self-closing.
     */
    function _closing_tag_offset($tname)
    {
        $names = explode(":", $tname);
        $closing = "</" . $names[count($names) - 1] . ">";

        // get_work_space() puts cursor2 right after the closing tag.
        $offset = $this->cursor2 - strlen($closing);
        if ($offset <= $this->cursor
            || substr($this->xml, $offset, strlen($closing)) !== $closing) {
            return false;
        }
        return $offset;
    }

    /**
     * Appends an empty child element to an element of the tree.
     *
     * @param string $tname  Colon-separated tag names of the parent.
     * @param string $tindex Colon-separated occurrence indexes of the parent.
     * @param string $ntname Name of the new child tag.
     * @return bool TRUE on success, FALSE when the parent cannot be found or
     *              is self-closing.
     */
    function create_tag($tname, $tindex, $ntname)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $inde = $this->_closing_tag_offset($tname);
        if ($inde === false) {
            return FALSE;
        }

        // A replacement length of 0 turns substr_replace into an insertion.
        $this->xml = substr_replace($this->xml, "<" . $ntname . "></" . $ntname . ">", $inde, 0);
        return TRUE;
    }

    /**
     * Sets (or adds) an attribute on an element of the tree.
     *
     * The value is written verbatim; the caller is responsible for escaping
     * quotes and other markup characters in it.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @param string $attr   Attribute name.
     * @param string $value  New attribute value.
     * @return bool TRUE on success, FALSE when the element cannot be found.
     */
    function set_attribute($tname, $tindex, $attr, $value)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $currxml = substr($this->xml, $this->cursor, $this->cursor2 - $this->cursor);

        // Attribute already present: overwrite the old value in place.
        $hit = $this->_locate_attribute($currxml, $attr);
        if ($hit !== false) {
            $this->xml = substr_replace($this->xml, $value, $this->cursor + $hit[0], strlen($hit[1]));
            return TRUE;
        }

        // Otherwise insert it right after the tag name: at the first
        // whitespace, at the "/" of a self-closing tag, or at the ">".
        $head = substr($currxml, 0, strpos($currxml, ">"));
        $insert_at = strcspn($head, " \t\r\n/");
        $newattr = " " . $attr . "=\"" . $value . "\"";

        $this->xml = substr_replace($this->xml, $newattr, $this->cursor + $insert_at, 0);
        return TRUE;
    }

    /**
     * Replaces the content of an element with the given text.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @param string $text   New content, written verbatim.
     * @return bool TRUE on success, FALSE when the element cannot be found or
     *              is self-closing.
     */
    function set_tag_text($tname, $tindex, $text)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        // The content lies between the end of the opening tag ...
        $replace_start_index = strpos($this->xml, ">", $this->cursor) + 1;

        // ... and the start of the closing tag.
        $replace_end_index = $this->_closing_tag_offset($tname);
        if ($replace_end_index === false || $replace_end_index < $replace_start_index) {
            return FALSE;
        }

        $this->xml = substr_replace(
            $this->xml,
            $text,
            $replace_start_index,
            $replace_end_index - $replace_start_index
        );
        return TRUE;
    }

    /**
     * Removes an element (tags and content) from the tree.
     *
     * @param string $tname  Colon-separated tag names.
     * @param string $tindex Colon-separated occurrence indexes.
     * @return bool TRUE on success, FALSE when the element cannot be found.
     */
    function remove_tag($tname, $tindex)
    {
        if ($this->get_work_space($tname, $tindex) !== true) {
            return FALSE;
        }

        $this->xml = substr_replace($this->xml, "", $this->cursor, $this->cursor2 - $this->cursor);
        return TRUE;
    }

}
?>

存入数据库

<?PHP
// Imports every <item> of an RSS feed into the RssNews table, one row per
// item, repeating the channel fields on each row.
include_once "kxparse.php";

// Load the feed into the parser: constructor(string file).
$xmlread = new kxparse("http://rss.xinhuanet.com/rss/native.xml"); // URL of the XML feed to parse

// NOTE(review): the connection charset is left at the client default, as in
// the original mysql_connect() call. The script that reads the table must use
// the same setting; confirm both ends before forcing one here.
$db = new PDO("mysql:host=localhost;dbname=Rss", "root", "");
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// The feed encoding does not change while importing, so read it once.
$encoding = $xmlread->getencoding();

// Channel-level fields, stored on every row.
$channel = array(
    "channeltitle"       => $xmlread->get_tag_text("rss:channel:title", "1:1:1"),
    "channellink"        => $xmlread->get_tag_text("rss:channel:link", "1:1:1"),
    "channeldescription" => $xmlread->get_tag_text("rss:channel:description", "1:1:1"),
    "channelpubdate"     => $xmlread->get_tag_text("rss:channel:pubDate", "1:1:1"),
);

// Table column => child tag of <item> it is read from.
$itemtags = array(
    "itemtitle"       => "title",
    "itemlink"        => "link",
    "itemauthor"      => "author",
    "itemguid"        => "guid",
    "itemcategory"    => "category",
    "itempubdate"     => "pubDate",
    "itemcomments"    => "comments",
    "itemdescription" => "description",
);

// Feed text is untrusted, so it is bound as parameters and never concatenated
// into the statement.
$columns = array_merge(array_keys($channel), array_keys($itemtags));
$placeholders = implode(", ", array_fill(0, count($columns), "?"));
$insert = $db->prepare(
    "INSERT INTO RssNews (" . implode(", ", $columns) . ") VALUES (" . $placeholders . ")"
);

// Count the items once instead of re-scanning the document on every pass.
$itemcount = (int)$xmlread->count_tag("rss:item", "1:?");

for ($i = 1; $i <= $itemcount; $i++) {
    $row = $channel;
    foreach ($itemtags as $column => $tag) {
        $row[$column] = $xmlread->get_tag_text("rss:item:" . $tag, "1:$i:1");
    }

    $values = array();
    foreach ($row as $value) {
        // The table is UTF-8; convert text coming from feeds in other encodings.
        if ($encoding !== "UTF-8") {
            $converted = iconv($encoding, "UTF-8", $value);
            if ($converted !== false) {
                $value = $converted;
            }
        }
        $values[] = $value;
    }

    $insert->execute($values);
}

?>

从数据库里读出内容

<?PHP
// Prints every stored RSS row as GBK-encoded HTML, one field per line.

// NOTE(review): the connection charset is left at the client default, as in
// the original mysql_connect() call. It must match the setting used by the
// script that fills the table; confirm before forcing one here.
$db = new PDO("mysql:host=localhost;dbname=Rss", "root", "");
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Columns in the order they are printed.
$columns = array(
    "channeltitle",
    "channellink",
    "channeldescription",
    "channelpubdate",
    "itemtitle",
    "itemlink",
    "itemauthor",
    "itemguid",
    "itemcategory",
    "itempubdate",
    "itemcomments",
    "itemdescription",
);

$sql = "SELECT " . implode(", ", $columns) . " from RssNews";

$result = $db->query($sql);

$last = count($columns) - 1;

while ($row = $result->fetch(PDO::FETCH_ASSOC)) {
    foreach ($columns as $n => $column) {
        // The stored text comes from a remote feed: escape it before it goes
        // into the page. Escaping only touches ASCII characters, so it is done
        // on the UTF-8 text before converting to GBK.
        $text = htmlspecialchars((string)$row[$column], ENT_QUOTES, "UTF-8");

        // Three line breaks separate one news row from the next.
        echo iconv("UTF-8", "GBK", $text) . ($n === $last ? "<br><br><br>" : "<br>");
    }
}
?>

数据库结构

--
-- Database: `Rss`
--

-- --------------------------------------------------------

--
-- Table structure for table `RssNews`
--
-- The name is spelled exactly as the PHP scripts use it (`RssNews`): MySQL
-- table names are case-sensitive on most Unix hosts, so a table created as
-- `rssnews` would not be found by "INSERT INTO RssNews".
-- The description columns are TEXT because feed descriptions routinely exceed
-- 500 characters.
--

CREATE TABLE `RssNews` (
  `channeltitle` varchar(500) collate utf8_bin default NULL,
  `channellink` varchar(500) collate utf8_bin default NULL,
  `channeldescription` text collate utf8_bin,
  `channelpubdate` varchar(500) collate utf8_bin default NULL,
  `itemtitle` varchar(500) collate utf8_bin default NULL,
  `itemlink` varchar(500) collate utf8_bin default NULL,
  `itemauthor` varchar(500) collate utf8_bin default NULL,
  `itemguid` varchar(500) collate utf8_bin default NULL,
  `itemcategory` varchar(500) collate utf8_bin default NULL,
  `itempubdate` varchar(500) collate utf8_bin default NULL,
  `itemcomments` varchar(500) collate utf8_bin default NULL,
  `itemdescription` text collate utf8_bin
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;

php编写RSS源

记编写rss源 点点细雨     2013年11月26日星期二   为了提高搜索引擎的收录速度,今天开始编写rss源来增加对搜索引擎的友好。 废话就不多打了,毕竟我打字速度也不快(O(∩_∩)...
  • diandianxiyu
  • diandianxiyu
  • 2013年11月26日 14:36
  • 2842

PHP进行RSS订阅

PHP进行RSS订阅 现在有很多的rss订阅,我们直接可以订阅到邮箱。既然学了PHP,那么有没有一种方法可以直接将rss的新闻信息显示在自己的网页上呢?有的,必须的,下面就是这个rss脚本: ...
  • amberom
  • amberom
  • 2015年01月12日 10:36
  • 898

很好用的php rss解析类

/*** Rss Parse Class ver0.1** @link http://www.ugia.cn/?p=42* @author: legend (PASiOcn@msn.com)* @ve...
  • xcyxl
  • xcyxl
  • 2006年12月21日 16:20
  • 1260

最佳PHP解析RSS类lastRSS

/* ====================================================================== lastRSS 0.9.1  Simple yet ...
  • phphot
  • phphot
  • 2008年03月07日 15:51
  • 2337

PHP RSS订阅类

  • u010349417
  • u010349417
  • 2013年11月15日 16:41
  • 787

php的Rss xml 示例 采集新浪新闻

simplexml_load_file  直接使用此函数: header("Content-type:text/html;Charset=utf-8"); $content = simplexml_...
  • dats0407
  • dats0407
  • 2012年06月25日 19:11
  • 2621

Android Rss阅读器

前言        前几天去北京面试,题目是让我解析一下腾讯的Rss。之前虽然知道xml,但是自己从来没有去学习怎么解析,在网上查一些例子,但是就是没有解析出来。现在看看还蛮好笑的,因为我那时候是使用...
  • wangjinyu501
  • wangjinyu501
  • 2013年06月08日 12:49
  • 8118

IT信息订阅 RSS源列表

 本人采用的是http://www.google.com/reader直接将下面的信息另存为xml文件,在reader中导入就可以了            Google 阅读器中 xu 的订阅    ...
  • eqxu
  • eqxu
  • 2008年02月20日 21:24
  • 9137

用PHP生成RSS Feed,并给网站添加RSS Feed功能

给网站添加 RSS Feed 功能能带来很多好处。如果你用的是 PHP,你可以写代码自己生成,详细方法可以参考 IBM 网站的这篇文章:PHP and RSS: Getting it together...
  • zhang_jianzhi
  • zhang_jianzhi
  • 2010年07月15日 10:47
  • 971

PHP xml应用之RSS 订阅

blog.html(提供订阅的页面): Document rss.php(从数据库生成订阅的内容(xml),并显示xml): ...
  • houyanhua1
  • houyanhua1
  • 2018年01月22日 13:25
  • 43
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:读RSS的PHP
举报原因:
原因补充:

(最多只允许输入30个字)