插件48:把HTML文件转换为RSS文件

<?php // Plug-in 48: HTML To RSS

// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

// Demo driver: fetch a page and serve it as an RSS feed.
$url         = "http://www.mhprofessional.com/";
$html        = file_get_contents($url);

// file_get_contents() returns false on failure; without this check
// the converter would be handed a non-string and emit a bogus feed.
if ($html === false)
{
   header('HTTP/1.1 502 Bad Gateway');
   header('Content-Type: text/plain');
   exit("Unable to fetch '$url'");
}

$title       = "RSS version of '$url'";
$description = "The website '$url' converted to an RSS feed";
$webmaster   = "nobody@nowhere.com";
$copyright   = "Translator Copyright 2009 pluginphp.com";

header('Content-Type: text/xml');
echo PIPHP_HTMLToRSS($html, $title, $description, $url,
   $webmaster, $copyright);

function PIPHP_HTMLToRSS($html, $title, $description, $url,
   $webmaster, $copyright)
{
   // Plug-in 48: HTML To RSS
   //
   // This plug-in takes a string containing a complete HTML
   // page and turns it into RSS format which is returned.
   // Every heading (<h1>, <h2>, ...) starts a new <item>: the
   // heading text becomes its <title> and the escaped HTML
   // that follows becomes its <description>. Content before
   // the first heading goes into a leading item titled $title.
   // The arguments required are:
   //
   //    $html:        HTML to convert to RSS
   //    $title:       Title to use
   //    $description: Description to use
   //    $url:         URL to link to (generally same as the
   //                  HTML source); also used to resolve
   //                  relative links via PIPHP_RelToAbsURL()
   //    $webmaster:   Webmaster contact email address
   //    $copyright:   Copyright details
   //
   // $title, $description, $url, $webmaster and $copyright
   // are inserted into the XML verbatim, so the caller must
   // supply XML-safe text for them.

   // RSS 2.0 requires RFC 822 dates; DATE_RSS emits a numeric
   // UTC offset instead of a timezone identifier
   $date  = date(DATE_RSS);
   $html  = (string) $html;

   // Collapse '&amp;' to '&' and then hide every ampersand
   // behind a placeholder, so that the href values read from
   // the DOM match the raw markup character for character
   $html  = str_replace('&amp;', '&',         $html);
   $html  = str_replace('&',     '!!**1**!!', $html);
   $links = array();

   // loadHTML() rejects an empty document, so only parse when
   // there is something to parse
   if (trim($html) !== '')
   {
      $previous = libxml_use_internal_errors(true);
      $dom      = new DOMDocument();
      $dom->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      $xpath = new DOMXPath($dom);

      foreach ($xpath->query("/html/body//a") as $anchor)
         $links[] = $anchor->getAttribute('href');
   }

   // Longest links first, so that the unquoted href=... form
   // of a short link cannot clobber a longer link sharing its
   // prefix
   $links = array_unique($links);
   usort($links, function ($a, $b)
   {
      return strlen($b) - strlen($a);
   });

   $map   = array();
   $count = 0;

   foreach ($links as $link)
   {
      if ($link === "") continue;

      // Swap each href for a numbered token first; the real
      // URLs are filled in afterwards in a single pass so a
      // substituted URL is never re-matched as another link
      $temp        = str_replace('!!**1**!!', '&', $link);
      $token       = "!!$count!!";
      $map[$token] = PIPHP_RelToAbsURL($url, $temp);

      $html = str_replace("href=\"$link\"", "href=\"$token\"",
         $html);
      $html = str_replace("href='$link'",   "href='$token'",
         $html);
      $html = str_replace("href=$link",     "href=$token",
         $html);
      ++$count;
   }

   if ($map) $html = strtr($html, $map);

   $html = str_replace('!!**1**!!', '&', $html);
   $html = preg_replace('/[\s]+/', ' ', $html);
   $html = preg_replace('/<script[^>]*>.*?<\/script>/i', '',
      $html);
   $html = preg_replace('/<style[^>]*>.*?<\/style>/i', '',
      $html);

   // Rewrite headings to the neutral <h>...</h> BEFORE calling
   // strip_tags(): only <h> is on the allow list, so <h1> to
   // <h6> would otherwise be stripped and no items created
   $html = preg_replace('/<h[1-6]\b[^>]*>/i', '<h>',  $html);
   $html = preg_replace('/<\/h[1-6]\s*>/i',   '</h>', $html);

   $ok   = '<a><i><b><u><s><h><img><div><span><table><tr>';
   $ok  .= '<th><td><br><p><ul><ol><li>';
   $html = strip_tags($html, $ok);

   // Escape only the XML-special characters: named HTML
   // entities such as &eacute; are undefined in XML and would
   // make the feed ill-formed
   $html = htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE,
      'UTF-8');

   // The heading markers are escaped by now, so match their
   // escaped form when splitting the text into items
   $html = str_replace('&lt;h&gt;',
      "</description></item>\n<item><title>", $html);
   $html = str_replace('&lt;/h&gt;',
      "</title><guid>$url</guid><description>", $html);

   return <<<_END
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel>
<generator>Pluginphp.com: plug-in 48</generator>
<title>$title</title><link>$url</link>
<description>$description</description>
<language>en</language>
<webMaster>$webmaster</webMaster>
<copyright>$copyright</copyright>
<pubDate>$date</pubDate>
<lastBuildDate>$date</lastBuildDate>
<item><title>$title</title>
<guid>$url</guid>
<description>$html</description></item></channel></rss>
_END;
}

// The below function is repeated here to ensure that it's
// available to the main function which relies on it

function PIPHP_RelToAbsURL($page, $url)
{
   // Plug-in 21: Relative To Absolute URL
   //
   // This plug-in accepts the absolute URL of a web page
   // and a link featured within that page. The link is then
   // turned into an absolute URL which can be independently
   // accessed. Applies to http:// and https:// pages; for any
   // other page the link is returned unchanged. Links that
   // already carry a scheme (http:, https:, mailto:, ...) are
   // also returned unchanged. "../" segments are not
   // collapsed. Arguments are:
   //
   //    $page: The web page containing the URL
   //    $url:  The URL to convert to absolute

   if (!preg_match('#^https?://#i', $page)) return $url;

   // Anything with its own scheme is already absolute
   if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url)) return $url;

   $parse = parse_url($page);
   if (!is_array($parse) || !isset($parse['host'])) return $url;

   $scheme = $parse['scheme'];
   $root   = $scheme . "://" . $parse['host'];

   // Keep a non-default port, otherwise root-relative links
   // would point at the wrong server
   if (isset($parse['port'])) $root .= ':' . $parse['port'];

   $path  = isset($parse['path'])  ? $parse['path']        : '/';
   $query = isset($parse['query']) ? '?' . $parse['query'] : '';

   // Protocol-relative link: borrow the page's scheme
   if (substr($url, 0, 2) == '//') return $scheme . ':' . $url;

   // Root-relative link
   if (substr($url, 0, 1) == '/')  return $root . $url;

   // Fragment-only and query-only links refer to the page itself
   if (substr($url, 0, 1) == '#')  return $root . $path . $query . $url;
   if (substr($url, 0, 1) == '?')  return $root . $path . $url;

   // Directory of the page, taken from the path alone so that a
   // '/' inside the query string cannot be mistaken for it
   $p   = strrpos($path, '/');
   $dir = ($p === false) ? '/' : substr($path, 0, $p + 1);

   return $root . $dir . $url;
}

?>

插件说明:

本插件接受一个HTML文档及其他相关参数,返回一个格式正确的RSS文件。它需要以下参数:

$html 需要转换的HTML文档

$title 作为RSS文件的标题

$description RSS文件说明

$url 该RSS文件链接的URL

$webmaster 网站管理员的Email地址

$copyright 版权信息

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值