新浪博客、百度博客搬家(PHP代码)

新浪博客、百度博客搬家(PHP代码)

/**

* $Id blogmove.php

*

* 博客搬家

*

* @author skillCoding $date 2011-09-14

*/

include 'Snoopy.class.php';

include 'function_crawling.php';

/**
 * Crawls a Sina or Baidu blog and collects its posts ("blog move").
 *
 * Pages are downloaded through a Snoopy client. The helpers html_format(),
 * crawl_match() and links_filter() are not defined in this class; they are
 * presumably provided by function_crawling.php — verify against that file.
 *
 * NOTE(review): every pattern handed to crawl_match() below consists only of
 * line breaks (plus, in some cases, a lazy capture group). They look as if
 * the HTML markup they originally contained was stripped when this code was
 * published. The values are kept exactly as found; restore the real markup
 * before relying on the extracted title/date/content — TODO confirm.
 */
class BlogMove
{
    /** HTTP client used for every page and link fetch. */
    public $snoopy;

    /** Not referenced by any method of this class; kept for compatibility. */
    public $links = array();

    /** URLs of the individual posts found by get_blog_list(). */
    public $blog_links = array();

    /** Posts collected so far; this list keeps growing across move_*() calls. */
    public $blogs = array();

    /** Matching rules ('page' and 'blog') passed to links_filter(). */
    public $needle_rule = array();

    /**
     * Creates the Snoopy client.
     *
     * Declared as __construct() because a method named after the class is no
     * longer treated as a constructor on PHP 8, which would leave $snoopy unset.
     */
    public function __construct()
    {
        $this->snoopy = new Snoopy();
    }

    /**
     * Moves a Sina blog.
     *
     * @param String $url Blog home page; must start with http://blog.sina.com.cn/<name>
     * @return array|string The collected posts, or 'Invalid blog address' / 'No Blog'
     */
    public function move_sina($url)
    {
        if (!preg_match('/^http:\/\/blog\.sina\.com\.cn\/\w+/', $url)) {
            return 'Invalid blog address';
        }

        // Drop links left over from an earlier crawl so they are not fetched
        // again and parsed with this site's rules.
        $this->blog_links = array();

        $str = $this->fetch_html($url);

        // Match the blog directory (first article-list page).
        if (!preg_match_all('/http:\/\/blog\.sina\.com\.cn\/s\/articlelist_\d+_0_1\.html/', $str, $m)) {
            return 'Invalid blog address';
        }

        $url = $m[0][0];
        $this->needle_rule = array(
            'page' => array('s\/articlelist_\d+_0_\d+', '\.html$'),
            'blog' => array('\/s\/blog_', '\.html$')
        );
        $this->get_blog_list($url);

        if (!$this->blog_links) {
            return 'No Blog';
        }

        foreach ($this->blog_links as $link) {
            $str = $this->fetch_html($link);

            // Match the title.
            $titlearr = crawl_match($str, "/\n\n/");
            $title = strip_tags($titlearr[0][0]);

            // Match the date and remove the surrounding parentheses.
            $datearr = crawl_match($str, "/\n\n(.*?)/");
            $date = strip_tags($datearr[0][0]);
            $date = str_replace(array('(', ')'), '', $date);

            // Match the post body.
            $needle = 'id="sina_keyword_ad_area2"';
            $contentarr = crawl_match($str, "/\n\n/", $needle);
            $content = $contentarr[0];
            $content = trim(str_replace(array($needle, 'class="articalcontent ">'), '', $content));

            $this->blogs[] = array(
                'title' => $title,
                'dateline' => $date,
                'link' => $link,
                'content' => $content,
            );
        }

        return $this->blogs;
    }

    /**
     * Moves a Baidu blog.
     *
     * @param String $url Blog address of the form http://hi.baidu.com/<name>/blog
     *                    (no trailing slash)
     * @return array|string The collected posts, or 'Invalid blog address' / 'No Blog'
     */
    public function move_baidu($url)
    {
        if (!preg_match('/^http:\/\/hi\.baidu\.com\/(.*?)\/blog$/', $url)) {
            return 'Invalid blog address';
        }

        // Drop links left over from an earlier crawl so they are not fetched
        // again and parsed with this site's rules.
        $this->blog_links = array();

        $url .= '/index/0';
        $this->needle_rule = array(
            'page' => array('\/blog\/index\/', '\d+$'),
            'blog' => array('\/blog\/item\/', '\.html$')
        );
        $this->get_blog_list($url);

        if (!$this->blog_links) {
            return 'No Blog';
        }

        foreach ($this->blog_links as $link) {
            $str = $this->fetch_html($link);

            // Match the title.
            $titlearr = crawl_match($str, "/\n\n(.*?)/");
            $title = $titlearr[1][1];

            // Match the date.
            $datearr = crawl_match($str, "/\n\n(.*?)/");
            $date = $datearr[1][0];

            // Match the post body.
            $needle = array('id="blog_text"', 'class="cnt"');
            $contentarr = crawl_match($str, "/\n\n/", $needle);
            $content = $contentarr[0];
            $content = trim(str_replace($needle, '', $content));
            $content = ltrim($content, '>');

            $this->blogs[] = array(
                'title' => $title,
                'dateline' => $date,
                'link' => $link,
                'content' => $content,
            );
        }

        return $this->blogs;
    }

    /**
     * Collects the post links reachable from a list page.
     *
     * Reads the links on $url, keeps those matching the 'page' rule as list
     * pages, then appends every link matching the 'blog' rule on each list
     * page to $this->blog_links.
     *
     * @param String $url First list page of the blog
     */
    public function get_blog_list($url)
    {
        $this->snoopy->fetchlinks($url);
        $links = $this->snoopy->results;
        $this->snoopy->results = null;

        // Get the list pages; fall back to the given page when none matched.
        $page_links = links_filter($links, $this->needle_rule['page']);
        if (!$page_links) {
            $page_links = array($url);
        }

        foreach ($page_links as $page) {
            $this->snoopy->fetchlinks($page);
            $links = $this->snoopy->results;
            $this->snoopy->results = null;

            // Get the post links on the current list page.
            $blog_linkarr = links_filter($links, $this->needle_rule['blog']);
            $this->blog_links = array_merge($this->blog_links, $blog_linkarr);
        }
    }

    /**
     * Fetches one page, clears the client's result buffer and returns the
     * page content after passing it through html_format().
     */
    private function fetch_html($url)
    {
        $this->snoopy->fetch($url);
        $raw = $this->snoopy->results;
        $this->snoopy->results = null;

        return html_format($raw);
    }
}

新浪博客、百度博客搬家(PHP代码)

// Example usage. Each crawl gets its own BlogMove instance because the object
// accumulates the posts it has collected across calls.
// The Baidu address must end in "/blog" with no trailing slash; otherwise
// move_baidu() rejects it with 'Invalid blog address'.
$blog = new BlogMove();

$return = $blog->move_baidu("http://hi.baidu.com/blog/blog");

// $return is overwritten below; consume the Baidu result before this point.
$blog = new BlogMove();

$return = $blog->move_sina("http://blog.sina.com.cn/sunsan");

新浪博客、百度博客搬家(PHP代码)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值