新浪博客、百度博客搬家(PHP代码)

新浪博客、百度博客搬家(PHP代码)

/**
 * $Id blogmove.php
 *
 * 博客搬家
 *
 * @author skillCoding $date 2011-09-14
 */

include  'Snoopy.class.php';
include  'function_crawling.php';

/**
 * Migrates ("moves") posts off Sina Blog and Baidu Space by crawling the
 * public article-list pages and then scraping every post page found there.
 *
 * The class relies on names it does not define itself: the Snoopy HTTP
 * client class and the helper functions html_format(), crawl_match() and
 * links_filter(). Their exact contracts are not visible from this class, so
 * their results are checked defensively before being indexed.
 */
class BlogMove{
    /** Snoopy HTTP client used for every request. */
    public $snoopy;

    /** Declared by the original class but never read or written by it. */
    public $links = array();

    /** URLs of the post pages collected by get_blog_list(). */
    public $blog_links = array();

    /** Scraped posts; each entry has the keys title, dateline, link, content. */
    public $blogs = array();

    /** Link-filter rules (keys 'page' and 'blog') handed to links_filter(). */
    public $needle_rule = array();

    /**
     * Regular expressions used to scrape a post page, keyed by site.
     *
     * NOTE(review): the original pattern literals were destroyed when this
     * code was extracted from a web page (the HTML tags inside them were
     * stripped). The values below are best-effort reconstructions inferred
     * from the surrounding code (e.g. the date being wrapped in parentheses
     * on Sina, the content needles id="sina_keyword_ad_area2" and
     * id="blog_text"). They have NOT been verified against the real markup
     * or against crawl_match() - confirm or override them before use.
     */
    public $patterns = array(
        'sina' => array(
            'title'   => '/<h2[^>]*>(.*?)<\/h2>/',
            'date'    => '/<span class="time[^"]*">(.*?)<\/span>/',
            'content' => '/<div(.*?)<\/div>/',
        ),
        'baidu' => array(
            'title'   => '/<div class="tit">(.*?)<\/div>/',
            'date'    => '/<div class="date">(.*?)<\/div>/',
            'content' => '/<div(.*?)<\/div>/',
        ),
    );

    /**
     * Creates the HTTP client.
     *
     * Declared as __construct() because a method named after the class is no
     * longer treated as a constructor by PHP 8.
     */
    public function __construct(){
        $this->snoopy = new Snoopy();
    }

    /**
     * Scrapes every post of a Sina blog.
     *
     * @param String $url Blog home page, e.g. http://blog.sina.com.cn/name
     * @return mixed Array of posts on success, or the string
     *               'Invalid blog address' / 'No Blog' on failure.
     */
    public function move_sina($url){
        if (!preg_match('/^http:\/\/blog\.sina\.com\.cn\/\w+/', $url)) {
            return 'Invalid blog address';
        }

        // Start clean so an earlier move_*() call on this instance cannot
        // leak its links or posts into this result.
        $this->reset_state();

        $home = $this->fetch_page($url);

        // The home page must link to the first page of the article list.
        if (!preg_match('/http:\/\/blog\.sina\.com\.cn\/s\/articlelist_\d+_0_1\.html/', $home, $m)) {
            return 'Invalid blog address';
        }

        $this->needle_rule = array(
            'page'=>array('s\/articlelist_\d+_0_\d+','\.html$'),
            'blog'=>array('\/s\/blog_','\.html$')
        );

        $this->get_blog_list($m[0]);

        if (!$this->blog_links) {
            return 'No Blog';
        }

        $rules = $this->patterns['sina'];
        $needle = 'id="sina_keyword_ad_area2"';

        foreach ($this->blog_links as $link) {
            $page = $this->fetch_page($link);

            // Title: first full match with its tags removed.
            $title = strip_tags($this->pick(crawl_match($page, $rules['title']), 0, 0));

            // Date: first full match, tags and the surrounding parentheses removed.
            $date = strip_tags($this->pick(crawl_match($page, $rules['date']), 0, 0));
            $date = str_replace(array('(',')'), '', $date);

            // Body: the element identified by $needle, minus its attribute residue.
            $content = $this->pick(crawl_match($page, $rules['content'], $needle), 0);
            $content = trim(str_replace(array($needle,'class="articalcontent  ">'), '', $content));

            array_push($this->blogs, array(
                'title'=>$title,
                'dateline'=>$date,
                'link'=>$link,
                'content'=>$content,
            ));
        }

        return $this->blogs;
    }

    /**
     * Scrapes every post of a Baidu Space blog.
     *
     * @param String $url Blog address ending in /blog (a trailing slash is
     *                    tolerated), e.g. http://hi.baidu.com/name/blog
     * @return mixed Array of posts on success, or the string
     *               'Invalid blog address' / 'No Blog' on failure.
     */
    public function move_baidu($url){
        if (!preg_match('/^http:\/\/hi\.baidu\.com\/(.*?)\/blog\/?$/', $url)) {
            return 'Invalid blog address';
        }

        $this->reset_state();

        // Drop an optional trailing slash so the list URL never contains "//".
        $url = rtrim($url, '/') . '/index/0';

        $this->needle_rule = array(
            'page'=>array('\/blog\/index\/','\d+$'),
            'blog'=>array('\/blog\/item\/','\.html$')
        );

        $this->get_blog_list($url);

        if (!$this->blog_links) {
            return 'No Blog';
        }

        $rules = $this->patterns['baidu'];
        $needle = array('id="blog_text"','class="cnt"');

        foreach ($this->blog_links as $link) {
            $page = $this->fetch_page($link);

            // The original code read the SECOND hit of capture group 1 as the title.
            $title = $this->pick(crawl_match($page, $rules['title']), 1, 1);

            // ...and the FIRST hit of capture group 1 as the date.
            $date = $this->pick(crawl_match($page, $rules['date']), 1, 0);

            // Body: remove the needle attributes, then the tag's leftover '>'.
            $content = $this->pick(crawl_match($page, $rules['content'], $needle), 0);
            $content = trim(str_replace($needle, '', $content));
            $content = ltrim($content,'>');

            array_push($this->blogs, array(
                'title'=>$title,
                'dateline'=>$date,
                'link'=>$link,
                'content'=>$content,
            ));
        }

        return $this->blogs;
    }

    /**
     * Collects post-page URLs into $this->blog_links.
     *
     * Fetches the links of $url, keeps those matching the 'page' rule as the
     * list pages (falling back to $url itself when none match), then appends
     * the links of every list page that match the 'blog' rule. The method
     * appends; it does not clear $this->blog_links itself.
     *
     * @param String $url First page of the article list
     */
    public function get_blog_list($url){
        $this->snoopy->fetchlinks($url);
        $links = $this->snoopy->results;
        $this->snoopy->results = null;

        $page_links = links_filter($links, $this->needle_rule['page']);

        // No pagination links (or a non-array result): treat $url as the only page.
        if (!is_array($page_links) || !$page_links) {
            $page_links = array($url);
        }

        foreach ($page_links as $page) {
            $this->snoopy->fetchlinks($page);
            $links = $this->snoopy->results;
            $this->snoopy->results = null;

            $found = links_filter($links, $this->needle_rule['blog']);

            // Only merge real arrays; array_merge() rejects anything else.
            if (is_array($found) && $found) {
                $this->blog_links = array_merge($this->blog_links, $found);
            }
        }
    }

    /** Clears the per-run link and post buffers. */
    private function reset_state(){
        $this->blog_links = array();
        $this->blogs = array();
    }

    /** Fetches $url, releases Snoopy's result buffer and returns html_format()'s output. */
    private function fetch_page($url){
        $this->snoopy->fetch($url);
        $html = $this->snoopy->results;
        $this->snoopy->results = null;

        return html_format($html);
    }

    /** Returns $hits[$first] (or $hits[$first][$second]), or '' when that entry is missing. */
    private function pick($hits, $first, $second = null){
        if (!is_array($hits) || !isset($hits[$first])) {
            return '';
        }

        $row = $hits[$first];

        if ($second === null) {
            return $row;
        }

        return (is_array($row) && isset($row[$second])) ? $row[$second] : '';
    }
}
<\>
<\>

新浪博客、百度博客搬家(PHP代码)

// Usage example.
// A separate BlogMove instance is used per site so that the links and posts
// collected for one site can never end up in the other site's result.
// The Baidu address is written without a trailing slash because move_baidu()
// validates it against a pattern that must end in "/blog".
$baidu_blog = new BlogMove();
$baidu_return = $baidu_blog->move_baidu("http://hi.baidu.com/blog/blog");

$blog = new BlogMove();
$return = $blog->move_sina("http://blog.sina.com.cn/sunsan");


新浪博客、百度博客搬家(PHP代码)



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值