微信公众号文章 PHP 采集程序：微信公众号文章内容采集

直接上代码

// Let the script run without a time limit (0 disables the execution timeout).

set_time_limit ( 0 );

/**
 * Fetches a WeChat official-account article and stores its images locally.
 *
 * fetch() downloads the page at $url, extracts the title, the summary and the
 * article body, saves every hotlink-protected image referenced by the body
 * into $path, and rewrites the body so the images point at the saved files.
 *
 * NOTE(review): TITLE_PATTERN and CONTENT_PATTERN are reconstructions. In the
 * text this class was recovered from, the HTML tags inside both patterns had
 * been stripped, leaving only "(.*)" and "(.*?)". They are rebuilt here from
 * the markup WeChat article pages usually use - confirm against a live page.
 */
class Gather {

    /** Page title. NOTE(review): reconstructed pattern, see class comment. */
    const TITLE_PATTERN = '/<title>(.*)<\/title>/i';

    /**
     * Article summary assigned to the JS variable msg_desc. The value may
     * contain whitespace and backslash-escaped characters.
     */
    const DESCRIPTION_PATTERN = '/var\s+msg_desc\s*=\s*"((?:[^"\\\\]|\\\\.)*)"/';

    /** Article body. NOTE(review): reconstructed pattern, see class comment. */
    const CONTENT_PATTERN = '/<div[^>]*\bid="js_content"[^>]*>(.*?)<\/div>/is';

    /**
     * Hotlink-protected image reference, either a data-src attribute or a CSS
     * background-image url(...). Group 1 is the full image URL, group 2 the
     * path segment after the "mmbiz"/"mmbiz_xxx" directory, which is used as
     * the local file name.
     */
    const IMAGE_PATTERN = '~(?:data-src="|background-image\s*:\s*url\s*\(\s*)([a-zA-Z]+://[^\s"\'()]*mmbiz(?:_[a-z]+)?/([^\s/"\'()]+)/(?:\d*\?[^\s"\'()]*|\d+))~i';

    private $url;

    private $path;

    /**
     * @param string $url  Address of the article page.
     * @param string $path Directory the article images are saved into.
     */
    public function __construct($url, $path) {
        $this->url = $url;
        $this->path = $path;
    }

    /**
     * Downloads and processes the article.
     *
     * @return array Keys 'title', 'description' and 'content'. A value is an
     *               empty string when the page could not be fetched or the
     *               corresponding part was not found.
     */
    public function fetch() {
        return $this->transform($this->url, $this->path);
    }

    /**
     * Downloads one image and stores it as "<path>/<name>.<type>", where
     * <type> is taken from the detected MIME type (e.g. "jpeg", "png").
     *
     * @param string $url  Image address.
     * @param string $path Target directory.
     * @param string $name File name without extension.
     *
     * @return string|false Path of the saved file, or false when the image
     *                      could not be downloaded, recognised or written.
     */
    private function createPic($url, $path, $name) {
        $img = file_get_contents($url);
        if ($img === false || $img === '') {
            return false;
        }

        // Inspect the bytes already in memory instead of fetching the URL a
        // second time just to learn the image type.
        $info = getimagesizefromstring($img);
        if ($info === false || empty($info['mime'])) {
            return false;
        }

        $type = str_replace('image/', '', $info['mime']);
        $fileName = $path . DIRECTORY_SEPARATOR . $name . ".$type";

        if (file_put_contents($fileName, $img) === false) {
            return false;
        }

        return $fileName;
    }

    /**
     * Fetches the page and builds the result array returned by fetch().
     *
     * @param string $url  Address of the article page.
     * @param string $path Directory the article images are saved into.
     *
     * @return array See fetch().
     */
    private function transform($url, $path) {
        $data = ['title' => '', 'description' => '', 'content' => ''];

        // Create the target directory, including missing parent directories.
        if (! is_dir($path)) {
            mkdir($path, 0777, true);
        }

        $content = file_get_contents($url);
        if ($content === false) {
            return $data;
        }

        $data['title'] = $this->firstGroup(self::TITLE_PATTERN, $content);
        $data['description'] = $this->firstGroup(self::DESCRIPTION_PATTERN, $content);

        $body = $this->firstGroup(self::CONTENT_PATTERN, $content);
        $body = $this->localizeImages($body, $path);

        // Turn every remaining data-src attribute into a regular src.
        $body = str_replace('data-src', 'src', $body);

        $data['content'] = trim($body);

        return $data;
    }

    /**
     * Returns the first capture group of $pattern in $subject, or '' when the
     * pattern does not match.
     */
    private function firstGroup($pattern, $subject) {
        $matched = preg_match($pattern, $subject, $match) === 1;

        return ($matched && isset($match[1])) ? $match[1] : '';
    }

    /**
     * Saves every hotlink-protected image referenced in $html into $path and
     * replaces its URL in $html with the path of the saved file. Images that
     * cannot be saved keep their original URL.
     */
    private function localizeImages($html, $path) {
        if (! preg_match_all(self::IMAGE_PATTERN, $html, $matches, PREG_SET_ORDER)) {
            return $html;
        }

        $dir = realpath($path);
        if ($dir === false) {
            return $html;
        }

        $seen = [];
        $replacements = [];

        foreach ($matches as $match) {
            list(, $remote, $name) = $match;

            // Download each distinct URL only once.
            if (isset($seen[$remote])) {
                continue;
            }
            $seen[$remote] = true;

            // The URL was read from HTML, so "&amp;" etc. must be decoded
            // before requesting it; the raw text stays the replacement key.
            $local = $this->createPic(html_entity_decode($remote, ENT_QUOTES), $dir, $name);
            if ($local !== false) {
                $replacements[$remote] = $local;
            }
        }

        // strtr() tries the longest key first and never rescans replaced
        // text, so a URL that is a prefix of another cannot corrupt it.
        return $replacements ? strtr($html, $replacements) : $html;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值