/**
 * Crawl the WeChat article given by the `url` request parameter, store the
 * processed page HTML on disk and return the article metadata.
 *
 * @return array Result of crawByUrl() with 'html' replaced by the relative
 *               path of the saved HTML file.
 * @throws \InvalidArgumentException When the URL is missing or not http(s).
 * @throws \RuntimeException         When the HTML file cannot be written.
 */
public function getWechatUrl()
{
    $url = trim((string) input('url', ''));
    // The URL comes straight from the request: only allow remote http(s)
    // targets so local files / stream wrappers cannot be read through it.
    if (!preg_match('#^https?://#i', $url)) {
        throw new \InvalidArgumentException('A valid http(s) article URL is required.');
    }

    // NOTE(review): the original left $card_id undefined with a note to
    // "customise $card_id". Reading it from the request is an assumption;
    // adjust to wherever the id really comes from. It is reduced to safe
    // filename characters because it becomes part of a path.
    $card_id = preg_replace('/[^A-Za-z0-9_-]/', '', (string) input('card_id', ''));

    $re = $this->crawByUrl($url);

    $dir = 'upload/upload/wenzhang/';
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        throw new \RuntimeException('Unable to create directory: ' . $dir);
    }

    $src = $dir . time() . '_' . $card_id . '.html';
    if (file_put_contents($src, $re['html']) === false) {
        throw new \RuntimeException('Unable to write article file: ' . $src);
    }

    // Callers receive the path of the stored file instead of the raw HTML.
    $re['html'] = $src;
    return $re;
}
/**
 * Download a WeChat article and collect its metadata and processed HTML.
 *
 * @param string $url Article URL.
 * @return array Basic info from articleBasicInfo() merged with:
 *               'html'  => page HTML returned by contentHandle(),
 *               'title' => article title,
 *               'img'   => article cover image URL.
 * @throws \RuntimeException When the article source cannot be fetched.
 */
public function crawByUrl($url)
{
    $content = $this->_get($url);
    if ($content === false || $content === '') {
        throw new \RuntimeException('Unable to fetch article source from: ' . $url);
    }

    $basicInfo = $this->articleBasicInfo($content);
    $handled = $this->contentHandle($content);
    $content_html = $handled[0];

    // Title and cover are read from the twitter:* <meta> tags. The previous
    // fixed byte offsets produced garbage whenever a marker was missing or
    // the whitespace between tags changed.
    $title = $this->extractMetaProperty($content, 'twitter:title');
    if ($title === null) {
        $title = isset($basicInfo['title']) ? $basicInfo['title'] : '';
    }
    $img = $this->extractMetaProperty($content, 'twitter:image');
    if ($img === null) {
        $img = isset($basicInfo['cover']) ? $basicInfo['cover'] : '';
    }

    return array_merge($basicInfo, ['html' => $content_html, 'title' => $title, 'img' => $img]);
}

/**
 * Return the content attribute of the first <meta property="..."> tag with
 * the given property name, or null when no such tag exists.
 *
 * @param string $html     Page source.
 * @param string $property Property name, e.g. "twitter:title".
 * @return string|null
 */
private function extractMetaProperty($html, $property)
{
    $quoted = preg_quote($property, '/');

    // property="..." before content="..."
    $propertyFirst = '/<meta\b[^>]*\bproperty=(["\'])' . $quoted . '\1[^>]*\bcontent=(["\'])(.*?)\2/is';
    if (preg_match($propertyFirst, $html, $m)) {
        return $m[3];
    }

    // content="..." before property="..."
    $contentFirst = '/<meta\b[^>]*\bcontent=(["\'])(.*?)\1[^>]*\bproperty=(["\'])' . $quoted . '\3/is';
    if (preg_match($contentFirst, $html, $m)) {
        return $m[2];
    }

    return null;
}
/**
 * Fetch the body of a remote http(s) URL.
 *
 * Only http:// and https:// URLs are fetched; anything else (local paths,
 * file://, php:// ...) is rejected so a caller-supplied value cannot be used
 * to read local resources.
 *
 * @param string $url URL to download.
 * @return string|false Response body, or false when the URL is rejected or
 *                      the download fails.
 */
public function _get($url)
{
    if (!is_string($url) || !preg_match('#^https?://#i', $url)) {
        return false;
    }

    return file_get_contents($url);
}
/**
 * Process WeChat article source: extract the article body, drop iframes and
 * rewrite lazy-loaded image links to locally stored copies.
 * @author bignerd
 * @since 2016-08-16T15:59:27+0800
 * @param string $content Raw WeChat article page source.
 * @return array [page HTML with the processed article body spliced back in,
 *                processed article body with <img> tags removed]
 */
public function contentHandle($content)
{
    $content_html_pattern = '/<div class="rich_media_content.*?".*?id="js_content".*?>(.*?)<\/div>/s';
    $content_html_pattern2 = '/<div id="js_pc_qr_code"(.*?)<div class="wx_network_msg_wrp.*?".*?id="js_network_msg_wrp.*?>/s';

    // NOTE(review): the lazy (.*?)<\/div> stops at the first closing </div>,
    // so a body containing nested <div> elements is captured only partially.
    $content_html = '';
    if (preg_match($content_html_pattern, $content, $html_match)) {
        $content_html = $html_match[0];
    }

    // Make the article body visible.
    $content_html = str_replace('style="visibility: hidden;"', '', $content_html);
    // Strip iframes; the "s" flag also covers iframes that span several lines.
    $content_html = preg_replace('/<iframe(.*?)<\/iframe>/s', '', $content_html);

    $path = 'upload/article/';
    // Body HTML with images: every data-src is downloaded through getImg()
    // and replaced by a src pointing at the stored copy.
    $content_html = preg_replace_callback('/data-src="(.*?)"/', function ($matches) use ($path) {
        return 'src="https://www.xiaofu.live/' . $path . $this->getImg($matches[1]) . '" ' . $this->imageStyle;
    }, $content_html);

    // Splice the processed body back into the page. A callback is used so
    // that "$1" or "\1" inside the article text is inserted literally
    // instead of being expanded as a regex backreference.
    $content = preg_replace_callback($content_html_pattern, function ($unused) use ($content_html) {
        return $content_html;
    }, $content);
    $content = preg_replace($content_html_pattern2, '</div></div><div class="wx_network_msg_wrp" id="js_network_msg_wrp">', $content);

    // Body HTML without images.
    $content_text = preg_replace('/<img.*?>/s', '', $content_html);

    return [$content, $content_text];
}
/**
 * Collect the basic information of an article from its page source.
 * @author bignerd
 * @since 2016-08-16T17:16:32+0800
 * @param $content article page source
 * @return $basicInfo array with the keys author, copyright_stat, date,
 *         title, digest, content_url, cover and wechatname; a key is an
 *         empty string when its value is not found
 */
public function articleBasicInfo($content)
{
    $basicInfo = [
        'author' => '',
        'copyright_stat' => '',
    ];

    // JavaScript variable name in the page => key in the result
    $fieldMap = [
        'ct' => 'date',               // publish time
        'msg_title' => 'title',       // title
        'msg_desc' => 'digest',       // description
        'msg_link' => 'content_url',  // article link
        'msg_cdn_url' => 'cover',     // cover image link
        'nickname' => 'wechatname',   // official account name
    ];

    foreach ($fieldMap as $jsVar => $field) {
        $hit = [];
        preg_match('/ var '.$jsVar.' = "(.*?)";/s', $content, $hit);
        // Only the first assignment counts; empty values stay ''.
        $basicInfo[$field] = empty($hit[1]) ? '' : $this->htmlTransform($hit[1]);
    }

    // Author
    $authorHit = [];
    preg_match('/<em class="rich_media_meta rich_media_meta_text">(.*?)<\/em>/s', $content, $authorHit);
    if (!empty($authorHit[1])) {
        $basicInfo['author'] = $authorHit[1];
    }

    // Article type (copyright tag)
    $typeHit = [];
    preg_match('/<span id="copyright_logo" class="rich_media_meta meta_original_tag">(.*?)<\/span>/s', $content, $typeHit);
    if (!empty($typeHit[1])) {
        $basicInfo['copyright_stat'] = $typeHit[1];
    }

    return $basicInfo;
}
/**
 * Convert HTML entities and escape characters found in scraped values back
 * to plain characters.
 *
 * Replacements are applied in this order: &quot; -> ", &amp; -> &, a
 * leftover "amp;" (from double-encoded ampersands) is removed, &lt; -> <,
 * &gt; -> >, &nbsp; -> space, and finally all backslashes are removed.
 * @author bignerd
 * @since 2016-08-16T17:30:52+0800
 * @param $string text to convert
 * @return $string converted text
 */
public function htmlTransform($string)
{
    // str_replace() with arrays applies the pairs one after another on the
    // running result, so the order below is significant.
    $search  = ['&quot;', '&amp;', 'amp;', '&lt;', '&gt;', '&nbsp;', "\\"];
    $replace = ['"',      '&',     '',     '<',    '>',    ' ',      ''];

    return str_replace($search, $replace, $string);
}
/**
 * Download a remote image and store it as a JPEG under upload/article/.
 *
 * The request carries a qq.com Referer header (presumably to get past the
 * image host's hotlink check; verify).
 *
 * @param string $url Image URL.
 * @return string File name (without directory) of the stored JPEG.
 * @throws \RuntimeException When the image cannot be downloaded, decoded or
 *                           written.
 */
public function getImg($url){
    $refer = "http://www.qq.com/";
    $context = stream_context_create([
        'http' => [
            'header' => "Referer: " . $refer
        ]
    ]);

    // Fetch the raw image bytes.
    $file_contents = file_get_contents($url, false, $context);
    if ($file_contents === false || $file_contents === '') {
        throw new \RuntimeException('Unable to download image: ' . $url);
    }

    $image = imagecreatefromstring($file_contents);
    if ($image === false) {
        throw new \RuntimeException('Downloaded data is not a supported image: ' . $url);
    }

    $path = 'upload/article/';
    // The second is_dir() covers another request creating the directory
    // between the check and mkdir().
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        throw new \RuntimeException('Unable to create directory: ' . $path);
    }

    // uniqid() with extra entropy avoids the collisions time().rand() could
    // produce when many images are saved within the same second.
    $fileName = str_replace('.', '', uniqid('', true)) . '.jpg';

    // Write the image as a new JPEG file.
    if (!imagejpeg($image, $path . $fileName)) {
        throw new \RuntimeException('Unable to write image file: ' . $path . $fileName);
    }

    return $fileName;
}
// Generate an article from a WeChat official-account article link.
// First published on 2023-03-03 11:15:10.