公众号文章链接生成文章

  /**
   * Turn a WeChat official-account article link into a locally stored HTML file.
   *
   * Reads the `url` request parameter, crawls the article through crawByUrl(),
   * writes the processed page into upload/upload/wenzhang/ and returns the crawl
   * result with its `html` entry replaced by the path of the written file.
   *
   * @return array crawl result; `html` holds the relative path of the saved file
   * @throws \InvalidArgumentException when the URL is missing or not http(s)
   * @throws \RuntimeException         when the directory or file cannot be written
   */
  public function getWechatUrl()
  {
    $url = input('url', '');
    $url = is_string($url) ? trim($url) : '';

    // The URL comes straight from the request: allow http(s) only, so local files
    // and other stream wrappers can never be read through file_get_contents().
    if (!preg_match('#^https?://#i', $url)) {
      throw new \InvalidArgumentException('A valid http(s) article URL is required.');
    }

    // NOTE(review): the original snippet left $card_id for the integrator to define.
    // Reading it from the `card_id` request parameter is an assumption - replace it
    // with the real source if it differs. It is reduced to safe file-name characters
    // because it becomes part of a path.
    $card_id = input('card_id', '');
    $card_id = is_scalar($card_id) ? preg_replace('/[^A-Za-z0-9_-]/', '', (string) $card_id) : '';

    $re = $this->crawByUrl($url);

    $dir = 'upload/upload/wenzhang/';
    // The second is_dir() covers a concurrent request creating the directory first.
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
      throw new \RuntimeException('Unable to create directory: ' . $dir);
    }

    $src = $dir . time() . '_' . $card_id . '.html';
    if (file_put_contents($src, $re['html']) === false) {
      throw new \RuntimeException('Unable to write article file: ' . $src);
    }

    // Callers receive the location of the saved page instead of the markup itself.
    $re['html'] = $src;
    return $re;
  }
  
 /**
  * Download a WeChat article page and extract its metadata and processed HTML.
  *
  * @param  string $url article URL
  * @return array  result of articleBasicInfo() merged with:
  *                `html`  - first element returned by contentHandle(),
  *                `title` - content of the twitter:title meta tag,
  *                `img`   - content of the twitter:image meta tag
  * @throws \RuntimeException when the page cannot be downloaded
  */
 public function crawByUrl($url)
 {
     $content = $this->_get($url);
     if ($content === false || $content === '') {
         throw new \RuntimeException('Unable to download article: ' . $url);
     }

     $basicInfo = $this->articleBasicInfo($content);

     // contentHandle() also returns an image-free variant, which is not needed here.
     $handled = $this->contentHandle($content);

     $title = $this->extractMetaProperty($content, 'twitter:title');
     $cover = $this->extractMetaProperty($content, 'twitter:image');

     // When a meta tag is absent, fall back to the value scraped from the page
     // script instead of returning an unrelated slice of the document.
     if ($title === '' && isset($basicInfo['title'])) {
         $title = $basicInfo['title'];
     }
     if ($cover === '' && isset($basicInfo['cover'])) {
         $cover = $basicInfo['cover'];
     }

     return array_merge($basicInfo, [
         'html'  => $handled[0],
         'title' => $title,
         'img'   => $cover,
     ]);
 }

 /**
  * Return the raw content attribute of the last <meta property="..." content="..."> tag
  * with the given property name, or '' when the page has no such tag.
  *
  * @param  string $html     page source
  * @param  string $property value of the property attribute to look for
  * @return string
  */
 private function extractMetaProperty($html, $property)
 {
     $pattern = '/<meta\s+property="' . preg_quote($property, '/') . '"\s+content="(.*?)"/is';
     if (preg_match_all($pattern, $html, $found) && !empty($found[1])) {
         // The last occurrence wins, as with the reverse search used previously.
         return end($found[1]);
     }
     return '';
 }

 /**
  * Fetch the body found at the given URL.
  *
  * @param  string $url address handed to file_get_contents()
  * @return string|false the downloaded data, or false when file_get_contents() fails
  */
 public function _get($url)
 {
     return file_get_contents($url);
 }
 /**
  * Process the source of a WeChat article: extract the article body and
  * rewrite its image links to locally stored copies.
  *
  * The body is whatever the lazy pattern for the
  * <div class="rich_media_content..." id="js_content"> element matches first, so
  * it ends at the first closing </div> after that opening tag.
  *
  * @author bignerd
  * @since  2016-08-16T15:59:27+080
  * @param  string $content source of the fetched WeChat article page
  * @return array  [whole page with the processed body spliced back in,
  *                 processed body with all <img> tags removed]
  */
 public function contentHandle($content)
 {
     $content_html_pattern = '/<div class="rich_media_content.*?".*?id="js_content".*?>(.*?)<\/div>/s';
     $content_html_pattern2 = '/<div id="js_pc_qr_code"(.*?)<div class="wx_network_msg_wrp.*?".*?id="js_network_msg_wrp.*?>/s';

     // Only the first match is used; a page without an article body yields ''.
     $content_html = preg_match($content_html_pattern, $content, $html_matchs) ? $html_matchs[0] : '';

     // Drop the inline style that keeps the body hidden.
     $content_html = str_replace('style="visibility: hidden;"', '', $content_html);
     // Filter out iframes; the "s" flag lets "." span newlines so iframes that are
     // spread over several lines are removed as well.
     $content_html = preg_replace('/<iframe(.*?)<\/iframe>/s', '', $content_html);
     $path = 'upload/article/';

     // Body with images: every data-src becomes a src pointing at the local copy.
     $content_html = preg_replace_callback('/data-src="(.*?)"/', function ($matches) use ($path) {
         // Attribute values are HTML-escaped, so "&amp;" must be decoded before fetching.
         $remote = htmlspecialchars_decode($matches[1]);

         return 'src="https://www.xiaofu.live/' . $path . $this->getImg($remote) . '" ' . $this->imageStyle;
     }, $content_html);

     // Splice the processed body back through a callback: as a plain replacement
     // string, "$1" or "\1" inside the article text would be expanded as backreferences.
     $content = preg_replace_callback($content_html_pattern, function () use ($content_html) {
         return $content_html;
     }, $content);
     $content = preg_replace($content_html_pattern2, '</div></div><div class="wx_network_msg_wrp" id="js_network_msg_wrp">', $content);

     // Body without images.
     $content_text = preg_replace('/<img.*?>/s', '', $content_html);

     return [$content, $content_text];
 }
 /**
  * Collect the basic information of an article from its page source.
  *
  * Each field is read from a ` var NAME = "...";` assignment in the page and
  * passed through htmlTransform(); fields that are missing or empty become ''.
  * The author and the copyright label are taken from their HTML elements.
  *
  * @author bignerd
  * @since  2016-08-16T17:16:32+0800
  * @param  string $content source of the article detail page
  * @return array  keys: author, copyright_stat, date, title, digest, content_url, cover, wechatname
  */
 public function articleBasicInfo($content)
 {
     // Script variable in the page => key in the returned array.
     $fields = [
         'ct'          => 'date',        // publish time
         'msg_title'   => 'title',       // title
         'msg_desc'    => 'digest',      // description
         'msg_link'    => 'content_url', // article link
         'msg_cdn_url' => 'cover',       // cover image link
         'nickname'    => 'wechatname',  // official account name
     ];

     $info = ['author' => '', 'copyright_stat' => ''];

     foreach ($fields as $jsVar => $key) {
         $hit = [];
         preg_match('/ var ' . $jsVar . ' = "(.*?)";/s', $content, $hit);
         $info[$key] = empty($hit[1]) ? '' : $this->htmlTransform($hit[1]);
     }

     // Author.
     $authorHit = [];
     preg_match('/<em class="rich_media_meta rich_media_meta_text">(.*?)<\/em>/s', $content, $authorHit);
     if (!empty($authorHit[1])) {
         $info['author'] = $authorHit[1];
     }

     // Article type (copyright label).
     $typeHit = [];
     preg_match('/<span id="copyright_logo" class="rich_media_meta meta_original_tag">(.*?)<\/span>/s', $content, $typeHit);
     if (!empty($typeHit[1])) {
         $info['copyright_stat'] = $typeHit[1];
     }

     return $info;
 }
/**
 * Convert special characters: decode a fixed set of HTML entities and remove backslashes.
 *
 * @author bignerd
 * @since  2016-08-16T17:30:52+0800
 * @param  string $string text to convert
 * @return string converted text
 */
public function htmlTransform($string)
{
    // str_replace() applies the pairs in order, each one on the result of the
    // previous pair, so "amp;" left over after decoding "&amp;" is stripped next.
    $search  = ['&quot;', '&amp;', 'amp;', '&lt;', '&gt;', '&nbsp;', "\\"];
    $replace = ['"', '&', '', '<', '>', ' ', ''];

    return str_replace($search, $replace, $string);
}
/**
 * Download a remote image and store it as a JPEG file under upload/article/.
 *
 * @param  string $url address of the image to download
 * @return string name of the generated file (without the directory part)
 * @throws \RuntimeException when the image cannot be downloaded, decoded or saved
 */
public function getImg($url){
    // NOTE(review): the Referer header is presumably there to get past hotlink
    // protection on the image host - confirm.
    $refer = "http://www.qq.com/";
    $context = stream_context_create([
        'http' => [
            'header' => "Referer: " . $refer
        ]
    ]);

    // Receive the raw image bytes.
    $file_contents = file_get_contents($url, false, $context);
    if ($file_contents === false || $file_contents === '') {
        throw new \RuntimeException('Unable to download image: ' . $url);
    }

    $image = imagecreatefromstring($file_contents);
    if ($image === false) {
        throw new \RuntimeException('Downloaded data is not a supported image: ' . $url);
    }

    $path = 'upload/article/';
    // The second is_dir() covers a concurrent request creating the directory first.
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        throw new \RuntimeException('Unable to create directory: ' . $path);
    }

    // time() plus a small random number collides easily when one article holds many
    // images; uniqid() with extra entropy keeps names distinct within the same second.
    $fileName = str_replace('.', '', uniqid(time() . '_', true)) . '.jpg';

    // Write the new image.
    if (!imagejpeg($image, $path . $fileName)) {
        throw new \RuntimeException('Unable to save image: ' . $path . $fileName);
    }

    return $fileName;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值