关闭

网页抓取 URL 替换程序 Demo

标签: url, 函数, curl
71人阅读 评论(0) 收藏 举报
分类:
// Address of the mirrored site, without a trailing slash.
define('URL', 'http://www.hao123.com');
// The same address with a trailing slash; url_be() skips links equal to either form.
define('URLI', 'http://www.hao123.com/');
//首页
/**
 * Front page: fetches URL, runs the markup through url_add() with URL as the
 * base address, and passes the result to url_handle(), which prints it.
 */
function index(){

    $homepage = $this->url_add(URL, $this->curl(URL));
    $this->url_handle($homepage);

}
//url加密
/**
 * Replaces links found in $str with proxied links of the form
 * site_url().'/index/url_bd/<base64 of the link with "=" padding removed>'.
 *
 * A link is left untouched when it is empty, equals URL or URLI, or contains
 * "css", "js", "png", "jpg" or "gif" anywhere in it (case-insensitive
 * substring test, not an extension test).
 *
 * @param array  $arr preg_match_all() result; the links are read from index 2
 * @param string $str markup in which the links are replaced
 * @return string the rewritten markup; $str unchanged when nothing qualifies
 */
function url_be($arr,$str){

    // Nothing captured: hand the markup back instead of falling through to null.
    if (empty($arr[2])) {
        return $str;
    }

    $prefix = site_url().'/index/url_bd/';
    $replacements = array();

    foreach ($arr[2] as $link) {
        if ($link === '' || $link === URL || $link === URLI) {
            continue;
        }
        // Static assets are not proxied.
        if (preg_match('/css|js|png|jpg|gif/i', $link)) {
            continue;
        }
        $replacements[$link] = $prefix.str_replace('=', '', base64_encode($link));
    }

    if (!$replacements) {
        return $str;
    }

    // A single strtr() pass tries the longest key first and never rescans text it
    // has already replaced, so a link that is a prefix of another link (or that
    // occurs inside a generated proxy URL) can no longer corrupt it.
    return strtr($str, $replacements);

}
//url处理
/**
 * Collects the href values, then the src values, of $str, passes each set
 * through url_be(), and echoes the resulting markup.
 *
 * @param string $str HTML markup to process
 */
function url_handle($str){

    $patterns = array('/(href=\")(.*?)(\")/is', '/(src=\")(.*?)(\")/is');

    foreach ($patterns as $pattern) {
        preg_match_all($pattern, $str, $found);
        $str = $this->url_be($found, $str);
    }

    echo $str;

}
//url解密
/**
 * Proxy endpoint: decodes $be back into a URL, fetches that page and prints it
 * with its links rewritten by url_add() and url_handle().
 *
 * Nothing is printed when $be is empty, is not valid base64, does not decode
 * to an absolute http/https URL, or the fetch fails.
 *
 * @param string $be base64 token as produced by url_be() ("=" padding stripped)
 */
function url_bd($be){
    if (!$be) {
        return;
    }

    // url_be() strips the "=" padding; restore it before strict decoding.
    $padded = $be.str_repeat('=', (4 - strlen($be) % 4) % 4);
    $url = base64_decode($padded, true);
    if ($url === false) {
        return;
    }

    // The token arrives through the /index/url_bd/ link, so anyone can forge it:
    // only absolute http(s) URLs are fetched (no file://, gopher://, ...).
    $parts = parse_url($url);
    if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
        return;
    }
    $scheme = strtolower($parts['scheme']);
    if ($scheme !== 'http' && $scheme !== 'https') {
        return;
    }

    $str = $this->curl($url);
    if ($str === false) {
        return;
    }

    // Relative links belong to the page that was fetched, not to URL.
    $origin = $scheme.'://'.$parts['host'];
    if (isset($parts['port'])) {
        $origin .= ':'.$parts['port'];
    }

    $str = $this->url_add($origin,$str);
    $this->url_handle($str);
}

//追加主域名
/**
 * Makes site-relative links in $str absolute by prefixing them with $url.
 *
 * Rewrites root-relative href="/..." and src="/..." values, then any remaining
 * href that does not start with "http", "javascript" or "//".
 * Protocol-relative values ("//host/path") are left as they are.
 *
 * @param string $url base address; a trailing slash is tolerated
 * @param string $str HTML markup
 * @return string markup with the links made absolute
 */
function url_add($url,$str){

    $base = rtrim($url, '/');

    // Root-relative values. Each attribute keeps its own name: src must stay
    // src, otherwise images and scripts stop loading.
    foreach (array('href', 'src') as $attr) {
        $fixed = preg_replace('/('.$attr.'="\/)(?!\/)(.*?)(")/is', $attr.'="'.$base.'/\\2"', $str);
        if ($fixed !== null) {
            $str = $fixed;
        }
    }

    // Remaining hrefs that are not already absolute.
    $fixed = preg_replace('/(href=")(?!http)(?!javascript)(?!\/\/)(.*?)(")/is', 'href="'.$base.'/\\2"', $str);
    if ($fixed !== null) {
        $str = $fixed;
    }

    return $str;
}
//抓取页面
/**
 * Fetches $url with cURL and returns the response body.
 *
 * @param string $url address to fetch
 * @return string|false the body, or false when the handle cannot be created
 *                      or the transfer fails
 */
function curl($url){

    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, false);
    // Return the body instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // The URL can originate from a request parameter (see url_bd): allow web protocols only.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    // Do not let an unresponsive host block the request indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $out = curl_exec($ch);
    curl_close($ch);
    return $out;

}
0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:20763次
    • 积分:544
    • 等级:
    • 排名:千里之外
    • 原创:32篇
    • 转载:21篇
    • 译文:0篇
    • 评论:2条
    文章分类
    最新评论