有些网站需要做了限定 浏览器才能 打开 所以这次我们要伪装一个浏览器 取抓去 数据
还是以豆瓣为例
代码如下:
''' 伪装浏览器 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36 ''' import urllib.request url = 'https://www.douban.com/' header = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'} req = urllib.request.Request(url=url,headers=header) re = urllib.request.urlopen(req) data = re.read() #设置编码方式 data = data.decode('utf-8') #打印抓取结果 print(data) #打印爬取网页的各类信息 print(type(re)) print(re.geturl()) print(re.info()) print(re.getcode())我的是mac电脑 如果是 win的系统 请自行 百度 或 用抓包工具 查找User-Agent 如有不妥请大神指点。。。
抓取结果:
<!DOCTYPE HTML> <html lang="zh-cmn-Hans" class="ua-mac ua-webkit"> <head> <meta charset="UTF-8"> <meta name="description" content="提供图书、电影、音乐唱片的推荐、评论和价格比较,以及城市独特的文化生活。"> <meta name="keywords" content="豆瓣,广播,登陆豆瓣"> <meta property="qc:admins" content="2554215131764752166375" /> <meta property="wb:webmaster" content="375d4a17a4fa24c2" /> <meta name="mobile-agent" content="format=html5; url=https://m.douban.com"> <title>豆瓣</title> <script> function set_cookie(t,e,o,n){var i,a,r=new Date;r.setTime(r.getTime()+24*(e||30)*60*60*1e3),i="; expires="+r.toGMTString();for(a in t)document.cookie=a+"="+t[a]+i+"; domain="+(o||"douban.com")+"; path="+(n||"/")}function get_cookie(t){var e,o,n=t+"=",i=document.cookie.split(";");for(e=0;e<i.length;e++){for(o=i[e];" "==o.charAt(0);)o=o.substring(1,o.length);if(0===o.indexOf(n))return o.substring(n.length,o.length).replace(/\"/g,"")}return null}window.Douban=window.Douban||{};var Do=function(){Do.actions.push([].slice.call(arguments))};Do.ready=function(){Do.actions.push([].slice.call(arguments))},Do.add=Do.define=function(t,e){Do.mods[t]=e},Do.global=function(){Do.global.mods=Array.prototype.concat(Do.global.mods,[].slice.call(arguments))},Do.global.mods=[],Do.mods={},Do.actions=[],Douban.init_show_login=function(t){Do("dialog",function(){var t="/j/misc/login_form";dui.Dialog({title:"登录",url:t,width:/device-mobile/i.test(document.documentElement.className)?.9*document.documentElement.offsetWidth:350,cache:!0,callback:function(t,e){e.node.addClass("dialog-login"),e.node.find("h2").css("display","none"),e.node.find(".hd h3").replaceWith(e.node.find(".bd h3")),e.node.find("form").css({border:"none",width:"auto",padding:"0"}),e.update()}}).open()})},Do(function(){function t(t,e){var o=["ref="+encodeURIComponent(location.pathname)];for(var n in e)e.hasOwnProperty(n)&&o.push(n+"="+e[n]);window._SPLITTEST&&o.push("splittest="+window._SPLITTEST),localStorage.setItem("report",(localStorage.getItem("report")||"")+"_moreurl_separator_"+o.join("&"))}!function(){"localStorage"in window||(window.localStorage=function(){var t=document;if(!t.documentElement.addBehavior)throw"don't support localstorage or userdata.";var e="_localstorage_ie",o=t.createElement("input");o.type="hidden";var n=function(n){return function(){t.body.appendChild(o),o.addBehavior("#default#userData");var i=new Date;i.setDate(i.getDate()+365),o.expires=i.toUTCString(),o.load(e);var a=n.apply(o,arguments);return t.body.removeChild(o),a}};return{getItem:n(function(t){return this.getAttribute(t)}),setItem:n(function(t,o){this.setAttribute(t,o),this.save(e)}),removeItem:n(function(t){this.removeAttribute(t),this.save(e)}),clear:n(function(){for(var t,o=this.XMLDocument.documentElement.attributes,n=0;t=o[n];n++)this.removeAttribute(t.name);this.save(e)})}}())}(),$(window).one("load",function(){var t=localStorage.getItem("report");if(t){t=t.split("_moreurl_separator_");var e=function(o){return""==o?void e(t.shift()):void $.get("undefined"==typeof _MOREURL_REQ?"/stat.html?"+o:_MOREURL_REQ+"?"+o,function(){return t.length?(e(t.shift()),void localStorage.setItem("report",t.join("_moreurl_separator_"))):void localStorage.removeItem("report")})};e(t.shift())}}),window.moreurl=t,$(document).click(function(e){var o=e.target,n=$(o).data("moreurl-dict");n&&t(o,n)}),$.ajax_withck=function(t){return"POST"==t.type&&(t.data=$.extend(t.data||{},{ck:get_cookie("ck")})),$.ajax(t)},$.postJSON_withck=function(t,e,o){return $.post_withck(t,e,o,"json")},$.post_withck=function(t,e,o,n){return $.isFunction(e)&&(n=o,o=e,e={}),$.ajax({type:"POST",url:t,data:$.extend(e,{ck:get_cookie("ck")}),success:o,dataType:n||"text"})},$("html").click(function(t){var e=$(t.target),o=e.attr("class");o&&$(o.match(/a_(\w+)/gi)).each($.proxy(function(e,o){var n=Douban[o.replace(/^a_/,"init_")];"function"==typeof n&&(t.preventDefault(),n.call(this,t))},e[0]))})}); Do.add('dialog', {path: 'https://img3.doubanio.com/f/shire/3d185ca912c999ee7f464749201139ebf8eb6972/js/ui/dialog.js', type: 'js', requires: ['https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css']}); Do.global('https://img3.doubanio.com/f/sns/b5793c2d7c298173d57ecf7d96708b5615336def/js/sns/fp/base.js', 'dialog'); </script> <link rel="stylesheet" href="https://img3.doubanio.com/f/shire/25e3b87e05e5de459e1473fad35d25cafd392ad6/css/core/_init_.css"> <link rel="stylesheet" href="https://img3.doubanio.com/f/sns/024dd07167e74fe8d2ac2faaf2725333e6f7561b/css/sns/anonymous_home.css"> <style type="text/css">
<class 'http.client.HTTPResponse'> https://www.douban.com/ Date: Mon, 18 Sep 2017 08:26:52 GMT Content-Type: text/html; charset=utf-8 Transfer-Encoding: chunked Connection: close Vary: Accept-Encoding X-Xss-Protection: 1; mode=block X-Douban-Mobileapp: 0 Expires: Sun, 1 Jan 2006 01:00:00 GMT Pragma: no-cache Cache-Control: must-revalidate, no-cache, private Set-Cookie: ll="118282"; path=/; domain=.douban.com; expires=Tue, 18-Sep-2018 08:26:52 GMT Set-Cookie: bid=hSytIGnUYvc; Expires=Tue, 18-Sep-18 08:26:52 GMT; Domain=.douban.com; Path=/ X-DOUBAN-NEWBID: hSytIGnUYvc X-DAE-Node: sindar10b X-DAE-App: sns Server: dae Strict-Transport-Security: max-age=15552000; 200