# Purpose: download meme ("doutu") images
# Site: http://sc.chinaz.com/biaoqing/151214303910.htm
# Techniques: web scraping through an HTTP proxy
# Proxy setup steps:
# 1. Create a proxy handler from a dict: proxy = {'scheme': 'ip:port'}
#    urllib.request.ProxyHandler(proxy)
# 2. Build an opener (request headers go on the opener's addheaders
#    attribute, which must be a list of (name, value) tuples, e.g.
#    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')])
#    urllib.request.build_opener(proxy_handler)
# 3. Install the opener globally so request.urlopen() uses it:
#    urllib.request.install_opener(opener)
# Imports
from urllib import request
import re
import os
# Open a URL through the proxy
def open_url(url):
    """Fetch *url* through a hard-coded HTTP proxy and return the response object.

    The caller is responsible for calling .read() on the returned response.
    """
    # ProxyHandler takes a dict mapping scheme -> 'ip:port'
    handler = request.ProxyHandler({'http': '106.46.136.112:808'})
    opener = request.build_opener(handler)
    # Spoof a desktop-browser User-Agent on every request made via this opener
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
    # Install the opener process-wide so request.urlopen() routes through it
    request.install_opener(opener)
    return request.urlopen(url)
# Image-saving function
def save_img(filename='斗图'):
    """Download every .jpg linked from the target page into directory *filename*.

    filename: name of the directory to save images into (created if missing).
              Previously this parameter was silently ignored.
    """
    url = 'http://sc.chinaz.com/biaoqing/151214303910.htm'
    # Bug fix: urlopen() returns a response object, not bytes/str, so the
    # body must be .read() before it can be decoded.
    html = open_url(url).read().decode('utf-8')
    # NOTE(review): assumes plain <img src="...jpg"> markup; lazy-loaded
    # pages often use src2/data-src attributes instead -- confirm against
    # the live page before relying on this pattern.
    pattern = r'<img src="([^"]+\.jpg)">'
    img_list = re.findall(pattern, html)
    # Use the filename parameter as the target folder (this is what the
    # otherwise-unused `import os` was for).
    os.makedirs(filename, exist_ok=True)
    for img_url in img_list:
        print(img_url)
        # Bug fix: read the image bytes; writing the response object itself
        # would raise TypeError.
        data = open_url(img_url).read()
        name = img_url.split('/')[-1]
        with open(os.path.join(filename, name), 'wb') as f:
            f.write(data)
# Script entry point
save_img('斗图')