# Learning web scraping: start by downloading some pictures of pretty girls for practice.
#coding=utf-8
import contextlib
import re
import urllib
# NOTE(review): not referenced anywhere in the visible code; presumably meant
# for recognising "403 Forbidden" responses -- confirm whether it is still needed.
FORBIDDEN = "403 Forbidden"
def getHtml(url):
    """Fetch ``url`` and return the complete response body.

    Args:
        url: Address passed straight to ``urllib.urlopen``.

    Returns:
        The value of ``read()`` on the opened response.

    Any error raised by ``urllib.urlopen`` or ``read()`` propagates to the
    caller.
    """
    # closing() releases the connection even when read() raises; it is used
    # instead of a bare ``with`` because the response object is not guaranteed
    # to be a context manager itself.
    with contextlib.closing(urllib.urlopen(url)) as page:
        return page.read()
def getImg(html):
    """Download every ``.jpg`` referenced in ``html`` and return the URL list.

    URLs are taken from fragments of the form ``URL":"http....jpg",``. Each
    one is requested once; when the response status is 200 the body is written
    to ``<index>.jpg`` in the current working directory, where ``<index>`` is
    the position of the URL in the returned list. URLs answering with any
    other status are skipped, but still appear in the returned list.

    Args:
        html: Page source to scan for image URLs.

    Returns:
        List of all matched image URLs, in order of appearance (empty when
        nothing matches; no network access happens in that case).

    Errors raised while opening a URL or writing a file propagate to the
    caller.
    """
    imglist = re.findall(r'URL":"(http.+?\.jpg)",', html)
    for index, imgurl in enumerate(imglist):
        # One request serves both the status check and the download; the
        # response is always closed, whether or not the image is kept.
        with contextlib.closing(urllib.urlopen(imgurl)) as response:
            if response.code != 200:
                continue
            data = response.read()
        # Binary mode: image bytes must be written unmodified.
        with open('%s.jpg' % index, 'wb') as out:
            out.write(data)
    return imglist
# Script body: fetch one Baidu image-search results page (query word is the
# URL-encoded "%E5%9B%BE%E7%89%87"), pass it to getImg, and print the list
# that getImg returns.
search_url = "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1491787331416_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E5%9B%BE%E7%89%87"
html = getHtml(search_url)
print(getImg(html))