# 转载自:https://www.cnblogs.com/Axi8/p/5757270.html
# 已将原文中的 Python 2 代码改写为 Python 3,用于爬取百度贴吧某个帖子内的图片。
#coding:utf-8
import urllib.request#python3
import re
def get_html(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to open; handed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the address cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even when read() raises,
    # so the underlying connection is never leaked.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    return raw.decode('utf-8')
# Captures the address of each .jpg given in a src="..." attribute that is
# directly followed by a width attribute.
reg = r'src="(.+?\.jpg)" width'
reg_img = re.compile(reg)
# Download the thread page and collect every matching image address.
imglist = reg_img.findall(get_html('http://tieba.baidu.com/p/1753935195'))
# Save the images under the current working directory as 0.jpg, 1.jpg, ...
# in the order they were found on the page.
for index, img in enumerate(imglist):
    urllib.request.urlretrieve(img, '%s.jpg' % index)