本文分别展示 Python 2 和 Python 3 两个版本的代码;需要提取的内容可以通过修改代码中的正则表达式来调整。
python2代码展示:
import os
import re
import urllib


def getHtml(url):
    """Fetch *url* and return the raw response body (Python 2 version).

    In Python 2 ``urlopen`` lives directly on the ``urllib`` module; the
    ``urllib.request`` submodule only exists in Python 3, so it must not be
    used here.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as read from the connection.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Close explicitly so the connection is released even if read() fails.
        page.close()


def getImg(html, save_dir=""):
    """Download every ``.jpg`` image referenced in *html*.

    Image URLs are taken from ``src="....jpg" pic_ext`` attributes. Edit the
    regular expression below to extract something else.

    Args:
        html: Page source to scan.
        save_dir: Directory to save into; the default ``""`` keeps the
            original behaviour of writing to the current working directory.

    Returns:
        List of the file paths written (``0.jpg``, ``1.jpg``, ...), in the
        order the images appear in the page. Empty when nothing matches.
    """
    imgre = re.compile(r'src="(.+?\.jpg)" pic_ext')
    saved = []
    for x, imgurl in enumerate(imgre.findall(html)):
        target = os.path.join(save_dir, '%s.jpg' % x)
        urllib.urlretrieve(imgurl, target)
        saved.append(target)
    return saved


if __name__ == "__main__":
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(getImg(html))
python3代码展示:
# coding=utf-8
import os
import re
import urllib.parse
import urllib.request

try:
    import chardet
except ImportError:
    # chardet is a third-party package; without it we rely on the charset
    # declared by the server (or UTF-8) instead of failing at import time.
    chardet = None


def getHtml(url):
    """Fetch *url* and return its body decoded to ``str``.

    The encoding is chosen in this order: chardet's guess (when chardet is
    installed and reports one), the charset declared in the response
    headers, then UTF-8. ``chardet.detect`` may report ``None`` as the
    encoding, which would otherwise make ``bytes.decode`` raise TypeError.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page source. Bytes that are invalid in the chosen
        encoding are replaced rather than aborting the download.
    """
    with urllib.request.urlopen(url) as page:
        raw = page.read()
        declared = page.headers.get_content_charset()
    detected = chardet.detect(raw)["encoding"] if chardet is not None else None
    encoding = detected or declared or "utf-8"
    return raw.decode(encoding, errors="replace")


def getImg(html, save_dir=""):
    """Download every ``.png`` image referenced in *html*.

    Image URLs are taken from ``src="....png" alt=""`` attributes. Edit the
    regular expression below to extract something else, for example
    ``r'src="(.+?\\.jpg)" pic_ext'`` or ``r'src="(.+?\\.jpg)"'`` for JPEGs.

    Args:
        html: Decoded page source to scan.
        save_dir: Directory to save into; the default ``""`` keeps the
            original behaviour of writing to the current working directory.

    Returns:
        List of the file paths written (``0.png``, ``1.png``, ...), in the
        order the images appear in the page. Empty when nothing matches.
    """
    imgre = re.compile(r'src="(.+?\.png)" alt=""')
    saved = []
    for x, imgurl in enumerate(imgre.findall(html)):
        target = os.path.join(save_dir, '%s.png' % x)
        urllib.request.urlretrieve(imgurl, target)
        saved.append(target)
    return saved


if __name__ == "__main__":
    html = getHtml("https://www.kuyv.cn/star/dilireba/photo/2/")
    print(getImg(html))