# Scrape meme images (爬取表情包).
import urllib.request
import re
import os
# Directory that receives the downloaded images.
path = r'D:\猫'
# exist_ok=True lets the script be re-run without raising FileExistsError
# when the directory is already there.
os.makedirs(path, exist_ok=True)

# Install a global opener carrying a browser-like User-Agent header so that
# every later urllib.request call (urlopen and urlretrieve) sends it.
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)

# Captures the value of every data-original="..." attribute in the page HTML.
# Compiled once here instead of on every page iteration.
IMAGE_URL_PATTERN = re.compile('data-original="(.*?)"')


def download_search_results(search_url, pages, dest_dir, suffix):
    """Download every image referenced on the given search result pages.

    Args:
        search_url: Search URL ending in ``page=``; the page number is
            appended to it verbatim.
        pages: Iterable of page numbers to fetch.
        dest_dir: Existing directory the images are written into.
        suffix: Text appended to each file name (e.g. ``"dog.gif"``). It is
            used as-is, whatever format the server actually returns.

    Raises:
        urllib.error.URLError: If a result page itself cannot be fetched.
            Failures of individual images are reported and skipped.
    """
    for page in pages:
        pageurl = search_url + str(page)
        # Close the HTTP response deterministically once the body is read.
        with urllib.request.urlopen(pageurl) as response:
            data = response.read().decode("utf-8", "ignore")

        for index, image_url in enumerate(IMAGE_URL_PATTERN.findall(data)):
            # NOTE: page and index are concatenated without a separator, so
            # e.g. page 1/image 11 and page 11/image 1 map to the same name.
            # Kept as-is to preserve the existing file names.
            file = os.path.join(dest_dir, str(page) + str(index) + suffix)
            try:
                urllib.request.urlretrieve(image_url, filename=file)
            except (OSError, ValueError) as exc:
                # URLError/HTTPError are OSError subclasses; ValueError is
                # raised for URLs urllib cannot interpret. One bad image
                # should not abort the rest of the run.
                print("skipped", image_url, "->", exc)


# Fetch page 6 of the search for keyword %E7%8B%97%E7%8B%97 (UTF-8
# percent-encoding of "狗狗") and store the results in `path`.
#
# To fetch another keyword, call download_search_results again with its
# percent-encoded search URL -- e.g. keyword=%E7%8C%AB%E7%8C%AB ("猫猫") with
# pages range(1, 3) and suffix "cat.gif". The destination directory must
# already exist.
download_search_results(
    "http://www.doutula.com/search?type=photo&more=1&keyword=%E7%8B%97%E7%8B%97&page=",
    range(6, 7),
    path,
    "dog.gif",
)