import urllib.request
import urllib.parse
import re
import os
def Imgpath(word):
    """Create a new output directory for the keyword *word* and return its path.

    The directory name is built from the current working directory with its
    last four characters removed, followed by *word*. If that path is already
    taken, a numeric suffix (``1``, ``2``, ...) is appended until a directory
    can be created, so repeated runs for the same keyword never collide.

    Args:
        word: Search keyword; used verbatim as the trailing part of the path.

    Returns:
        The path (str) of the directory that was just created.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            the name already being in use (e.g. missing permissions).
    """
    # NOTE(review): dropping the last 4 characters of the cwd looks tailored
    # to one specific launch directory -- kept as-is, confirm it is intended.
    base_path = os.getcwd()[:-4] + word

    candidate = base_path
    suffix = 0
    while True:
        try:
            # Create first and react to the failure instead of checking
            # os.path.exists() beforehand, which is race-prone.
            os.makedirs(candidate)
        except FileExistsError:
            suffix += 1
            candidate = base_path + str(suffix)
        else:
            return candidate
def Imgurl(word):
    """Collect thumbnail URLs for *word* from the Baidu image search endpoint.

    Four result pages are requested (``pn`` = 30, 60, 90, 120 with ``rn=30``)
    and every ``"thumbURL"`` value found in the responses is collected.

    Args:
        word: Search keyword; it is percent-encoded as UTF-8 for the query.

    Returns:
        A list of thumbnail URL strings in the order they were found
        (possibly empty).

    Raises:
        urllib.error.URLError: If a page request fails. Other ``OSError``
            subclasses (such as a timeout while reading) may also propagate.
    """
    # NOTE(review): 'access-control-allow-origin' and 'content-type' are
    # normally response-side headers; they are sent unchanged here because
    # it is unclear whether the server depends on them.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59',
        'access-control-allow-origin':'*',
        'content-type':'image/webp',
        'accept-language':'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    }
    # Capture the whole quoted value. Stopping at '.jpg' cut the extension
    # off every URL and, for a thumbnail without '.jpg', let the match run
    # on into the following JSON fields.
    thumb_pattern = re.compile(r'"thumbURL":"([^"]+)"')
    request_timeout = 15  # seconds; without it a stalled server blocks forever

    content = urllib.parse.quote(word, encoding='utf-8')
    rep_list = []
    for num in range(30, 121, 30):
        # 'gsm' carries the offset in hexadecimal without the '0x' prefix.
        gsm = hex(num)[2:]
        url = (
            'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
            '&ct=201326592&is=&fp=result&queryWord=' + content
            + '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&word=' + content
            + '&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&pn=' + str(num)
            + '&rn=30&gsm=' + gsm + '&1521707235798='
        )
        req = urllib.request.Request(url=url, headers=header)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req, timeout=request_timeout) as resp:
            page = resp.read()
        # A stray undecodable byte should not discard the whole page.
        response = page.decode('utf-8', errors='replace')
        rep_list += thumb_pattern.findall(response)
    return rep_list
def download_img(word):
    """Download up to 100 thumbnails for *word* into a newly created directory.

    The URL list comes from ``Imgurl(word)`` and the target directory from
    ``Imgpath(word)``. Each image is stored as ``<word><n>.png`` where ``n``
    is the 1-based position of its URL in the list. The ``.png`` extension is
    fixed and does not depend on what the server actually returns.

    A thumbnail that cannot be fetched is reported on stdout and skipped, so
    a single bad URL no longer aborts the rest of the batch.

    Args:
        word: Search keyword; also used as the file name prefix.

    Returns:
        None.

    Raises:
        OSError: If the directory or an image file cannot be written.
    """
    img_urllist = Imgurl(word)
    img_path = Imgpath(word)
    for x, url in enumerate(img_urllist[:100], start=1):
        try:
            with urllib.request.urlopen(url, timeout=15) as resp:
                pngdata = resp.read()
        except (OSError, ValueError) as err:
            # OSError covers URLError/HTTPError/timeouts; ValueError is what
            # urllib raises for a string it cannot interpret as a URL.
            print('下载失败,已跳过:', url, err)
            continue
        # os.path.join instead of a literal backslash so the file lands
        # inside img_path on every platform (same result on Windows).
        path = os.path.join(img_path, word + str(x) + '.png')
        with open(path, 'wb') as f:
            f.write(pngdata)
if __name__ == '__main__':
    # Script entry point: first a console prompt, then a small tkinter window
    # that offers the same download through a text field and a button.
    # Everything lives under the guard so importing this module for its
    # helper functions neither prompts nor opens a window.

    # --- console mode -----------------------------------------------------
    word = input("请输入中文关键词:")
    download_img(word)

    # --- GUI mode ---------------------------------------------------------
    import tkinter

    placeholder = "请输入关键词"  # hint text shown until the user types

    win = tkinter.Tk()
    win.title("百度图片爬虫")
    win.geometry("400x200+400+200")

    entry = tkinter.Entry(win, width=28)
    entry.insert(0, placeholder)

    def func1(event):
        """Remove the hint text when the entry is clicked.

        Only the untouched hint is cleared; text the user has already typed
        is left alone when they click to move the cursor.
        """
        if entry.get() == placeholder:
            entry.delete(0, tkinter.END)

    entry.bind("<Button-1>", func1)

    def func2():
        """Button callback: download images for the keyword in the entry.

        Does nothing when the field is empty or still shows the hint text.
        """
        word = entry.get()
        if not word or word == placeholder:
            return
        # NOTE: download_img runs synchronously inside the Tk callback, so
        # the window does not process events until the download finishes.
        download_img(word)

    button = tkinter.Button(win, text="确定", command=func2,
                            width=6, height=1)
    entry.place(x=100, y=50)
    button.place(x=170, y=100)
    win.mainloop()