import requests_html
# Search keyword used as the default query for this script.
word = '李小龙'
# Page-count setting. NOTE(review): the download call visible in this file
# does not pass this value, so my_download's own default applies -- confirm.
pn = 30
def my_download(word='李小龙', pn=1):
    """Collect thumbnail URLs from Baidu image-search result pages.

    Requests ``pn`` result pages for the query ``word`` and extracts every
    piece of text that fills the placeholder in ``"thumbURL":"{}"`` within
    each page's source.

    Args:
        word: Search keyword sent as the ``word`` query parameter.
        pn: Number of result pages to request. Page ``i`` is requested with
            the ``pn`` query parameter set to ``30 * i``. A value of 0 or
            less requests nothing.

    Returns:
        A list of the captured strings in page order. Duplicates are kept;
        the total count and the distinct count are printed to stdout.
    """
    search_url = 'http://image.baidu.com/search/index'
    pn_per_page = 30
    thumb_pattern = '"thumbURL":"{}"'

    addr_list = []
    if pn > 0:
        # Using the session as a context manager releases its connections
        # even when one of the requests raises.
        with requests_html.HTMLSession() as sess:
            for page in range(pn):
                # Passing the query via ``params`` lets the HTTP library
                # percent-encode it, so keywords containing '&', '#' or
                # spaces no longer corrupt the query string.
                query = {
                    'tn': 'baiduimage',
                    'ie': 'utf-8',
                    'word': word,
                    'pn': pn_per_page * page,
                }
                resp = sess.get(search_url, params=query)
                addr_list.extend(
                    match[0] for match in resp.html.search_all(thumb_pattern)
                )

    print(len(addr_list), len(set(addr_list)))
    return addr_list
# Example usage:
# for num, k in enumerate(my_download('李小龙', 2)):
#     print(num, k)
def my_save_img(my_dir, addr_list, file_name='李小龙'):
    """Download each URL in ``addr_list`` and save it as a ``.jpg`` file.

    Files are written to ``my_dir`` (created if it does not exist) and named
    ``{file_name}_{n}.jpg``, where ``n`` is the 1-based position of the URL
    in ``addr_list``. The download is best-effort: a URL that cannot be
    fetched or saved is reported on stdout and skipped, and the remaining
    URLs keep their position-based numbers.

    Args:
        my_dir: Directory the image files are written to.
        addr_list: Iterable of URLs to download.
        file_name: Prefix used for every saved file name.

    Returns:
        None.
    """
    import os

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(my_dir, exist_ok=True)
    if not addr_list:
        # Nothing to fetch, so do not open an HTTP session at all.
        return

    # The context manager closes the session once all downloads are done.
    with requests_html.HTMLSession() as sess:
        for num, addr in enumerate(addr_list, 1):
            target = f'{my_dir}/{file_name}_{num}.jpg'
            try:
                resp = sess.get(addr)
                # Reject HTTP error responses so an error page is never
                # written to disk under a .jpg name.
                resp.raise_for_status()
                with open(target, 'wb') as f:
                    f.write(resp.content)
            except Exception as exc:
                # Keep going on a per-image failure, but say what was
                # skipped. Unlike a bare ``except``, this lets
                # KeyboardInterrupt stop the loop.
                print(f'skipped {addr!r}: {exc}')
# Script driver: search for the keyword, then save the results under
# download/<keyword>/ with the keyword as the file-name prefix.
word = '李小龙'
# Pass the keyword explicitly so the search term always matches the folder
# and file prefix used below, instead of relying on my_download's default.
img_list = my_download(word)
my_save_img(f'download/{word}', img_list, word)
# SDUWH 2019-2020 winter-break Python training -- my_baidu_img_dwn
# (Web-page residue: "latest recommended article published on 2024-07-23 16:23:28")