用 Python 优雅地实现输入关键词下载图片
思路
- 创建文件夹
- 拿到url(网上很多)
- 循环下载到文件夹中
注意
- 本文代码借鉴自网上的代码,但网上的代码有各种 BUG
- 文件夹若已存在,创建时会报错,所以增加了让用户选择的过程
- 若图片加载不出来,会导致无法下载,本文代码修复了这个 bug
- 环境:Python 3.7.7
代码
import urllib.request
import urllib.parse
import ssl
import re
import time
import os
import shutil
from urllib.error import HTTPError
# Running count of images saved so far. get_img() declares it `global`,
# increments it after each saved file and uses it to number the output files.
i = 0
# Replace the default HTTPS context factory with the unverified one, so HTTPS
# requests made through urllib in this process skip certificate verification.
# NOTE(review): this is insecure — confirm it is really required.
ssl._create_default_https_context = ssl._create_unverified_context
def open_url(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4843.400 QQBrowser/9.7.13021.400'}
    req = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the response even if read()/decode() raises;
    # the original never closed it.
    with urllib.request.urlopen(req) as page:
        return page.read().decode('utf-8')
def get_img(html, file_name, num):
    """Download the ``.jpg`` images referenced by one search-result page.

    Image URLs are taken from the ``"objURL":"http://....jpg"`` entries in
    *html*. Each image is saved as ``<file_name>/<i>.jpg``, where ``i`` is the
    module-level counter of images saved so far; the counter is incremented
    after every successful save.

    Args:
        html: Text of the result page.
        file_name: Existing directory that receives the image files.
        num: Total number of images wanted; downloading stops once the
            module-level counter reaches it.

    Returns:
        ``('---', -1)`` when the counter has reached *num* while images were
        still left on the page. Otherwise ``(last, i)``, where ``last`` is the
        list of "next page" hrefs found in *html* (possibly empty) and ``i``
        is the current counter. A failed download ends processing of this page
        and also returns ``(last, i)``.
    """
    global i
    # The dot before "jpg" is escaped so that only a literal ".jpg" suffix
    # matches (the unescaped dot accepted any character there).
    photo_link = r'"objURL":"(http://[^"]*\.jpg)"'
    imgs = re.findall(photo_link, html)
    last_link = r'<a href="(/search/flip[^"]*)" class="n">下一页</a>'
    last = re.findall(last_link, html)
    for each in imgs:
        if i >= num:
            print('下载结束!')
            return '---', -1
        filename = file_name + '/' + str(i) + '.jpg'
        try:
            # Read the whole body before opening the output file so a failed
            # download does not leave an empty or partial file behind, and let
            # the context manager close the response on every path.
            with urllib.request.urlopen(each) as response:
                data = response.read()
            with open(filename, "wb") as f:
                f.write(data)
        except HTTPError as e:
            print(e)
            if e.code == 403:
                print('权限不足')
            elif e.code == 404:
                print('没有那个地址')
            return last, i
        except Exception:
            # Any other failure (network error, unwritable file, ...) also
            # ends this page. Unlike a bare ``except`` this lets
            # KeyboardInterrupt/SystemExit propagate so the user can abort.
            print(last, i)
            return last, i
        i += 1
    return last, i
def _confirm(prompt):
    """Ask a 1/0 question; return True only when the answer parses to 1."""
    try:
        return int(input(prompt)) == 1
    except ValueError:
        # A non-numeric answer counts as "no" instead of crashing the script.
        return False


def _download_all(url, folder, num):
    """Walk result pages starting at *url*, saving images into *folder*.

    Stops when get_img() reports that *num* images have been reached, when a
    page has no "next page" link, or after several consecutive failures.
    """
    max_failures = 3
    failures = 0
    while url:
        try:
            last, a = get_img(open_url(url), folder, num)
        except Exception as err:
            # Bounded retry: the original retried the same URL forever.
            failures += 1
            print(err)
            if failures >= max_failures:
                print('请求失败,停止下载')
                break
            time.sleep(0.01)
            continue
        failures = 0
        if a == -1:
            break
        if not last:
            # No next-page link: indexing last[0] used to raise IndexError,
            # which was swallowed and caused an endless retry of this page.
            print('没有下一页了,下载结束')
            break
        url = 'http://image.baidu.com' + last[0]


if __name__ == "__main__":
    key_word = input('请输入要下载的图片:')
    num = int(input('请输入要下载图片的数量:'))
    print('准备下载... ...')
    key_words = urllib.parse.quote(key_word)
    # "&copyright=" had been mangled into "©right=" (HTML entity decoding);
    # the non-ASCII character made the request URL invalid.
    baidu = 'http://image.baidu.com/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548421364447_R&pv=&ic=0&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=1548421364449%5E00_1351X629&word='
    url = baidu + key_words
    # os.path.join uses the platform's separator instead of a hard-coded '\\'.
    folder_path = os.path.join(os.getcwd(), key_word)
    ready = True
    if os.path.exists(key_word):
        # The folder already exists: it is only replaced after the user has
        # confirmed twice (replace, then delete).
        ready = False
        if _confirm('原文件夹存在,是否替换,是:1,否:0:'):
            print('路径是:' + folder_path)
            if _confirm('是否删除,1是,0否'):
                shutil.rmtree(folder_path)
                ready = True
        if not ready:
            print('有相同文件夹名字,请删除或另起一个名字')
    if ready:
        os.makedirs(key_word)
        _download_all(url, key_word, num)