# A highlighted block
import _thread
import http.client
import os
import re
import socket
import threading
import time
import urllib.error
import urllib.request
from time import sleep
def current_time():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        str: The formatted local timestamp, e.g. ``2021-03-01 17:45:09``.
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
def download_img(path, thread_del, stride=5):
    """Download the ``.jpg`` images linked from a fixed Google image search.

    The search result page is fetched and scanned for ``https://...jpg``
    links. This worker then handles every ``stride``-th link, starting at
    index ``thread_del``, so ``stride`` workers started with the offsets
    ``0 .. stride - 1`` cover the whole list between them. Each worker
    fetches and parses the search page on its own.

    Images are stored in ``path`` as ``razer_image_<index>.jpg``. A file that
    already exists is skipped; a failed download is logged and skipped.

    Args:
        path: Directory the images are saved in; created when missing.
        thread_del: Index of the first link this worker handles.
        stride: Step between the link indices this worker handles. Must be
            at least 1. Defaults to 5.

    Raises:
        ValueError: If ``stride`` is less than 1.
        urllib.error.URLError: If the search page itself cannot be fetched.
        SystemExit: Always, on completion, via ``_thread.exit()``, which
            ends the calling thread.
    """
    if stride < 1:
        raise ValueError("stride must be at least 1, got {!r}".format(stride))

    if not os.path.exists(path):
        print("{} Folder {} not exist, now create it.".format(current_time(), path))
        os.makedirs(path, exist_ok=True)
    else:
        print("{} Folder {} exist.".format(current_time(), path))
    # The mode has to be an octal literal: decimal 777 equals 0o1411, which
    # is not the intended rwxrwxrwx.
    os.chmod(path, 0o777)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'
    }
    root_url = 'https://www.google.com/search?q=kobe&tbm=isch&ved=2ahUKEwim3-nl047vAhXhTPUHHePDB6oQ2-cCegQIABAA'
    req = urllib.request.Request(url=root_url, headers=headers)
    # Close the response deterministically and never wait forever on it.
    with urllib.request.urlopen(req, timeout=30) as page:
        html = page.read().decode("utf-8", errors="replace")

    # The dot before "jpg" is escaped so it only matches a literal ".", and
    # the character class stops a match from running across quotes, tags or
    # whitespace into an unrelated URL.
    link_list = re.findall(r"https://[^\s\"'<>]+?\.jpg", html)
    link_list_length = len(link_list)

    for index in range(thread_del, link_list_length, stride):
        link = link_list[index]
        print('{} Downloading {}'.format(current_time(), link))
        new_req = urllib.request.Request(url=link, headers=headers)
        jpg_name = '{}_{}.jpg'.format('razer_image', index)
        abs_jpg_name = os.path.join(path, jpg_name)
        try:
            if not os.path.exists(abs_jpg_name):
                # Read the whole body before creating the file: a failed
                # download must not leave an empty file behind, because an
                # existing file is skipped on every later run.
                with urllib.request.urlopen(new_req, timeout=30) as response:
                    payload = response.read()
                print('{} Saving image {}'.format(current_time(), abs_jpg_name))
                with open(abs_jpg_name, 'wb') as f_save:
                    f_save.write(payload)
                print('{} Image {} download success'.format(current_time(), abs_jpg_name))
            else:
                print('{} {} already exist, escape it'.format(current_time(), abs_jpg_name))
        except urllib.error.HTTPError:
            print('{} HTTP error. Image {} download failed'.format(current_time(), abs_jpg_name))
        except urllib.error.URLError:
            print('{} URL error. Image {} download failed'.format(current_time(), abs_jpg_name))
        except http.client.RemoteDisconnected:
            # Must precede HTTPException, its base class, to be reachable.
            print("{} HTTP client remote disconnected. Image {} download failed".format(current_time(), abs_jpg_name))
        except http.client.HTTPException:
            print('{} HTTP exception. Image {} download failed'.format(current_time(), abs_jpg_name))
        except socket.timeout:
            print("{} Timeout. Image {} download failed".format(current_time(), abs_jpg_name))
        except UnicodeEncodeError:
            print("{} Unicode encode error. Image {} download failed".format(current_time(), abs_jpg_name))
        except OSError as err:
            # Connection resets during the read and disk errors during the
            # write; one bad image should not end the whole worker.
            print("{} OS error ({}). Image {} download failed".format(current_time(), err, abs_jpg_name))
        print("{} Download finished, thread_del now is {}, link list length is {}".format(
            current_time(), index, link_list_length))

    print("{} thread exit".format(current_time()))
    # Raises SystemExit to end the calling thread.
    _thread.exit()
if __name__ == '__main__':
    # Start five download workers and wait until every one of them is done.
    path = 'E:\\Software\\Application\\PyCharmProjects\\WxjProject\\excel\\download\\image'
    # download_img advances through the link list five entries at a time, so
    # the offsets 0..4 together cover every link exactly once.
    worker_count = 5
    started = []
    try:
        for offset in range(worker_count):
            worker = threading.Thread(target=download_img, args=(path, offset))
            worker.start()
            started.append(worker)
    except RuntimeError:
        # Thread.start() raises RuntimeError when no new thread can be created.
        print("Error: unable to start thread")
    # Joining replaces the former fixed-length sleep loop: the script now
    # ends as soon as the workers finish and never cuts a running one short.
    for worker in started:
        worker.join()
# Python multi-threaded image download
# Latest recommended article, published 2021-11-29 20:58:49