一、单线程
# author:WN
# datetime:2019/10/30 9:07
import requests
import re
def image_urls():
    """Prompt for a keyword and page count, collect thumbnail URLs, then download them.

    Reads the search keyword and the number of result pages from stdin,
    queries the Sogou picture search endpoint once per page (48 results per
    page), extracts every ``thumbUrl`` value from the response text and hands
    the combined list to ``download``.

    Raises:
        ValueError: If the page count typed by the user is not an integer.
        requests.RequestException: If a search request fails or times out.
    """
    search_name = input("请输入你要搜索的图片:")
    search_num = int(input("一页有48张图片,需要几页:"))
    # Combined list of thumbnail URLs from every requested page.
    all_urls = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }
    # ``query`` is the keyword to search for; ``start`` is the index of the
    # first result on the page.
    for start in range(0, search_num * 48, 48):
        # Passing the values through ``params`` lets requests percent-encode
        # them, so a keyword containing '&', '#' or '+' can no longer corrupt
        # the query string.
        params = {
            'query': search_name,
            'mode': 1,
            'start': start,
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': 0,
        }
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(
            'https://pic.sogou.com/pics',
            params=params,
            headers=headers,
            timeout=10,
        )
        url_list = re.findall(r'"thumbUrl":"(.*?)"', response.text)
        if not url_list:
            # No thumbnails on this page. Assumes results are contiguous, so
            # later offsets would be empty as well and are not requested.
            print("没有图片!")
            break
        all_urls.extend(url_list)
    download(all_urls, headers)
def download(url_list, headers, save_dir='E:/搜狗图片'):
    """Download every URL in *url_list* and save each one as ``<index>.jpg``.

    Args:
        url_list: Iterable of image URLs, fetched in order.
        headers: HTTP headers sent with every request.
        save_dir: Directory the files are written to; created when missing.
            Defaults to the location that was previously hard-coded.

    A URL whose request fails, times out or returns an HTTP error status is
    reported and skipped. Its index is not reused, so file names always match
    positions in *url_list*.
    """
    import os  # local import: the file's import header does not provide os

    os.makedirs(save_dir, exist_ok=True)
    for index, url in enumerate(url_list):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            # Do not store an HTML error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print("下载失败: %s (%s)" % (url, exc))
            continue
        # 'wb' rather than 'ab': appending on a re-run would glue a second
        # image onto the existing file and corrupt it.
        with open(os.path.join(save_dir, '%s.jpg' % index), 'wb') as f:
            f.write(response.content)
# Script entry point: run the interactive search-and-download flow only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    image_urls()
二、多线程
# author:WN
# datetime:2019/10/30 9:07
import requests
import re
import threading
# Module-level registry of every download thread that has been started, so
# the main thread can join them later.
threads = []
def image_urls():
    """Prompt for a keyword and page count, then download each thumbnail in its own thread.

    Reads the search keyword and the number of result pages from stdin,
    queries the Sogou picture search endpoint once per page (48 results per
    page) and extracts every ``thumbUrl`` value from the response text. One
    thread running ``download`` is started per URL and appended to the
    module-level ``threads`` list.

    Raises:
        ValueError: If the page count typed by the user is not an integer.
        requests.RequestException: If a search request fails or times out.
    """
    search_name = input("请输入你要搜索的图片:")
    search_num = int(input("一页有48张图片,需要几页:"))
    # Combined list of thumbnail URLs from every requested page.
    all_urls = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }
    # ``query`` is the keyword to search for; ``start`` is the index of the
    # first result on the page.
    for start in range(0, search_num * 48, 48):
        # Passing the values through ``params`` lets requests percent-encode
        # them, so a keyword containing '&', '#' or '+' can no longer corrupt
        # the query string.
        params = {
            'query': search_name,
            'mode': 1,
            'start': start,
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': 0,
        }
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(
            'https://pic.sogou.com/pics',
            params=params,
            headers=headers,
            timeout=10,
        )
        url_list = re.findall(r'"thumbUrl":"(.*?)"', response.text)
        if not url_list:
            # No thumbnails on this page. Assumes results are contiguous, so
            # later offsets would be empty as well and are not requested.
            print("没有图片!")
            break
        all_urls.extend(url_list)
    # The position of each URL doubles as the name of the saved file.
    for count, img_url in enumerate(all_urls):
        # daemon=False makes this a non-daemon thread: the interpreter waits
        # for it to finish instead of killing it when the main thread ends.
        # The constructor argument replaces the deprecated setDaemon() call.
        t = threading.Thread(
            target=download,
            args=(img_url, headers, count),
            daemon=False,
        )
        t.start()
        threads.append(t)
def download(url, headers, count, save_dir='E:/搜狗图片'):
    """Download one image and save it as ``<count>.jpg``.

    Args:
        url: Image URL to fetch.
        headers: HTTP headers sent with the request.
        count: Number used as the file name (without extension).
        save_dir: Directory the file is written to; created when missing.
            Defaults to the location that was previously hard-coded.

    A request that fails, times out or returns an HTTP error status is
    reported and nothing is written.
    """
    import os  # local import: the file's import header does not provide os

    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Do not store an HTML error page under a .jpg name.
        response.raise_for_status()
    except requests.RequestException as exc:
        print("下载失败: %s (%s)" % (url, exc))
        return
    # exist_ok=True tolerates the directory already existing, including when
    # another download thread created it first.
    os.makedirs(save_dir, exist_ok=True)
    # 'wb' rather than 'ab': appending on a re-run would glue a second image
    # onto the existing file and corrupt it.
    with open(os.path.join(save_dir, '%s.jpg' % count), 'wb') as f:
        f.write(response.content)
# Script entry point: start all downloads, then wait for them to complete.
if __name__ == "__main__":
    image_urls()
    # Block the main thread until every started download thread has finished.
    for worker in threads:
        worker.join()