# Downloads the URLs that were stored in MongoDB by crawler part 1 ("老孙的爬虫一").
from pymongo import MongoClient
import requests
import os
import datetime
from multiprocessing import Pool
from multiprocessing import Process
import time
# Shared MongoDB connection (created at import time, before the worker
# processes are forked).
# NOTE(review): pymongo recommends creating a MongoClient *after* fork in
# each child process — confirm this works reliably with 24 workers.
conn=MongoClient("192.168.2.214",27017)
db=conn.test4
url_set=db.url_set    # work queue: documents with {url, MD5_url, state}
path_set=db.path_set  # collection for recording saved file paths
def download_file():
    """Worker loop: claim pending URLs from MongoDB and download each to disk.

    Atomically flips a document's ``state`` from 0 to 1 via
    ``find_one_and_update`` to claim it, so multiple worker processes can run
    this concurrently without downloading the same URL twice.  Files are saved
    as F:\\<YYYY-MM-DD>\\<md5[0]>\\<md5[1:3]>\\<md5[3:]>.zip (sharded by MD5
    prefix to keep directories small).  Returns when no unclaimed documents
    remain.
    """
    while True:
        # Claim the next pending document; keep the DB call outside the
        # try below so a MongoDB outage raises instead of looping forever.
        data = url_set.find_one_and_update({"state": 0}, {"$set": {"state": 1}})
        if not data:
            # No document with state 0 left -> all URLs claimed; worker exits.
            return
        url = data.get("url")
        try:
            md5 = data["MD5_url"]
            path = os.path.join("F:\\",
                                datetime.datetime.now().strftime("%Y-%m-%d"),
                                md5[0], md5[1:3])
            # exist_ok avoids the exists()/makedirs race between workers.
            os.makedirs(path, exist_ok=True)
            # Timeout so a dead server can't hang the worker forever;
            # raise_for_status so HTTP error pages aren't saved as .zip files.
            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            with open(os.path.join(path, md5[3:] + ".zip"), "wb") as fh:
                fh.write(resp.content)
            print("下载完成")
            # path_set.insert_one({"url": url, "MD5_url": md5, "path": path})
        except Exception as e:
            # Report which URL failed and why, instead of a bare "出错".
            print("出错", url, e)
def pro(workers=24, stagger=0.5):
    """Launch download worker processes and wait for all of them to finish.

    Args:
        workers: number of worker processes to spawn (default 24, the
            original hard-coded count).
        stagger: seconds to sleep between launches so the workers do not
            all hit MongoDB at the same instant (default 0.5, as before).
    """
    procs = []
    for _ in range(workers):
        p = Process(target=download_file)
        # Sleep before start, matching the original launch pacing.
        time.sleep(stagger)
        p.start()
        procs.append(p)
    # Block until every worker has drained the queue and exited.
    for p in procs:
        p.join()
if __name__=="__main__":
# for data in set.find():
# print(data["url"])
# print(data["MD5_url"])
# pool=Pool(10)
# for i in range(50):
# pool.apply_async(func=download_file,args=(url,path,))
# pool.close()
# pool.join()
pro()#使用多进程启动多个下载,提高效率