import csv
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
# Output file shared by the whole script: down_load_onepage appends rows
# through csvwriter, and the __main__ block closes f when it is done.
# newline="" leaves line-ending handling to the csv module.
f = open(
    "新发地.csv",
    mode="w",
    encoding="utf-8",
    newline="",
)
csvwriter = csv.writer(f)
def down_load_allpages():
    """Placeholder with no behavior yet; takes no arguments and returns None."""
    return None
# down_load_onepage is submitted to a thread pool, so many threads share the
# single module-level csvwriter; this lock keeps their writes from interleaving.
_csv_write_lock = threading.Lock()


def down_load_onepage(datas):
    """Fetch one page of price data and append its rows to the shared CSV.

    Args:
        datas: Form fields POSTed to the price endpoint. The script's
            ``__main__`` block puts the page number under ``"current"``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the JSON reply or one of its records lacks an
            expected key.
    """
    url = "http://www.xinfadi.com.cn/getPriceData.html"
    # Without a timeout a stalled connection would pin a pool worker forever.
    # The context manager releases the connection once the body is parsed.
    with requests.post(url, data=datas, timeout=30) as resp:
        resp.raise_for_status()
        records = resp.json()["list"]

    # Column order matches the original output: name, low, avg, high, date.
    rows = [
        [
            record["prodName"],
            record["lowPrice"],
            record["avgPrice"],
            record["highPrice"],
            record["pubDate"],
        ]
        for record in records
    ]

    # Write the whole page under the lock so rows from concurrent pages
    # cannot be interleaved mid-line.
    with _csv_write_lock:
        csvwriter.writerows(rows)
def build_form_data(page):
    """Return the POST form fields that request page number *page*.

    A new dict is built on every call so each submitted task owns its own
    payload. The original script built one dict up front with page 1 and
    reused it, so every task downloaded the same page.
    """
    return {
        "limit": "",
        "current": f"{page}",
        "pubDateStartTime": "",
        "pubDateEndTime": "",
        "prodPcatid": "",
        "prodCatid": "",
        "prodName": "",
    }


if __name__ == "__main__":
    try:
        pages_by_future = {}
        with ThreadPoolExecutor(50) as t:
            for n in range(1, 2000):
                future = t.submit(down_load_onepage, build_form_data(n))
                pages_by_future[future] = n
        # Leaving the with-block waits for every task, so each future is
        # finished here. Exceptions raised inside a worker are stored on the
        # future and would go unnoticed unless inspected.
        for future, n in pages_by_future.items():
            error = future.exception()
            if error is None:
                print(f"下载完毕!{n}")
            else:
                print(f"下载失败!{n}: {error!r}")
    finally:
        # Close (and flush) the CSV even if the run is interrupted.
        f.close()
    print("全部下载完毕!")
问题总结:
1.新发地有好几万页,页码在FormData中
2.线程池创建用
with ThreadPoolExecutor(50) as t:
for i in range (1,2000):
t.submit(down_load_onepage,datas)
down_load_onepage是传给submit的函数本身,不用括号(加了括号会立即调用);datas是传递给它的实参
3.线程池运行很快,但是写入文件很慢