import csv
import requests
import json
import time
from multiprocessing import Pool
'''
Note: on a 4-core CPU, running this with multiprocessing is roughly 5x faster.
'''
def getHtml(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Returns None when the request fails (connection error, timeout, or
    non-2xx status); the error is printed rather than raised, so callers
    must handle a None result.
    """
    try:
        # Timeout keeps a worker process from hanging forever on a
        # stalled connection (the original request had no timeout).
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        resp.encoding = "utf-8"
        return resp.text
    except requests.RequestException as ex:
        # Narrowed from bare `Exception`: only network/HTTP errors are
        # expected here; anything else should surface as a real bug.
        print(ex)
        return None
def parseHtml(html):
    """Strip the JSONP wrapper from *html* and return the stock records.

    The eastmoney endpoint returns ``jQueryXXX(<json>);`` — the JSON
    payload is everything between the first ``(`` and the last ``)``.
    Using ``rindex(')')`` instead of the original ``[:-2]`` also handles
    responses without the trailing semicolon or with trailing whitespace.

    Returns the list stored at ``data.diff`` in the payload.
    Raises ValueError if no parentheses are present, KeyError/TypeError
    if the payload lacks the expected structure.
    """
    start = html.index("(") + 1
    end = html.rindex(")")
    payload = json.loads(html[start:end])
    return payload["data"]["diff"]
def get_and_parse(page):
    """Download and parse one 20-row page of the stock listing.

    *page* is zero-based; the API's ``pn`` query parameter is one-based,
    hence ``page + 1``.
    """
    # Adjacent string literals keep the very long query string readable;
    # the assembled URL is identical to the original concatenation.
    url = (
        "http://46.push2.eastmoney.com/api/qt/clist/get"
        "?cb=jQuery112409929794146169659_1588917956370"
        f"&pn={page + 1}"
        "&pz=20&po=0&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281"
        "&fltt=2&invt=2&fid=f12"
        "&fs=m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23"
        "&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,"
        "f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152"
        "&_=1588917956427"
    )
    return parseHtml(getHtml(url))
def outputHtml(json_content):  # Pool callback — runs in the parent process
    """Append the scraped row dicts in *json_content* to the CSV file.

    Fixes over the original:
    - guards against an empty/None result (the original crashed with
      IndexError on ``json_content[0]`` before reaching the ``try``);
    - drops the ``finally: f.close()`` — the ``with`` block already
      closes the file, and if ``open()`` raised, ``f`` was still None,
      so ``f.close()`` itself crashed with AttributeError.
    """
    if not json_content:
        return  # a failed or empty page — nothing to write
    header = json_content[0].keys()
    try:
        with open("C:\Dsoftdisk\python\scrap_example\\try.csv", "a", newline="") as f:
            csv.DictWriter(f, header).writerows(json_content)
    except Exception as ex:
        # Best-effort: one bad page must not kill the whole scrape run.
        print(ex)
'''
Open question from the original author: why could the CSV still be opened
and written to even though the file was never explicitly closed?
(Answer: the `with` statement closes it automatically on block exit.)
'''
if __name__ == '__main__':
    # Timestamp the run so throughput can be eyeballed from the console.
    print(time.asctime(time.localtime(time.time())))
    pool = Pool(4)  # four worker processes, matching a 4-core CPU
    for page in range(100):
        # Each finished page is handed to outputHtml in the parent
        # process as its result arrives.
        pool.apply_async(get_and_parse, args=(page,), callback=outputHtml)
    pool.close()
    pool.join()
    print("over", time.asctime(time.localtime(time.time())))
# Scrapes stock quotes from the ajax endpoint with multiple processes and
# appends them to a CSV file — roughly 4,000 rows in about 10 seconds.