import requests
from requests.exceptions import ConnectionError
from pyquery import PyQuery as pq
import operator
from functools import reduce
import re
import json
import pandas as pd
import numpy as np
# Request headers for the JD club pages (desktop Chrome user agent).
# Note: the file reassigns ``headers`` further down before the crawl loop,
# so these values only apply to requests made before that point.
headers = {
    "Host": "club.jd.com",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/77.0.3865.120 Safari/537.36"
    ),
}
def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Uses the module-level ``headers`` dict for the request.  On any
    request failure (connection error, timeout, non-2xx status) this
    prints an error and returns an empty string instead of raising, so
    callers can treat "" as "no page".
    """
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        # Guess the real encoding from the body; the HTTP header charset
        # is often wrong or missing on Chinese sites.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer silently swallowed.
        print("页面提取错误")  # "page extraction error"
        return ""
# --- eastmoney fund-history API configuration -------------------------
fundCode = '519697'  # fund to crawl
pageIndex = 2  # NOTE(review): appears unused — the loop below drives its own page counter
url = 'http://api.fund.eastmoney.com/f10/lsjz'  # historical NAV (净值) endpoint
# Session cookie captured from a browser visit; required by the API.
cookie = 'HAList=a-sh-603899-%u6668%u5149%u6587%u5177; em_hq_fls=js; qgqp_b_id=261272d980d240a9bd3df919a41ac2d4; EMFUND1=null; EMFUND2=null; EMFUND3=null; EMFUND4=null; EMFUND5=null; EMFUND6=null; EMFUND7=null; EMFUND8=null; EMFUND0=null; st_si=17382221904758; st_asi=delete; EMFUND9=07-31 13:00:08@#$%u666F%u987A%u957F%u57CE%u65B0%u5174%u6210%u957F%u6DF7%u5408@%23%24260108; st_pvi=53058681075087; st_sp=2019-10-30%2013%3A38%3A10; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Fs; st_sn=11; st_psi=20200804105742826-0-2701686462'
# Headers for the fund API; the Referer must point at the fund's own
# detail page or the API rejects the request.
headers = {
    'Cookie': cookie,
    'Host': 'api.fund.eastmoney.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Referer': 'http://fundf10.eastmoney.com/jjjz_{}.html'.format(fundCode),
}
# Crawl 139 pages of historical NAV records for fundCode, collecting one
# DataFrame per page into ``dfs``.
dfs = []
for i in range(139):
    # NOTE(review): pageIndex starts at 0 here; if the API is 1-based,
    # page 0 may duplicate page 1 — confirm against the endpoint.
    params = {
        'callback': 'jQuery18307633215694564663_1548321266367',
        'fundCode': fundCode,
        'pageIndex': i,
        'pageSize': 20,
    }
    r = requests.get(url=url, headers=headers, params=params)
    # The API returns JSONP: callback({...}); strip the wrapper and parse
    # the payload once (was parsed twice per page before).
    payload = json.loads(re.findall(r'\((.*?)\)', r.text)[0])
    LSJZList = payload['Data']['LSJZList']  # historical net-value rows
    LSJZ = pd.DataFrame(LSJZList)
    LSJZ['fundCode'] = fundCode  # tag every row with its fund code
    dfs.append(LSJZ)
# Combine all crawled pages and count down-days per weekday.
df = pd.concat(dfs)
df['FSRQ'] = pd.to_datetime(df['FSRQ'])  # FSRQ = net-value date
# ``Series.dt.weekday_name`` was removed in pandas 1.0; ``dt.day_name()``
# is the supported replacement and yields the same English day names.
df['daynameofweek'] = df['FSRQ'].dt.day_name()
df = df[df['JZZZL'] != '']  # drop rows whose growth-rate field is empty
df['JZZZL'] = df['JZZZL'].astype('float64')  # JZZZL = daily growth rate (%)
# isdown: 0 for an up day, -1 for a flat/down day, so the per-weekday sum
# counts down-days (more negative == more down-days on that weekday).
df['isdown'] = np.select([df['JZZZL'] > 0, df['JZZZL'] <= 0], [0, -1])
dd = df.groupby('daynameofweek')['isdown'].sum().sort_values()
print(dd)
# 天天基金爬虫 (Tiantian Fund crawler)
# 最新推荐文章于 2023-10-18 14:17:39 发布 — blog-footer text from the pasted source, kept as a comment so the file parses