# -*- coding: utf-8 -*-
"""
Created on Sun Sep 17 14:30
@author: Zonghao Qu
"""
import urllib
import urllib.error
import urllib.request  # required: `import urllib` alone does not expose urllib.request.urlopen

import pandas as pd
from bs4 import BeautifulSoup as bs
import exchange_calendars as xcals
# Collect the trading days of 2020 as "YYYY-MM-DD" strings.
# NOTE(review): the XSHG (Shanghai Stock Exchange) calendar is presumably
# used here as a proxy for CFFEX trading days — confirm the calendars match.
xshg = xcals.get_calendar("XSHG")
# Restrict the full schedule to calendar year 2020.
xshg_range = xshg.schedule.loc["2020-01-01":"2020-12-31"]
# Date strings consumed by get_info() below.
date = xshg_range.index.strftime("%Y-%m-%d").tolist()
def get_info(date_list):
    '''
    Build the CFFEX daily-statistics XML URL for each trading date.

    Parameters
    ----------
    date_list : list of str
        Dates formatted as "YYYY-MM-DD" (e.g. the strings produced from
        an exchange_calendars schedule index).

    Returns
    -------
    list of str
        One URL per input date, shaped like
        http://www.cffex.com.cn/sj/hqsj/rtj/YYYYMM/DD/index.xml?id=94
    '''
    url_list = []
    for date_str in date_list:
        year, month, day = date_str.split('-')
        # CFFEX groups files by year+month directory, then day: .../YYYYMM/DD/...
        url_list.append(
            f'http://www.cffex.com.cn/sj/hqsj/rtj/{year}{month}/{day}/index.xml?id=94'
        )
    return url_list
# Build the data URL for every 2020 trading day.
# (Removed a bare `url_list` expression statement — a notebook-display
# leftover that is a no-op in a script.)
url_list = get_info(date)
def mktable(url):
    '''
    Fetch one CFFEX daily-statistics XML page and return its IF
    (index-futures) contract rows as a DataFrame.

    Parameters
    ----------
    url : str
        A daily-statistics URL as produced by get_info().

    Returns
    -------
    pandas.DataFrame
        One row per contract whose instrument id contains 'IF'.
        Empty (no columns) when the page has no matching contracts.

    Raises
    ------
    urllib.error.HTTPError
        When the page does not exist for the requested date.
    '''
    # Context manager guarantees the HTTP response is closed even on error
    # (the original leaked the connection and had a redundant `url = url`).
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')
    soup = bs(html, 'xml')

    data_rows = []
    for dailydata in soup.find_all('dailydata'):  # one <dailydata> block per contract
        instrument = dailydata.find('instrumentid').text
        if 'IF' not in instrument:
            continue
        # Hoist values referenced more than once so each tag is read a single time.
        open_interest = dailydata.find('openinterest').text
        close_price = dailydata.find('closeprice').text
        settlement = dailydata.find('settlementprice').text
        pre_settlement = dailydata.find('presettlementprice').text
        data_rows.append({
            '合约代码': instrument,
            '今开盘': dailydata.find('openprice').text,
            '最高价': dailydata.find('highestprice').text,
            '最低价': dailydata.find('lowestprice').text,
            '成交量': dailydata.find('volume').text,
            '成交金额': dailydata.find('turnover').text,
            '持仓量': open_interest,
            '持仓变化': float(open_interest) - float(dailydata.find('preopeninterest').text),
            '今收盘': close_price,
            '今结算': settlement,
            '前结算': pre_settlement,
            '涨跌1': float(close_price) - float(pre_settlement),
            '涨跌2': float(settlement) - float(pre_settlement),
        })
    return pd.DataFrame(data_rows)
# Collect each day's table into a list and concatenate ONCE at the end:
# calling pd.concat inside the loop re-copies all accumulated rows every
# iteration (quadratic in total row count).
frames = []
for url in url_list:
    print(f'正在爬取{url}')
    try:
        frames.append(mktable(url))
        print('成功爬取')
    except urllib.error.HTTPError as e:
        # Some dates have no published file; log and skip that day.
        print(e)
# Fall back to an empty frame if every request failed, so to_excel still works.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# Write to Excel (TODO: switch to writing into a database later).
df.to_excel('IF期货合约日统计.xlsx', sheet_name='2020', index=False)
# NOTE(review): the two lines below are blog-page footer text that was
# scraped along with the code; left as comments so the file parses.
# Quant笔试程序题2023.9.17
# 最新推荐文章于 2024-07-12 16:16:27 发布