【Time Series】【专题系列】一、时序预测股票数据获取
目录
一、简介
"时间序列+金融"是一个很有"钱"景的话题,若想开始DeepLearning+TimeSeries+Stock,首先得拿到数据。本文提供了一种股票数据获取的源代码。
二、代码
1、首先,将要获取数据的股票按照图中xlsx的格式整理,sheet名设置为"stock_names"。表格需依次包含四列:股票代码、起始时间、终止时间、股票名称;其中"起始时间"和"终止时间"两列的列名必须与代码中一致,日期写成形如 2020-01-01 00:00:00 的格式。如下图:
2、然后配置好输入(stock_names.xlsx)和输出路径,直接运行下列代码,就可以实现数据获取。每只股票的数据会以"股票名称.xlsx"为文件名保存到输出路径下。
import os
import baostock as bs
import pandas as pd
from datetime import datetime
from datetime import timedelta
# Custom date parser that keeps only the calendar-date part of a value.
def custom_date_parser(x):
    """Return the calendar date of *x*, discarding any time-of-day.

    Args:
        x: Either a ``datetime`` instance (this includes pandas ``Timestamp``,
            which subclasses ``datetime``) or a string formatted as
            ``'YYYY-MM-DD HH:MM:SS'`` or ``'YYYY-MM-DD'``.

    Returns:
        A ``datetime.date``.

    Raises:
        TypeError: If *x* is neither a ``datetime`` nor a string.
        ValueError: If *x* is a string in neither supported format.
    """
    if isinstance(x, datetime):
        return x.date()
    if not isinstance(x, str):
        # Keep raising TypeError for arrays and other objects, exactly as
        # ``datetime.strptime`` itself does for non-string input.
        raise TypeError(
            f"custom_date_parser expects str or datetime, got {type(x).__name__}"
        )

    text = x.strip()
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    raise ValueError(
        f"time data {x!r} does not match '%Y-%m-%d %H:%M:%S' or '%Y-%m-%d'"
    )
class stock_data():
    """Fetch daily K-line data through baostock and append MACD / KDJ columns.

    ``self.params`` is a flat list of nine integers read in groups of three:

    * ``params[0:3]`` - MACD short EMA span, long EMA span, signal (DEA) span.
    * ``params[3:6]`` - not read by any method of this class (presumably the
      RSI periods mentioned in the original notes; RSI is not computed here).
    * ``params[6:9]`` - KDJ look-back window, K smoothing, D smoothing.
    """

    def __init__(self):
        self.params = self.init_baostock()

    def init_baostock(self):
        """Return the indicator parameters as a flat list of nine ints.

        Despite its name this method does not talk to baostock; it only
        supplies the constants used by the indicator calculations.
        """
        # The values are hard-coded here; an earlier variant loaded the same
        # comma-separated integers from 'configs/input_params.txt'.
        return [12, 26, 9,
                6, 12, 24,
                9, 3, 3]

    def get_stock_basedata(self, code, start_date, end_date):
        """Download daily bars for *code* and add DIFF/DEA/MACD and k/d/j.

        Args:
            code: Stock code passed unchanged to
                ``bs.query_history_k_data_plus``.
            start_date: Start of the range; anything ``pd.to_datetime`` accepts.
            end_date: End of the range; anything ``pd.to_datetime`` accepts.

        Returns:
            A ``DataFrame`` indexed by a ``DatetimeIndex`` built from the
            ``datetime`` column (baostock's ``date`` field renamed), holding
            the fields returned by baostock plus the columns ``DIFF``,
            ``DEA``, ``MACD``, ``k``, ``d`` and ``j``. The same frame is also
            stored on ``self.stock_pd``.

        Raises:
            RuntimeError: If the baostock login or the history query reports
                a non-zero error code.
        """
        login_result = bs.login()
        if login_result.error_code != '0':
            raise RuntimeError(
                f"baostock login failed: {login_result.error_msg}")

        try:
            # Format both bounds as YYYY-MM-DD, dropping any time-of-day.
            # NOTE(review): both bounds are also shifted forward by one day,
            # exactly as the original code did - confirm the offset is wanted.
            query_start = (pd.to_datetime(start_date)
                           + timedelta(days=1)).strftime("%Y-%m-%d")
            query_end = (pd.to_datetime(end_date)
                         + timedelta(days=1)).strftime("%Y-%m-%d")

            # adjustflag='2' requests forward-adjusted prices.
            rs = bs.query_history_k_data_plus(
                code,
                "date,code,open,high,low,close,preclose,volume,amount,turn",
                start_date=query_start, end_date=query_end, frequency="d",
                adjustflag='2')

            data_list = []
            # `and` short-circuits, so rs.next() is not called after an error.
            while rs.error_code == '0' and rs.next():
                data_list.append(rs.get_row_data())
            if rs.error_code != '0':
                # Fail with the service's message instead of a later KeyError
                # on the missing price columns.
                raise RuntimeError(
                    f"baostock query failed for {code}: {rs.error_msg}")
            fields = rs.fields
        finally:
            # Always release the session opened by bs.login() above.
            bs.logout()

        price_columns = ['open', 'high', 'low', 'close', 'volume']
        frame = pd.DataFrame(data_list, columns=fields)
        frame[price_columns] = frame[price_columns].astype('float64')
        frame = frame.rename(columns={'date': 'datetime'})
        frame.index = pd.DatetimeIndex(frame['datetime'])

        self._add_macd(frame)
        self._add_kdj(frame)

        self.stock_pd = frame
        return self.stock_pd

    def _add_macd(self, frame):
        """Add DIFF, DEA and MACD columns to *frame* in place from 'close'."""
        short_span, long_span, signal_span = self.params[0:3]
        close = frame['close']
        frame['DIFF'] = (close.ewm(span=short_span).mean()
                         - close.ewm(span=long_span).mean())
        frame['DEA'] = frame['DIFF'].ewm(span=signal_span).mean()
        frame['MACD'] = 2 * (frame['DIFF'] - frame['DEA'])

    def _add_kdj(self, frame):
        """Add k, d and j columns to *frame* in place from close/low/high."""
        window, k_smooth, d_smooth = self.params[6:9]
        low = frame['low']
        high = frame['high']
        # Until a full window is available, fall back to the running extreme
        # over all rows seen so far.
        lowest = low.rolling(window, min_periods=window).min()
        lowest = lowest.fillna(low.expanding().min())
        highest = high.rolling(window, min_periods=window).max()
        highest = highest.fillna(high.expanding().max())

        rsv = (frame['close'] - lowest) / (highest - lowest) * 100
        # com = smoothing - 1 reproduces the original com=2 for the 9,3,3
        # parameter set.
        frame['k'] = rsv.ewm(com=k_smooth - 1).mean()
        frame['d'] = frame['k'].ewm(com=d_smooth - 1).mean()
        frame['j'] = 3 * frame['k'] - 2 * frame['d']
def get_data_workflow(input_stock_names, output_stock_datas):
    """Download data for every stock listed in a workbook and save each one.

    The sheet ``stock_names`` of *input_stock_names* is read row by row. The
    first four columns are taken, in order, as: stock code, start date
    (column ``起始时间``), end date (column ``终止时间``) and stock name. Each
    stock's data is written to ``<output_stock_datas>/<stock name>.xlsx``.

    Args:
        input_stock_names: Path of the input ``.xlsx`` workbook.
        output_stock_datas: Directory that receives one workbook per stock;
            it is created if it does not exist.
    """
    # 1. Read the list of stocks to download.
    stock_class = stock_data()
    stocks_df = pd.read_excel(input_stock_names, sheet_name='stock_names')
    # Convert the two date columns explicitly and drop the time-of-day. This
    # avoids read_excel's `date_parser` argument, which pandas has deprecated.
    for date_column in ('起始时间', '终止时间'):
        stocks_df[date_column] = pd.to_datetime(
            stocks_df[date_column]).dt.normalize()

    # to_excel does not create missing directories, so do it up front.
    if output_stock_datas:
        os.makedirs(output_stock_datas, exist_ok=True)

    # 2. Fetch each stock; only the first four columns are used, so extra
    #    columns in the sheet are ignored.
    for row in stocks_df.itertuples(index=False, name=None):
        code, start_date, end_date, stock_name = row[:4]
        tmp_stock_pds = stock_class.get_stock_basedata(
            code, start_date, end_date)
        # 3. Save the result under the stock's name.
        target_path = os.path.join(output_stock_datas, f"{stock_name}.xlsx")
        tmp_stock_pds.to_excel(target_path, index=False)
if __name__ == '__main__':
    # Script entry point: read the stock list from the input workbook and
    # write one result workbook per stock into the output directory.
    stock_list_path = 'input_datas/stock_names.xlsx'
    result_dir = 'output_datas/stock_datas'
    get_data_workflow(
        input_stock_names=stock_list_path,
        output_stock_datas=result_dir,
    )