指数通常反映了一个行业或者一类股票的行情数据。本文将对697支指数的历史年涨幅进行分析,为量化投资作一个参考。
1、数据准备
本文程序中用到两个数据:
(1)index20210703.csv:存储了697支指数代码。
(2)2010-2021全部指数K线数据:存储了697支指数的历史K线数据。
指数历史数据获取方式在文末给出。
2、获取某一指数指定时间段内的历史数据
# Fetch the rows of one index's history that fall inside a date range.
def y_get_price(stockfile, start_date, end_date):
    """Return the rows of ``stockfile`` dated within [start_date, end_date].

    The first CSV column is read as the trading date in ``YYYY-MM-DD`` form.
    The bounds do not have to be trading days themselves: a bound that falls
    on a day without data (or outside the stored period) simply selects the
    nearest rows that lie inside the requested range.

    Args:
        stockfile: Path of the CSV file holding the index history.
        start_date: Inclusive lower bound, ``'YYYY-MM-DD'``.
        end_date: Inclusive upper bound, ``'YYYY-MM-DD'``; must be later
            than ``start_date``.

    Returns:
        A DataFrame with the matching rows in file order (original index
        labels are kept). It is empty when no row falls inside the range.

    Raises:
        SystemExit: If ``start_date`` is not strictly earlier than
            ``end_date``.
        ValueError: If a bound or a stored date does not match ``%Y-%m-%d``.
    """
    start_date_time = datetime.strptime(start_date, '%Y-%m-%d')
    end_date_time = datetime.strptime(end_date, '%Y-%m-%d')
    # Reject reversed or empty ranges up front instead of silently
    # stretching the range to the end of the stored data.
    if start_date_time >= end_date_time:
        sys.exit('[INFO] 日期范围有误,请更正。')
    df_data = pd.read_csv(stockfile)
    trade_dates = pd.to_datetime(df_data.iloc[:, 0], format='%Y-%m-%d')
    # One vectorised mask replaces probing the calendar day by day, which
    # rescanned the whole column per step and could never terminate when
    # the start bound lay beyond the last stored date.
    in_range = (trade_dates >= start_date_time) & (trade_dates <= end_date_time)
    return df_data.loc[in_range.to_numpy()]
3、获取指数历史年涨幅
# Compute the yearly percentage change of one index for each complete calendar year.
def year_increase_ratio(stockfile):
    """Return the per-year percentage change stored in ``stockfile``.

    Only complete calendar years are reported: the first year counts when
    the data starts in January and the last year counts when the data ends
    in December; otherwise that partial year is left out.

    Args:
        stockfile: Path of the CSV file holding the index history. The
            first column is the date (``YYYY-MM-DD``); columns 1 and 2 are
            the prices used for the calculation (presumably open and
            close -- verify against the data files).

    Returns:
        ``(True, {year: ratio})`` where ``ratio`` is
        ``(last row column 2 - first row column 1) / first row column 1``
        of that year, in percent, rounded to two decimals; or
        ``(False, message)`` when the file does not cover a single
        complete calendar year.
    """
    not_enough = '[INFO] 所查询指数数据量未达到一个自然年。'
    df_data = pd.read_csv(stockfile)
    if df_data.empty:
        return False, not_enough
    trade_dates = pd.to_datetime(df_data.iloc[:, 0], format='%Y-%m-%d')
    first_day = trade_dates.iloc[0]
    last_day = trade_dates.iloc[-1]
    start_year = first_day.year if first_day.month == 1 else first_day.year + 1
    # Whether the final year is complete depends on the month of the LAST
    # row; testing the first row's month here dropped complete final years.
    end_year = last_day.year if last_day.month == 12 else last_day.year - 1
    if start_year > end_year:
        return False, not_enough
    res = {}
    row_years = trade_dates.dt.year.to_numpy()
    # Years are sliced from the frame already in memory, so the CSV is not
    # read again for every year.
    for year in range(start_year, end_year + 1):
        df_tmp = df_data.loc[row_years == year]
        if df_tmp.empty:
            # A gap in the data: there is nothing to compute for this year.
            continue
        base_price = df_tmp.iloc[0, 1]
        final_price = df_tmp.iloc[-1, 2]
        res[year] = round((final_price - base_price) / base_price * 100, 2)
    if not res:
        return False, not_enough
    return True, res
4、全部代码
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 4 11:05:02 2021
@author: Administrator
"""
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
# Compare the chronological order of two date strings.
def date_str_compare(date1, date2):
    """Tell whether ``date1`` is earlier than, later than, or equal to ``date2``.

    Both arguments are ``-``-separated strings; every field is converted to
    ``int`` and the first three fields are compared from left to right.

    Returns:
        0 if ``date1`` is earlier, 1 if it is later, 2 if both are equal.
    """
    first = [int(part) for part in date1.split('-')]
    second = [int(part) for part in date2.split('-')]
    # The first differing field (year, then month, then day) decides.
    for pos in range(3):
        left, right = first[pos], second[pos]
        if left != right:
            return 0 if left < right else 1
    return 2
# Fetch the rows of one index's history that fall inside a date range.
def y_get_price(stockfile, start_date, end_date):
    """Return the rows of ``stockfile`` dated within [start_date, end_date].

    The first CSV column is read as the trading date in ``YYYY-MM-DD`` form.
    The bounds do not have to be trading days themselves: a bound that falls
    on a day without data (or outside the stored period) simply selects the
    nearest rows that lie inside the requested range.

    Args:
        stockfile: Path of the CSV file holding the index history.
        start_date: Inclusive lower bound, ``'YYYY-MM-DD'``.
        end_date: Inclusive upper bound, ``'YYYY-MM-DD'``; must be later
            than ``start_date``.

    Returns:
        A DataFrame with the matching rows in file order (original index
        labels are kept). It is empty when no row falls inside the range.

    Raises:
        SystemExit: If ``start_date`` is not strictly earlier than
            ``end_date``.
        ValueError: If a bound or a stored date does not match ``%Y-%m-%d``.
    """
    start_date_time = datetime.strptime(start_date, '%Y-%m-%d')
    end_date_time = datetime.strptime(end_date, '%Y-%m-%d')
    # Reject reversed or empty ranges up front instead of silently
    # stretching the range to the end of the stored data.
    if start_date_time >= end_date_time:
        sys.exit('[INFO] 日期范围有误,请更正。')
    df_data = pd.read_csv(stockfile)
    trade_dates = pd.to_datetime(df_data.iloc[:, 0], format='%Y-%m-%d')
    # One vectorised mask replaces probing the calendar day by day, which
    # rescanned the whole column per step and could never terminate when
    # the start bound lay beyond the last stored date.
    in_range = (trade_dates >= start_date_time) & (trade_dates <= end_date_time)
    return df_data.loc[in_range.to_numpy()]
# Compute the yearly percentage change of one index for each complete calendar year.
def year_increase_ratio(stockfile):
    """Return the per-year percentage change stored in ``stockfile``.

    Only complete calendar years are reported: the first year counts when
    the data starts in January and the last year counts when the data ends
    in December; otherwise that partial year is left out.

    Args:
        stockfile: Path of the CSV file holding the index history. The
            first column is the date (``YYYY-MM-DD``); columns 1 and 2 are
            the prices used for the calculation (presumably open and
            close -- verify against the data files).

    Returns:
        ``(True, {year: ratio})`` where ``ratio`` is
        ``(last row column 2 - first row column 1) / first row column 1``
        of that year, in percent, rounded to two decimals; or
        ``(False, message)`` when the file does not cover a single
        complete calendar year.
    """
    not_enough = '[INFO] 所查询指数数据量未达到一个自然年。'
    df_data = pd.read_csv(stockfile)
    if df_data.empty:
        return False, not_enough
    trade_dates = pd.to_datetime(df_data.iloc[:, 0], format='%Y-%m-%d')
    first_day = trade_dates.iloc[0]
    last_day = trade_dates.iloc[-1]
    start_year = first_day.year if first_day.month == 1 else first_day.year + 1
    # Whether the final year is complete depends on the month of the LAST
    # row; testing the first row's month here dropped complete final years.
    end_year = last_day.year if last_day.month == 12 else last_day.year - 1
    if start_year > end_year:
        return False, not_enough
    res = {}
    row_years = trade_dates.dt.year.to_numpy()
    # Years are sliced from the frame already in memory, so the CSV is not
    # read again for every year.
    for year in range(start_year, end_year + 1):
        df_tmp = df_data.loc[row_years == year]
        if df_tmp.empty:
            # A gap in the data: there is nothing to compute for this year.
            continue
        base_price = df_tmp.iloc[0, 1]
        final_price = df_tmp.iloc[-1, 2]
        res[year] = round((final_price - base_price) / base_price * 100, 2)
    if not res:
        return False, not_enough
    return True, res
# Plot the yearly change of one index together with its running total.
def single_plot(yir):
    """Draw the per-year change and its running sum, then show the figure.

    Args:
        yir: Mapping of year -> yearly change in percent; keys are used as
            the x axis in the mapping's iteration order.

    NOTE(review): the second line is the plain running sum of the yearly
    percentages (``np.cumsum``), not a compounded return -- confirm this is
    the intended meaning of the legend label.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese legend labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayed correctly
    years = list(yir)
    ratios = [yir[year] for year in years]
    running_total = np.cumsum(ratios)
    shared_style = {'marker': '.', 'linewidth': 1.0}
    series = (
        (ratios, 'blue', '历史涨幅(%)'),
        (running_total, 'green', '累计涨幅(%)'),
    )
    for values, colour, caption in series:
        plt.plot(years, values, color=colour, label=caption, **shared_style)
    plt.legend()
    plt.show()
# Share of years whose change is greater than zero.
def increase_times_ratio(yir):
    """Return the fraction of years with a positive change.

    Args:
        yir: Mapping of year -> yearly change; must not be empty, otherwise
            the division raises ``ZeroDivisionError``.

    Returns:
        The number of values greater than zero divided by the number of
        values, rounded to two decimals.
    """
    changes = list(yir.values())
    rising_years = sum(1 for change in changes if change > 0)
    return round(rising_years / len(changes), 2)
# Sort in descending order and pick the first k entries.
def max_topk(itr_list, k):
    """Return the positions of the ``k`` largest values of ``itr_list``.

    Args:
        itr_list: Sequence of comparable values.
        k: How many positions to return.

    Returns:
        Positions ordered from the largest value downwards; equal values
        keep their original relative order. Fewer than ``k`` positions are
        returned when the sequence is shorter than ``k``.
    """
    positions = list(range(len(itr_list)))
    positions.sort(key=itr_list.__getitem__, reverse=True)
    return positions[:k]
if __name__ == '__main__':
    # Results of the indexes that were processed successfully. The three
    # lists are always appended together, so position m refers to the same
    # index in each of them.
    yir_list = []
    itr_list = []
    name_list = []
    df_data = pd.read_csv('index20210703.csv')
    df_index = df_data.iloc[:, 0].tolist()
    df_display_name = df_data.iloc[:, 1].tolist()
    total = len(df_index)
    # NOTE(review): iteration starts at 1, so the first row of the code
    # list is never processed -- confirm this is intended.
    for i in range(1, total):
        stockfile = '../../Data/2010-2021全部指数K线数据/' + df_index[i] + '.csv'
        flag, yir = year_increase_ratio(stockfile)
        if flag:
            yir_list.append(yir)
            itr_list.append(increase_times_ratio(yir))
            # Remember the name next to its result: looking names up by the
            # position in the filtered result lists would pick the wrong
            # index as soon as any index is skipped.
            name_list.append(df_display_name[i])
        # Report progress for skipped indexes as well.
        if (i+1) % 50 == 0:
            print('[INFO] {}/{} processed.'.format(i+1, total))
    k = 3
    mt = max_topk(itr_list, k)
    print('[INFO] 涨幅次数最多的前{}个指数依次是:'.format(k))
    for rank, m in enumerate(mt, start=1):
        print('{}、 {}, 涨幅次数比例: {}。'.format(rank, name_list[m], itr_list[m]))
        single_plot(yir_list[m])
5、数据获取方式
(1)index20210703.csv:关注"量化之窗"公众号,并输入“zsdm”。
(2)2010-2021全部指数K线数据:关注"量化之窗"公众号,并输入“zskxsj”。
(3)获取涨幅次数最多的前10个指数的名称:关注"量化之窗"公众号,并输入“zstop10”。
如有疑问,请在文章下方留言。