import tushare as ts
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import tqdm
%matplotlib inline
# Global matplotlib defaults applied to every figure drawn after this point.
plt.rcParams.update({
    'figure.figsize': (20.0, 15.0),  # default size of plots
    # 'image.interpolation': 'nearest',
    'image.cmap': 'gray',
    # Chinese text support: select a font family that can render Chinese labels.
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
class Storage:
    """Cache of previously fetched intraday tick data.

    Entries are kept in a dict keyed by a ``code_date`` string so that ticks
    which were already pulled can be read back instead of fetched again,
    saving run time on later use. The dict can be persisted with ``dump`` and
    restored with ``load``.
    """

    def __init__(self, code_date_ticks=None):
        """Create the cache.

        code_date_ticks: optional dict in the format ``{code_date: ticks}``.
            When given, it is stored by reference (not copied). When omitted,
            every instance gets its own fresh dict; a ``{}`` default in the
            signature would be one dict shared by all instances.
        """
        if code_date_ticks is None:
            code_date_ticks = {}
        self._code_date_ticks = code_date_ticks  # format {code_date: ticks}

    def set_ticks(self, code_date='', ticks=None):
        """Store ``ticks`` under the key ``code_date``, replacing any old entry."""
        self._code_date_ticks[code_date] = ticks

    def get_ticks(self, code_date=''):
        """Return the ticks stored under ``code_date``.

        Returns the literal ``False`` (not ``None``) when the key is absent;
        callers test the result with ``is False``.
        """
        if code_date not in self._code_date_ticks:
            return False
        return self._code_date_ticks[code_date]

    def dump(self, ticks_file_name=None):
        """Pickle the whole cache dict into the file ``ticks_file_name``.

        ticks_file_name: str, path of the file to write (overwritten).
        """
        # ``with`` closes the file even when pickling raises.
        with open(ticks_file_name, 'wb') as f:
            pickle.dump(self._code_date_ticks, f)

    def load(self, ticks_file_name=None):
        """Replace the cache dict with the one pickled in ``ticks_file_name``.

        ticks_file_name: str, path of a file previously written by ``dump``.
        """
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that come from a trusted source.
        with open(ticks_file_name, 'rb') as f:
            self._code_date_ticks = pickle.load(f)

    def show_storage(self):
        """Return the underlying ``{code_date: ticks}`` dict itself (no copy)."""
        return self._code_date_ticks
class Correlation:
    """Compute day-by-day intraday tick-price correlations between two codes.

    For each date the correlation of the two codes' tick prices is computed;
    the per-day values form a series that is smoothed with a bias-corrected
    exponentially weighted average and can be plotted. Fetched tick prices
    are kept in a ``Storage`` cache so they are not requested twice.
    """

    def __init__(self):
        self._legal_dates = []  # dates that yielded a correlation (plot x-axis)
        self._codes_info = ''   # 'codeA_codeB' of the most recent computation
        self._corrs = []        # smoothed correlations of the most recent run
        self._cache = Storage()

    def _get_prices(self, code, date):
        """Return the tick prices of ``code`` on ``date``, or None if unavailable.

        The cache is consulted first under the key ``code + '_' + date``. On a
        miss the ticks are requested via ``ts.get_tick_data(..., src='tt')``
        and their ``'price'`` column is cached. Nothing is cached when the API
        returns None. A cached value of None is also reported as unavailable.
        """
        key = code + '_' + date
        cached = self._cache.get_ticks(key)
        if cached is not False:  # Storage signals "absent" with the literal False
            return cached
        ticks = ts.get_tick_data(code, date, src='tt')  # call API
        if ticks is None:  # no data for that code/date
            return None
        prices = ticks['price']
        self._cache.set_ticks(key, prices)  # pd.Series
        return prices

    def compute_day_corr(self, codeA='', codeB='', date=''):
        """Return the correlation of the two codes' tick prices on ``date``.

        codeA, codeB: str codes passed to the tick-data API.
        date: str date passed to the tick-data API.

        Returns the correlation value, or the literal ``False`` when tick data
        for either code is unavailable. On success ``date`` is appended to the
        list of legal dates. ``codeB`` is not fetched when ``codeA`` has no data.
        """
        self._codes_info = codeA + '_' + codeB
        a_price = self._get_prices(codeA, date)
        if a_price is None:
            return False  # skip null resource
        b_price = self._get_prices(codeB, date)
        if b_price is None:
            return False  # skip null resource
        self._legal_dates.append(date)  # only legal dates reach this point
        # Fixed column labels: keying the frame by the codes themselves would
        # collapse it to a single column when codeA == codeB and make
        # ``iloc[0, 1]`` raise IndexError.
        comb = pd.DataFrame({'A': a_price, 'B': b_price})
        return comb.corr().iloc[0, 1]

    def compute_corrs_along_day(self, codeA='', codeB='', dates=None):
        """Compute the smoothed per-day correlation series over ``dates``.

        codeA, codeB: str codes to correlate.
        dates: iterable of date strings; ``None`` is treated as no dates.

        Returns ``(legal_dates, smoothed_corrs)``: the dates for which data was
        available, and the matching smoothed correlations. Dates without data
        are skipped.
        """
        self._legal_dates = []  # refresh legal dates for each call
        if dates is None:
            dates = []
        corrs_along_day = []
        print('Processing%s_%s...'%(codeA,codeB),end='*')
        for d in tqdm.tqdm(dates):
            corr = self.compute_day_corr(codeA, codeB, d)
            if corr is not False:
                corrs_along_day.append(corr)
        # smooth curve
        self._corrs = self.smooth_corrs(corrs_along_day)
        return self._legal_dates, self._corrs

    def smooth_corrs(self, lis=None, beta=0.9):
        """Return the bias-corrected exponentially weighted average of ``lis``.

        lis: sequence of numbers; ``None`` or empty gives ``[]``.
        beta: smoothing factor, ``0 <= beta < 1`` (default 0.9).

        Raises ValueError when ``beta`` is outside that range (``beta == 1``
        would make the bias correction divide by zero).
        """
        if not 0 <= beta < 1:
            raise ValueError('beta must satisfy 0 <= beta < 1')
        if lis is None:
            lis = []
        v = 0
        smoothed = []
        for i, value in enumerate(lis, start=1):
            v = beta * v + (1 - beta) * value
            # Dividing by (1 - beta**i) removes the bias toward the zero start.
            smoothed.append(v / (1 - beta ** i))
        return smoothed

    def plot_corrs(self):
        """Plot the latest smoothed correlations and save them as a PNG.

        The figure is written to ``'corrs_<codeA>_<codeB>.png'`` (a relative
        path), with the legal dates as x tick labels.
        """
        # partition() does not fail on the initial empty ``_codes_info``,
        # where split('_') followed by two-name unpacking raised ValueError.
        codeA, _, codeB = self._codes_info.partition('_')
        plt.plot(self._corrs,label='%s-%sCorr.'%(codeA,codeB))
        plt.legend(loc='best', shadow=True, fontsize='x-large')
        plt.grid()
        plt.xlabel('date',fontsize='x-large')
        plt.ylabel('Corr.',fontsize='x-large')
        plt.xticks(range(len(self._legal_dates)),self._legal_dates,rotation=45,fontproperties='STKAITI')
        plt.title('Corrs. Curve by day with Exponentially Weighted Averges',fontsize='x-large')
        plt.savefig('corrs_%s.png'%self._codes_info)

    def call_dump(self, ticks_file_name=None):
        """Write the tick cache to the file ``ticks_file_name``."""
        self._cache.dump(ticks_file_name)

    def call_load(self, ticks_file_name=None):
        """Replace the tick cache with the one stored in ``ticks_file_name``."""
        self._cache.load(ticks_file_name)

    def call_storage(self):
        """Return the cache's underlying ``{code_date: ticks}`` dict."""
        return self._cache.show_storage()