常用函数
# In[64]:
# Build the 2x2 identity matrix.
i2 = np.eye(N=2)
# In[65]:
# Bare expression: in a notebook this echoes the matrix as the cell result.
i2
# Out[65]:
array([[ 1., 0.],
[ 0., 1.]])
# In[66]:
# Write the identity matrix to a plain-text file.
np.savetxt(fname='eye.txt', X=i2)
# In[67]:
# Import the CSV file.
# usecols is a tuple of column indices; only those fields are read.
# unpack=True splits the result by column, so the closing-price and
# volume arrays are assigned to c and v respectively.
c, v = np.loadtxt(
    fname='data/data.csv',
    delimiter=',',
    usecols=(6, 7),
    unpack=True,
)
# In[68]:
#成交量加权平均价格vwap
vwap = np.average(c,weights=v)
print "VWAP=",vwap
# In[69]:
#算数平均函数
print "mean = ",np.mean(c)
# In[70]:
#时间加权平均价格
t = np.arange(len(c))
print "TWAP = ",np.average(c,weights=t)
# In[71]:
h,l = np.loadtxt('data/data.csv',delimiter=',',usecols=(4,5),unpack=True)
# In[72]:
print "highest = ",np.max(h)
print "lowest = ",np.min(l)
# In[73]:
#返回数组取值范围 ptp 即数组最大值与最小值之差
print "Spread high price",np.ptp(h)
print "Spread high price",np.ptp(l)
# In[74]:
#计算收盘价的中位数median
c = np.loadtxt('data/data.csv',delimiter=",",usecols=(6,),unpack=True)
print "median = ",np.median(c)
# In[75]:
#校验median
sorted_close = np.msort(c)
print "sorted =",sorted_close
n = len(c)
middle = (sorted_close[n/2]+sorted_close[(n-1)/2])/2
print middle
# In[76]:
#方差
print "收盘价方差 =",np.var(c)
# In[77]:
#校验方差
print "校验的方差 = ",np.mean((c - np.mean(c))**2)
分析股票收益率时,主要关注收益率的方差或标准差,它们代表着投资风险的大小。
# In[78]:
#diff函数返回一个由相邻数组元素的差值组成的数组
returns = np.diff(c)/c[:-1]
# In[79]:
#收益率标准差
print "收益率标准差 =",np.std(returns)
# In[80]:
#对数收益率
logreturns = np.diff(np.log(c))
# In[81]:
#输出数组中所有正值元素的索引
print np.where(returns>0)
# In[82]:
#年波动率等于对数收益率的标准差除以其均值,再除以交易日倒数的平方根(股票年交易日按252天算)
annual_volatility = np.std(logreturns)/np.mean(logreturns)
annual_volatility = annual_volatility/np.sqrt(1/252.)
print annual_volatility
# In[83]:
#月波动率
print "月波动率 =",annual_volatility/np.sqrt(1/12.)
日期分析
# In[84]:
# NumPy is geared towards floating-point computation, so each date string is
# converted to a number first.
def datestr2num(s):
    """Return the weekday index of a ``DD-MM-YYYY`` date string.

    Args:
        s: Date text in ``%d-%m-%Y`` format. Bytes are accepted as well,
            because ``np.loadtxt`` may hand converters raw bytes (for
            example with ``encoding='bytes'``).

    Returns:
        int: 0 for Monday through 6 for Sunday (``date.weekday()``).

    Raises:
        ValueError: If ``s`` does not match ``%d-%m-%Y``.
    """
    if not isinstance(s, str):
        # Non-str input (bytes on Python 3): decode before parsing.
        s = s.decode('ascii')
    return datetime.datetime.strptime(s, '%d-%m-%Y').date().weekday()
# In[85]:
dates,close = np.loadtxt('data/data.csv',delimiter=',',usecols=(1,6),converters={1:datestr2num},unpack=True)
# In[86]:
#周一:0,周二:1。。。周日:6
print dates
# Out[86]
[ 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 1. 2.
3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4.]
# In[87]:
#where函数可以获取满足条件的元素的索引值,take可以按照索引从数组中取出响应的元素
averages = np.zeros(5)
for i in range(5):
indices = np.where(dates == i)
prices = np.take(close,indices)
avg = np.mean(prices)
print "Day ",i,"prices ",prices,"Average ",avg
averages[i] = avg
# In[88]:
print averages
# In[89]:
#获取平均收盘价最高和最低的工作日
top = np.max(averages)
print "Highest avreage:",top
print "Top day of the week ",np.argmax(averages)
bottom = np.min(averages)
print "Lowest average",bottom
print "Bottom day of the week ",np.argmin(averages)
周汇总 ☆☆☆
# In[90]:
dates,open1,high,low,close = np.loadtxt('data/data.csv',delimiter=',',usecols=(1,3,4,5,6),converters={1:datestr2num},unpack=True)
# In[91]:
close = close[:16]
dates = dates[:16]
# In[92]:
first_monday = np.ravel(np.where(dates == 0))[0]
print "第一个周一的索引是 ",first_monday
# Out[92]:
第一个周一的索引是 1
# In[93]:
last_friday = np.ravel(np.where(dates == 4))[-1]
print "最后一个周五的索引是 ",last_friday
# Out[93]:
最后一个周五的索引是 15
# In[94]:
weeks_indices = np.arange(first_monday,last_friday+1)
print "weeks indices initial",weeks_indices
# Out[94]:
weeks indices initial [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
# In[95]:
#split函数将数组切分为3组
weeks_indice = np.split(weeks_indices,3)
print weeks_indice
# In[96]:
# Build one summary tuple per week of data.
def summarize(a, o, h, l, c, symbol="APPL"):
    """Summarize one week of price data as a tuple.

    Args:
        a: Indices of the week's samples, in order; ``a[0]`` is the week's
            first sample and ``a[-1]`` its last.
        o: Array read at ``a[0]`` (the first argument after ``a``).
        h: Array whose maximum over ``a`` is reported.
        l: Array whose minimum over ``a`` is reported.
        c: Array read at ``a[-1]``.
        symbol: Label placed first in the result. Defaults to ``"APPL"``,
            the value that used to be hard-coded.

    Returns:
        tuple: ``(symbol, o[a[0]], max(h[a]), min(l[a]), c[a[-1]])``.
    """
    monday_open = o[a[0]]
    week_high = np.take(h, a).max()
    week_low = np.take(l, a).min()
    friday_close = c[a[-1]]
    return (symbol, monday_open, week_high, week_low, friday_close)
# In[97]:
weeksummary = np.apply_along_axis(summarize,1,weeks_indice,open1,high,low,close)
# In[98]:
print weeksummary
# Out[98]:
[['APPL' '335.8' '346.7' '334.3' '346.5']
['APPL' '347.8' '360.0' '347.6' '356.8']
['APPL' '356.7' '364.9' '349.5' '350.5']]
# In[99]:
# Save the weekly summary to a CSV file; fmt='%s' writes every field as text.
np.savetxt(
    fname="weeksummary1.csv",
    X=weeksummary,
    delimiter=',',
    fmt='%s',
)