numpy_basic2
# 六、numpy的常用函数
1. 读取文件
- 逻辑上可被解释为二维数组的文本文件:
数据项1<分隔符>数据项2<分隔符>...<分隔符>数据项n
numpy.loadtxt(
文件路径,
delimiter=分隔符(缺省一个空格),
usecols=列序列(缺省所有列),
unpack=是否展开列(缺省False), # 返回一个二维数组(False)或多个一维数组(True)
dtype=元素类型(缺省float),
converters=转换器字典(缺省不做转换))
# k.py
import numpy as np
import datetime as dt
import matplotlib.pyplot as mp
import matplotlib.dates as md
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field to a ``YYYY-MM-DD`` string.

    Intended as a ``numpy.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The same date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the text does not match ``%d-%m-%Y``.
    """
    # Depending on the NumPy version / ``encoding`` setting, loadtxt hands
    # converters either bytes or str. ``str(x, encoding=...)`` raises
    # TypeError for str input, so only decode when we really got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.strftime('%Y-%m-%d')
# Load date, open, high, low and close columns as separate 1-D arrays.
# The date column goes through dmy2ymd so it can be parsed as datetime64[D].
dates, opening_prices, highest_prices, lowest_prices, closing_prices = np.loadtxt(
    '../data/aapl.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    unpack=True,
    dtype='M8[D], f8, f8, f8, f8',
    converters={1: dmy2ymd},
)

# Figure and axis labelling.
mp.figure('Candlestick', dpi=150, facecolor='lightgray')
mp.title('Candlestick', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)

x_axis = mp.gca().xaxis
# Major ticks on every Monday.
x_axis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
# Minor ticks on every day.
x_axis.set_minor_locator(md.DayLocator())
# Label format of the major ticks.
x_axis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')

# Convert datetime64 values to datetime objects for plotting.
dates = dates.astype(dt.datetime)

# Boolean masks: rising days (close above open by at least 0.01) and
# falling days (open above close by at least 0.01).
price_change = closing_prices - opening_prices
rise = price_change >= 0.01
fall = price_change <= -0.01

# Fill colours (RGB): white for rising days, dark green for falling days.
# Days matching neither mask keep the initial zeros, i.e. black.
fc = np.zeros((dates.size, 3), dtype='f4')
fc[rise] = (1, 1, 1)
fc[fall] = (0, 0.5, 0)

# Edge colours (RGB): red for rising days, dark green for falling days.
ec = np.zeros((dates.size, 3), dtype='f4')
ec[rise] = (1, 0, 0)
ec[fall] = (0, 0.5, 0)

# Thin bar from low to high (the wick), then a wide bar from open to close
# (the body).
mp.bar(dates, highest_prices - lowest_prices, width=0.01,
       bottom=lowest_prices, color=fc, edgecolor=ec)
mp.bar(dates, price_change, width=0.6,
       bottom=opening_prices, color=fc, edgecolor=ec)

# Let matplotlib rotate/align the date labels on the x axis.
mp.gcf().autofmt_xdate()
mp.show()
2. 算术平均值
- 样本:
S = [s1, s2, ..., sn]
- 算术平均值:
m = (s1+s2+...+sn) / n
- 测量值 = 真值 + 误差(s 为真值,di 为第 i 次测量的误差):
s1 = s+d1
s2 = s+d2
...
sn = s+dn
m =(s1+s2+...+sn) / n
=(s+s+...+s) / n + (d1+d2+...+dn)/n
=s + (d1+d2+...+dn)/n
当 n->oo 时,若误差随机且均值为零,则 (d1+d2+...+dn)/n -> 0,即 m -> s
- 算术平均值表示对真值的无偏估计。
numpy.mean(S)->m
# mean.py
import numpy as np
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field to a ``YYYY-MM-DD`` string.

    Intended as a ``numpy.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The same date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the text does not match ``%d-%m-%Y``.
    """
    # This script only imports numpy at the top, so bring datetime into
    # scope here; otherwise ``dt`` is undefined when the converter runs.
    import datetime as dt

    # Depending on the NumPy version / ``encoding`` setting, loadtxt hands
    # converters either bytes or str. ``str(x, encoding=...)`` raises
    # TypeError for str input, so only decode when we really got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.strftime('%Y-%m-%d')
# Load date, open, high, low and close columns as separate 1-D arrays.
# The date column goes through dmy2ymd so it can be parsed as datetime64[D].
dates, opening_prices, highest_prices, lowest_prices, closing_prices = np.loadtxt(
    '../data/aapl.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    unpack=True,
    dtype='M8[D], f8, f8, f8, f8',
    converters={1: dmy2ymd},
)

# Hand-written arithmetic mean: accumulate the closing prices one by one,
# then divide by the number of samples.
mean = 0
for cp in closing_prices:
    mean += cp
mean /= closing_prices.size
print(mean)

# The same value via numpy. The last printed digits may differ slightly from
# the loop above because the floating-point additions happen in a different
# order.
mean = np.mean(closing_prices)
print(mean)
351.03766666666667
351.0376666666667
3. 加权平均值
- 样本:
S = [s1, s2, ..., sn]
- 权重:
W = [w1, w2, ..., wn]
- 加权平均值:
a = (s1w1+s2w2+...+snwn)/(w1+w2+...+wn)
numpy.average(S, weights=W)->a
- VWAP - 成交量加权平均价格
- TWAP - 时间加权平均价格
# VWAP.py
import numpy as np
# Load the closing-price and volume columns as two 1-D float arrays.
closing_prices, volumes = np.loadtxt(
    '../data/aapl.csv', delimiter=',', usecols=(6, 7), unpack=True)

# Hand-written volume-weighted average price: sum(price * volume) divided
# by sum(volume).
vwap, wsum = 0, 0
for closing_price, volume in zip(closing_prices, volumes):
    vwap += closing_price * volume
    wsum += volume
vwap /= wsum
print(vwap)

# The same value via numpy, using the volumes as weights.
vwap = np.average(closing_prices, weights=volumes)
print(vwap)
350.5895493532009
350.5895493532009
# TWAP.py
import numpy as np
import datetime as dt
def dmy2days(dmy):
    """Convert a ``DD-MM-YYYY`` date field to a day count.

    Intended as a ``numpy.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The number of days between ``datetime.date.min`` and the parsed date.

    Raises:
        ValueError: If the text does not match ``%d-%m-%Y``.
    """
    # Depending on the NumPy version / ``encoding`` setting, loadtxt hands
    # converters either bytes or str. ``str(x, encoding=...)`` raises
    # TypeError for str input, so only decode when we really got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return (date - dt.date.min).days
# Load the date and closing-price columns; dmy2days turns each date into a
# day count so the column can be read as plain floats.
dates, closing_prices = np.loadtxt(
    '../data/aapl.csv',
    delimiter=',',
    usecols=(1, 6),
    unpack=True,
    converters={1: dmy2days},
)

# Time-weighted average price: the day counts serve as the weights.
twap = np.average(closing_prices, weights=dates)
print(twap)
351.0377051146597
4. 最值
- max/min:返回一个数组中最大/最小元素
- argmax/argmin:返回一个数组中最大/最小元素的下标
- maximum/minimum:将两个同维数组中对应位置元素中最大/最小元素构成一个新的数组
- ptp:返回一个数组中最大值和最小值之