4.最值
- max/min:返回一个数组中最大/最小元素
- argmax/argmin:返回一个数组中最大/最小元素的下标
- maximum/minimum:将两个同维数组中对应元素中最大/最小元素构成一个新的数组
- ptp:返回一个数组中最大值和最小值之差
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np # 产生9个介于[10, 100)区间的随机数 a = np.random.randint(10, 100, 9).reshape(3, 3) print(a) print(np.max(a), np.min(a), np.ptp(a)) print(np.argmax(a), np.argmin(a)) b = np.random.randint(10, 100, 9).reshape(3, 3) print(b) print(np.maximum(a, b), np.minimum(a, b), sep='\n') # a [[69 25 50] # [70 30 18] # [47 84 32]] # 84 18 66 # 7 5 # b [[38 71 71] # [82 34 42] # [39 50 85]] # a,b # [[69 71 71] # [82 34 42] # [47 84 85]] # [[38 25 50] # [70 30 18] # [39 50 32]]
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np dates, highest_prices, lowest_prices = np.loadtxt( 'aapl.csv', delimiter=',', usecols=(1, 4, 5), dtype='U10, f8, f8', unpack=True) max_price = np.max(highest_prices) min_price = np.min(lowest_prices) print(min_price, '~', max_price) max_index = np.argmax(highest_prices) min_index = np.argmin(lowest_prices) print(dates[min_index], dates[max_index]) highest_ptp = np.ptp(highest_prices) lowest_ptp = np.ptp(lowest_prices) print(lowest_ptp, highest_ptp)
5.中位数
- 将多个样本按照大小排序,取中间位置的元素
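奇数个样本:10 20 [30] 40 50,中位数取正中间的元素:(a[(5-1)/2] + a[5/2]) / 2 = (a[2] + a[2]) / 2 = 30
偶数个样本:10 20 [30 40] 50 60,中位数取中间两个元素的平均:(a[(6-1)/2] + a[6/2]) / 2 = (a[2] + a[3]) / 2 = 35
通式(s 为样本个数,下标中的除法为整除):(a[(s-1)/2] + a[s/2]) / 2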
np.median(无序样本)->中位数# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices = np.loadtxt( 'aapl.csv', delimiter=',', usecols=(6), unpack=True) size = closing_prices.size sorted_prices = np.msort(closing_prices) median = (sorted_prices[int((size - 1) / 2)] + sorted_prices[int(size / 2)]) / 2 print(median) median = np.median(closing_prices) print(median)
6.标准差
- 样本:[s1,s2,...,sn]
平均值:m = (s1+s2+...+sn)/n
离差:D = [d1,d2,...,dn],di=si-m
离差方:Q = [q1,q2,...,qn],qi= di**2
总体方差:v = (q1+q2+...+qn)/n
总体标准差:s = sqrt(v),方均根
样本方差:v' = (q1+q2+...+qn)/(n-1)
样本标准差:s' = sqrt(v'),方均根
$\underbrace{2 + 2 + \dots + 2}_{10\ \text{项}} = 20$
$\dfrac{10000}{10 - 8}$
numpy.std(S)->s
numpy.std(S, ddof=1)->s'
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(6), unpack=True) mean = np.mean(closing_prices) # 算数平均值 devs = closing_prices - mean # 离差 dsqs = devs ** 2 # 离差方 pvar = np.sum(dsqs) / dsqs.size # 总体方差 pstd = np.sqrt(pvar) # 总体标准差 svar = np.sum(dsqs) / (dsqs.size - 1) # 样本方差 sstd = np.sqrt(svar) # 样本标准差 print(pstd, sstd) pstd = np.std(closing_prices) # 总体标准差 sstd = np.std(closing_prices, ddof=1) # 样本标准差 print(pstd, sstd)
7.时间数据处理
- 按星期取平均值
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np # 转换器函数:将日-月-年格式的日期字符串转换为星期 def dmy2wday(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() wday = date.weekday() # 用0~6表示周一到周日 return wday wdays, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 6), unpack=True, converters={1: dmy2wday}) print(wdays) ave_closing_prices = np.zeros(5) for wday in range(ave_closing_prices.size): ''' ave_closing_prices[wday] = np.take( closing_prices, np.where(wdays == wday)).mean() ''' ''' ave_closing_prices[wday] = \ closing_prices[np.where(wdays == wday)].mean() ''' ave_closing_prices[wday] = \ closing_prices[wdays == wday].mean() for wday, ave_closing_price in zip( ['MON', 'TUE', 'WED', 'THU', 'FRI'], ave_closing_prices): print(wday, np.round(ave_closing_price, 2))
- 按星期汇总数据
数组的轴向汇总
np.apply_along_axis(处理函数, 轴向, 数组)
沿着数组中所指定的轴向,调用处理函数,并将每次调用的返回值重新组织成数组返回。
代码:
sum.py# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np def foo(arg): print('foo:', arg) return arg.sum() a = np.arange(1, 10).reshape(3, 3) print(a) b = np.apply_along_axis(foo, 0, a) print(b) c = np.apply_along_axis(foo, 1, a) print(c)
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np def dmy2wday(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() wday = date.weekday() return wday wdays, opening_prices, highest_prices, \ lowest_prices, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6), unpack=True, converters={1: dmy2wday}) wdays = wdays[:16] opening_prices = opening_prices[:16] highest_prices = highest_prices[:16] lowest_prices = lowest_prices[:16] closing_prices = closing_prices[:16] # 第一个星期一的索引 first_monday = np.where(wdays == 0)[0][0] last_friday = np.where(wdays == 4)[0][-1] indices = np.arange(first_monday, last_friday + 1) indices = np.split(indices, 3) def week_summary(indices): opening_price = opening_prices[indices[0]] highest_price = highest_prices[indices].max() lowest_price = lowest_prices[indices].min() closing_price = closing_prices[indices[-1]] return opening_price, highest_price, \ lowest_price, closing_price summaries = np.apply_along_axis( week_summary, 1, indices) print(summaries) np.savetxt('../../data/summary.csv', summaries, delimiter=',', fmt='%g')
8.卷积
- 卷积积分
激励函数:g(t)
单位激励下的响应函数:f(t)
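响应函数:$(g * f)(t) = \int g(\tau)\, f(t - \tau)\, d\tau$
- a = [1 2 3 4 5]
b = [6 7 8]
c = numpy.convolve(a,b,卷积类型)
        40 61 82        - 有效卷积(valid)
     19 40 61 82 67     - 同维卷积(same)
   6 19 40 61 82 67 40  - 完全卷积(full)
卷积核 b 反转为 8 7 6,在两端补零后的 a 上逐位滑动,对应元素相乘再求和:
0 0 1 2 3 4 5 0 0
8 7 6                 -> 0x8 + 0x7 + 1x6 = 6
  8 7 6               -> 0x8 + 1x7 + 2x6 = 19
    8 7 6             -> 1x8 + 2x7 + 3x6 = 40
      8 7 6           -> 2x8 + 3x7 + 4x6 = 61
        8 7 6         -> 3x8 + 4x7 + 5x6 = 82
          8 7 6       -> 4x8 + 5x7 + 0x6 = 67
            8 7 6     -> 5x8 + 0x7 + 0x6 = 40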
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np a = np.array([1, 2, 3, 4, 5]) # 被卷积序列 b = np.array([6, 7, 8]) # 卷积核序列 print(a, b) c = np.convolve(a, b, 'full') # 完全卷积 print(c) d = np.convolve(a, b, 'same') # 同维卷积 print(d) e = np.convolve(a, b, 'valid') # 有效卷积 print(e)
9.移动均线
a b c d e f g h i j [1/5 1/5 1/5 1/5 1/5]
(a+b+c+d+e)/5
(b+c+d+e+f)/5
(c+d+e+f+g)/5
...
(f+g+h+i+j)/5
[A, B, C, D, E]
S=A+B+C+D+E
(aA+bB+cC+dD+eE)/S
aA/S+bB/S+cC/S+dD/S+eE/S
[A/S, B/S, C/S, D/S, E/S]
代码
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime as dt
import numpy as np
import matplotlib.pyplot as mp
import matplotlib.dates as md
def dmy2ymd(dmy):
dmy = str(dmy, encoding='utf-8')
date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
ymd = date.strftime('%Y-%m-%d')
return ymd
dates, closing_prices = np.loadtxt(
'../../data/aapl.csv', delimiter=',',
usecols=(1, 6), unpack=True,
dtype='M8[D], f8', converters={1: dmy2ymd})
sma51 = np.zeros(closing_prices.size - 4)
for i in range(sma51.size):
sma51[i] = closing_prices[i:i + 5].mean()
sma52 = np.convolve(
closing_prices, np.ones(5) / 5, 'valid')
sma10 = np.convolve(
closing_prices, np.ones(10) / 10, 'valid')
mp.figure('Simple Moving Average',
facecolor='lightgray')
mp.title('Simple Moving Average', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
# 设置水平坐标每个星期一为主刻度
ax.xaxis.set_major_locator(md.WeekdayLocator(
byweekday=md.MO))
# 设置水平坐标每一天为次刻度
ax.xaxis.set_minor_locator(md.DayLocator())
# 设置水平坐标主刻度标签格式
ax.xaxis.set_major_formatter(md.DateFormatter(
'%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
dates = dates.astype(md.datetime.datetime)
mp.plot(dates, closing_prices, c='lightgray',
label='Closing Price')
mp.plot(dates[4:], sma51, c='orangered',
label='SMA-5(1)')
mp.plot(dates[4:], sma52, c='limegreen', alpha=0.5,
linewidth=6, label='SMA-5(2)')
mp.plot(dates[9:], sma10, c='dodgerblue',
label='SMA-10')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
ema.py
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime as dt
import numpy as np
import matplotlib.pyplot as mp
import matplotlib.dates as md
def dmy2ymd(dmy):
dmy = str(dmy, encoding='utf-8')
date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
ymd = date.strftime('%Y-%m-%d')
return ymd
dates, closing_prices = np.loadtxt(
'../../data/aapl.csv', delimiter=',',
usecols=(1, 6), unpack=True,
dtype='M8[D], f8', converters={1: dmy2ymd})
weights = np.exp(np.linspace(-1, 0, 5))
weights /= weights.sum()
ema5 = np.convolve(
closing_prices, weights[::-1], 'valid')
weights = np.exp(np.linspace(-1, 0, 10))
weights /= weights.sum()
ema10 = np.convolve(
closing_prices, weights[::-1], 'valid')
mp.figure('Exponential Moving Average',
facecolor='lightgray')
mp.title('Exponential Moving Average', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
# 设置水平坐标每个星期一为主刻度
ax.xaxis.set_major_locator(md.WeekdayLocator(
byweekday=md.MO))
# 设置水平坐标每一天为次刻度
ax.xaxis.set_minor_locator(md.DayLocator())
# 设置水平坐标主刻度标签格式
ax.xaxis.set_major_formatter(md.DateFormatter(
'%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
dates = dates.astype(md.datetime.datetime)
mp.plot(dates, closing_prices, c='lightgray',
label='Closing Price')
mp.plot(dates[4:], ema5, c='orangered',
label='EMA-5')
mp.plot(dates[9:], ema10, c='dodgerblue',
label='EMA-10')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
10.布林带
- 中轨:移动平均线
- 上轨:中轨+2x标准差
- 下轨:中轨-2x标准差
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, closing_prices = np.loadtxt( 'aapl.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) weights = np.exp(np.linspace(-1, 0, 5)) weights /= weights.sum() medios = np.convolve( closing_prices, weights[::-1], 'valid') stds = np.zeros(medios.size) for i in range(stds.size): stds[i] = closing_prices[i:i + 5].std() stds *= 2 lowers = medios - stds uppers = medios + stds mp.figure('Exponential Moving Average', facecolor='lightgray') mp.title('Exponential Moving Average', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() # 设置水平坐标每个星期一为主刻度 ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) # 设置水平坐标每一天为次刻度 ax.xaxis.set_minor_locator(md.DayLocator()) # 设置水平坐标主刻度标签格式 ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) mp.plot(dates, closing_prices, c='lightgray', label='Closing Price') mp.plot(dates[4:], medios, c='dodgerblue', label='Medio') mp.plot(dates[4:], lowers, c='limegreen', label='Lower') mp.plot(dates[4:], uppers, c='orangered', label='Upper') mp.legend() mp.gcf().autofmt_xdate() mp.show()
11.线性模型
- 1 2 3 4 5
60 65 70 75 <80>
- 线性预测
a b c d e f
aA+bB+cC=d \
bA+cB+dC=e > -> A B C
cA+dB+eC= f /
dA+eB+fC -> ?
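$$\underbrace{\begin{pmatrix} a & b & c \\ b & c & d \\ c & d & e \end{pmatrix}}_{a} \times \underbrace{\begin{pmatrix} A \\ B \\ C \end{pmatrix}}_{x} = \underbrace{\begin{pmatrix} d \\ e \\ f \end{pmatrix}}_{b}$$
x = np.linalg.lstsq(a, b)[0]
代码: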
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import pandas as pd import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) N = 5 pred_prices = np.zeros( closing_prices.size - 2 * N + 1) for i in range(pred_prices.size): a = np.zeros((N, N)) for j in range(N): a[j, ] = closing_prices[i + j:i + j + N] b = closing_prices[i + N:i + N * 2] x = np.linalg.lstsq(a, b)[0] pred_prices[i] = b.dot(x) mp.figure('Linear Prediction', facecolor='lightgray') mp.title('Linear Prediction', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() # 设置水平坐标每个星期一为主刻度 ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) # 设置水平坐标每一天为次刻度 ax.xaxis.set_minor_locator(md.DayLocator()) # 设置水平坐标主刻度标签格式 ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) mp.plot(dates, closing_prices, 'o-', c='lightgray', label='Closing Price') dates = np.append(dates, dates[-1] + pd.tseries.offsets.BDay()) mp.plot(dates[2 * N:], pred_prices, 'o-', c='orangered', linewidth=3, label='Predicted Price') mp.legend() mp.gcf().autofmt_xdate() mp.show()
- 线性拟合
kx + b = y
kx1 + b = y1
kx2 + b = y2
...
kxn + b = yn
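$$\underbrace{\begin{pmatrix} x_1 & 1 \\ x_2 & 1 \\ \vdots & \vdots \\ x_n & 1 \end{pmatrix}}_{a} \times \underbrace{\begin{pmatrix} k \\ b \end{pmatrix}}_{x} = \underbrace{\begin{pmatrix} y_1 \\ y_2 \\ \vdots \\ y_n \end{pmatrix}}_{b}$$
x = np.linalg.lstsq(a, b)[0]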