11.线性模拟
- 线性预测
- 线性拟合
kx + b = y
kx1 + b = y1
kx2 + b = y2
...
kxn + b = yn
/ x1 1 \ / k \ / y1 \
| x2 1 | X | b | = | y2 |
| ... | \ / | ... |
\ xn 1/ \ yn /
a x b
= np.linalg.lstsq(a, b)[0]
y = kx + b
kx1 + b = y1' - y1
kx2 + b = y2' - y2
...
kxn + b = yn' - yn
[y1 - (kx1 + b)]^2 +
[y2 - (kx2 + b)]^2 + ... +
[yn - (kxn + b)]^2 = loss = f(k, b)
k, b? -> loss ->min
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, opening_prices, highest_prices, \ lowest_prices, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6), unpack=True, dtype='M8[D], f8, f8, f8, f8', converters={1: dmy2ymd}) trend_points = (highest_prices + lowest_prices + closing_prices) / 3 spreads = highest_prices - lowest_prices resistance_points = trend_points + spreads support_points = trend_points - spreads days = dates.astype(int) a = np.column_stack((days, np.ones_like(days))) x = np.linalg.lstsq(a, trend_points)[0] trend_line = days * x[0] + x[1] x = np.linalg.lstsq(a, resistance_points)[0] resistance_line = days * x[0] + x[1] x = np.linalg.lstsq(a, support_points)[0] support_line = days * x[0] + x[1] mp.figure('Trend', facecolor='lightgray') mp.title('Trend', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) rise = closing_prices - opening_prices >= 0.01 fall = opening_prices - closing_prices >= 0.01 fc = np.zeros(dates.size, dtype='3f4') ec = np.zeros(dates.size, dtype='3f4') fc[rise], fc[fall] = (1, 1, 1), (0.85, 0.85, 0.85) ec[rise], ec[fall] = (0.85, 0.85, 0.85), (0.85, 0.85, 0.85) mp.bar(dates, highest_prices - lowest_prices, 0, lowest_prices, color=fc, edgecolor=ec) mp.bar(dates, closing_prices - opening_prices, 0.8, opening_prices, color=fc, edgecolor=ec) mp.scatter(dates, trend_points, c='dodgerblue', alpha=0.5, s=60, zorder=2) mp.plot(dates, trend_line, c='dodgerblue', linewidth=3, label='Trend') mp.scatter(dates, resistance_points, c='orangered', alpha=0.5, s=60, zorder=2) mp.plot(dates, resistance_line, c='orangered', linewidth=3, label='Resistance') mp.scatter(dates, support_points, c='limegreen', alpha=0.5, s=60, zorder=2) mp.plot(dates, support_line, c='limegreen', linewidth=3, label='Support') mp.legend() mp.gcf().autofmt_xdate() mp.show()
12.裁剪,压缩和累乘
- ndarray.clip(min=下限,max=上限)
将调用数组中小于和大于下限和上限的元素替换为下限和上限,返回裁剪后的数组,调用数组保持不变 - ndarray.compress(条件)
返回由调用数组中满足条件的元素组成的新数组 - ndarray.prod()
返回调用数组中所有元素的乘积 ------累乘 - ndarray.cumprod()
返回调用数组中所有元素执行累乘的过程数组
代码
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
a = np.array([10, 20, 30, 40, 50])
print(a)
b = a.clip(min=15, max=45)
print(b)
c = a.compress((15 <= a) & (a <= 45))
print(c)
d = a.prod()
print(d)
e = a.cumprod()
print(e)
def jiecheng(n):
return n if n == 1 else n * jiecheng(n - 1)
n = 5
print(jiecheng(n))
jc = 1
for i in range(2, n + 1):
jc *= i
print(jc)
print(np.arange(2, n + 1).prod())
13.协方差,相关矩阵和相关系数
- 样本:
A = [a1,a2,...,an]
B = [b1,b2,...,bn] - 平均值:
ave_a = (a1+a2+...+an)/n
ave_b = (b1+b2+...+bn)/m - 离差
dev_a = [a1,a2,...,an]-ave_a
dev_b = [b1,b2,...,bn]-ave_b - 方差
var_a = ave(dev_a * dev_a)
var_b = ave(dev_b * dev_b) - 标准差
std_a = sqrt(var_a)
std_b = sqrt(var_b)
............................... - 协方差
cov_ab = ave(dev_a * dev_b)
cov_ba = ave(dev_b * dev_a) - 相关系数
cov_ab/(std_a * std_b) = cov_ba/(std_b * std_a)
[-1 , 1]
-1 <------------->0<------------->1
反相关 不相关 正相关
<-反相关性增强- -相关性增强-> - 相关矩阵
/ \
| var_a/(std_a x std_a) cov_ab/(std_a x std_b) |
| cov_ba/(std_b x std_a) var_b/(std_b x std_b) |
\ /
/ \
| 1 相关系数 |
| 相关系数 1 |
\ /
numpy.cov(a, b)->相关矩阵的分子矩阵
numpy.corrcoef(a,b) ->相关矩阵
代码:corr.py# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, bhp_closing_prices = np.loadtxt( '../../data/bhp.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) vale_closing_prices = np.loadtxt( '../../data/vale.csv', delimiter=',', usecols=(6), unpack=True) bhp_returns = np.diff( bhp_closing_prices) / bhp_closing_prices[:-1] vale_returns = np.diff( vale_closing_prices) / vale_closing_prices[:-1] ave_a = bhp_returns.mean() dev_a = bhp_returns - ave_a var_a = (dev_a * dev_a).sum() / (dev_a.size - 1) std_a = np.sqrt(var_a) ave_b = vale_returns.mean() dev_b = vale_returns - ave_b var_b = (dev_b * dev_b).sum() / (dev_b.size - 1) std_b = np.sqrt(var_b) cov_ab = (dev_a * dev_b).sum() / (dev_a.size - 1) cov_ba = (dev_b * dev_a).sum() / (dev_b.size - 1) corr = np.array([ [var_a / (std_a * std_a), cov_ab / (std_a * std_b)], [cov_ba / (std_b * std_a), var_b / (std_b * std_b)]]) print(corr) covs = np.cov(bhp_returns, vale_returns) stds = np.array([ [std_a * std_a, std_a * std_b], [std_b * std_a, std_b * std_b]]) corr = covs / stds print(corr) corr = np.corrcoef(bhp_returns, vale_returns) print(corr) mp.figure('Correlation Of Returns', facecolor='lightgray') mp.title('Correlation Of Returns', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Returns', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) mp.plot(dates[:-1], bhp_returns, c='orangered', label='BHP') mp.plot(dates[:-1], vale_returns, c='dodgerblue', label='VALE') mp.legend() mp.gcf().autofmt_xdate() mp.show()
14.多项式拟合
- y = p0x^n + p1x^n-1 + p2x^n-2 + ... + pn = f(x)
y1' = f(x1) -> y1
y2' = f(x2) -> y2
...
yn' = f(xn) -> yn
(y1-y1')^2 + (y2-y2')^2 + ... + (yn-yn')^2
= loss (p0, ..., pn)
p0, ..., pn = ? -> loss -> min
X = [x1, x2, ..., xn] - 自变量
Y = [y1, y2, ..., yn] - 实际函数值
Y'= [y1',y2',...,yn'] - 拟合函数值
P = [p0, p1, ..., pn] - 多项式函数中的系数
Q = [q0, q1, ..., qn-1] - 多项式函数导函数的系数
np.polyfit(X, Y, 最高次幂)->P
np.polyval(P, X)->Y'
np.polyder(P)->Q
y = 4x^3 + 3x^2 + 2x + 1, P=[4,3,2,1]
dy/dx = 12x^2 + 6x + 2, Q=[12, 6, 2]
4x^3 + 3x^2 + 2x + 1 = 0的根:np.roots(P)
np.polysub(P1, P2)->两个多项式函数的差函数的系数
y = 4x^3 + 3x^2 + 2x + 1, P1=[4,3,2,1]
y = 5x^4 + x, P2=[5, 0, 0, 1, 0]
y = -5x^4 + 4x^3 + 3x^2 + x + 1
np.polysub(P1, P2)->[-5, 4, 4, 1, 1]
代码:poly.py# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, bhp_closing_prices = np.loadtxt( '../../data/bhp.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) vale_closing_prices = np.loadtxt( '../../data/vale.csv', delimiter=',', usecols=(6), unpack=True) diff_closing_prices = bhp_closing_prices - \ vale_closing_prices days = dates.astype(int) p = np.polyfit(days, diff_closing_prices, 4) poly_closing_prices = np.polyval(p, days) q = np.polyder(p) roots_x = np.roots(q) roots_y = np.polyval(p, roots_x) mp.figure('Polynomial Fitting', facecolor='lightgray') mp.title('Polynomial Fitting', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Difference Price', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) mp.plot(dates, poly_closing_prices, c='limegreen', linewidth=3, label='Polynomial Fitting') mp.scatter(dates, diff_closing_prices, c='dodgerblue', alpha=0.5, s=60, label='Difference Price') roots_x = roots_x.astype(int).astype( 'M8[D]').astype(md.datetime.datetime) mp.scatter(roots_x, roots_y, marker='^', s=80, c='orangered', label='Peek', zorder=4) mp.legend() mp.gcf().autofmt_xdate() mp.show()
15.符号数组
- np.sign(数组)->符号数组
+ -> 1
- -> -1
0 -> 0
np.piecewise(源数组,条件序列,取值序列)->目标数组
针对源数组中每一个元素,检测其是否符合条件序列中的每一个条件,符合哪个条件就用取值系列中与之对应的值,表示该元素,放到目标数组中返回.
条件序列:[a<0,a==0,a>0]
取值序列:[-1,0,1]
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np a = np.array([70, 80, 60, 30, 40]) print(a) b = a - 60 print(b) c = np.sign(b) print(c) d = np.piecewise(a, [a < 60, a == 60, a > 60], [-1, 0, 1]) print(d)
obv.py
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, closing_prices, volumes = np.loadtxt( '../../data/bhp.csv', delimiter=',', usecols=(1, 6, 7), unpack=True, dtype='M8[D], f8, f8', converters={1: dmy2ymd}) diff_closing_prices = np.diff(closing_prices) #sign_closing_prices = np.sign(diff_closing_prices) sign_closing_prices = np.piecewise( diff_closing_prices, [ diff_closing_prices < 0, diff_closing_prices == 0, diff_closing_prices > 0], [-1, 0, 1]) obvs = volumes[1:] * sign_closing_prices mp.figure('On-Balance Volume', facecolor='lightgray') mp.title('On-Balance Volume', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('OBV', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(axis='y', linestyle=':') dates = dates[1:].astype(md.datetime.datetime) mp.bar(dates, obvs, 1.0, color='dodgerblue', edgecolor='white', label='OBV') mp.legend() mp.gcf().autofmt_xdate() mp.show()
16.矢量化
- def 标量函数(标量参数1,标量参数2,...):
...
return 标量返回值1,标量返回值2,...
矢量参数1
矢量参数2
...
numpy.vectorize(标量函数)->矢量函数
矢量函数(矢量参数1,矢量参数2,...)
->矢量返回值1,矢量返回值2
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np def foo(x, y): return x + y, x - y, x * y x, y = 1, 4 print(foo(x, y)) X, Y = np.array([1, 2, 3]), np.array([4, 5, 6]) bar = np.vectorize(foo) print(bar(X, Y)) print(np.vectorize(foo)(X, Y))
sim.py
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, opening_prices, highest_prices, \ lowest_prices, closing_prices = np.loadtxt( 'bhp.csv', delimiter=',', usecols=(1, 3, 4, 5, 6), unpack=True, dtype='M8[D], f8, f8, f8, f8', converters={1: dmy2ymd}) def profit(opening_price, highest_price, lowest_price, closing_price): buying_price = opening_price * 0.99 if lowest_price <= buying_price <= highest_price: return (closing_price - buying_price) * \ 100 / buying_price return np.nan # 无效值 profits = np.vectorize(profit)( opening_prices, highest_prices, lowest_prices, closing_prices) nan = np.isnan(profits) dates, profits = dates[~nan], profits[~nan] gain_dates, gain_profits = dates[profits > 0], \ profits[profits > 0] loss_dates, loss_profits = dates[profits < 0], \ profits[profits < 0] mp.figure('Trading Simulation', facecolor='lightgray') mp.title('Trading Simulation', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Profit', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') if dates.size > 0: dates = dates.astype(md.datetime.datetime) mp.plot(dates, profits, c='gray', label='Profit') mp.axhline(y=profits.mean(), linestyle='--', color='gray') if gain_dates.size > 0: gain_dates = gain_dates.astype(md.datetime.datetime) mp.plot(gain_dates, gain_profits, 'o', c='orangered', label='Gain Profit') mp.axhline(y=gain_profits.mean(), linestyle='--', color='orangered') if loss_dates.size > 0: loss_dates = loss_dates.astype(md.datetime.datetime) mp.plot(loss_dates, loss_profits, 'o', c='limegreen', label='Loss Profit') mp.axhline(y=loss_profits.mean(), linestyle='--', color='limegreen') mp.legend() mp.gcf().autofmt_xdate() mp.show()
17.数据平滑
- 降噪 + 拟合 + 特征识别
卷积 多项式 数学方法
y1 = f(x1)
y1 = g(x1)
f(x1) = g(x1)
f(x1) - g(x1) = 0
f(x) - g(x) = 0
代码:# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md def dmy2ymd(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() ymd = date.strftime('%Y-%m-%d') return ymd dates, bhp_closing_prices = np.loadtxt( '../../data/bhp.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) vale_closing_prices = np.loadtxt( '../../data/vale.csv', delimiter=',', usecols=(6), unpack=True) bhp_returns = np.diff( bhp_closing_prices) / bhp_closing_prices[:-1] vale_returns = np.diff( vale_closing_prices) / vale_closing_prices[:-1] N = 8 weights = np.hanning(N) weights /= weights.sum() bhp_smooth_returns = np.convolve( bhp_returns, weights, 'valid') vale_smooth_returns = np.convolve( vale_returns, weights, 'valid') days = dates[N - 1:-1].astype(int) degree = 3 bhp_p = np.polyfit(days, bhp_smooth_returns, degree) bhp_fitted_returns = np.polyval(bhp_p, days) vale_p = np.polyfit(days, vale_smooth_returns, degree) vale_fitted_returns = np.polyval(vale_p, days) sub_p = np.polysub(bhp_p, vale_p) roots_x = np.roots(sub_p) roots_x = roots_x.compress( (days[0] <= roots_x) & (roots_x <= days[-1])) roots_y = np.polyval(bhp_p, roots_x) mp.figure('Smoothing Returns', facecolor='lightgray') mp.title('Smoothing Returns', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Returns', fontsize=14) ax = mp.gca() ax.xaxis.set_major_locator(md.WeekdayLocator( byweekday=md.MO)) ax.xaxis.set_minor_locator(md.DayLocator()) ax.xaxis.set_major_formatter(md.DateFormatter( '%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') dates = dates.astype(md.datetime.datetime) mp.plot(dates[:-1], bhp_returns, c='orangered', alpha=0.25, label='BHP') mp.plot(dates[:-1], vale_returns, c='dodgerblue', alpha=0.25, label='VALE') mp.plot(dates[N - 1:-1], bhp_smooth_returns, c='orangered', alpha=0.75, label='Smooth BHP') mp.plot(dates[N - 1:-1], vale_smooth_returns, c='dodgerblue', alpha=0.75, label='Smooth VALE') mp.plot(dates[N - 1:-1], bhp_fitted_returns, c='orangered', linewidth=3, label='Fitted BHP') mp.plot(dates[N - 1:-1], vale_fitted_returns, c='dodgerblue', linewidth=3, label='Fitted VALE') roots_x = roots_x.astype(int).astype( 'M8[D]').astype(md.datetime.datetime) mp.scatter(roots_x, roots_y, s=100, marker='x', c='firebrick', lw=3, zorder=3) mp.legend() mp.gcf().autofmt_xdate() mp.show()