1.符号数组
import matplotlib.pyplot as mp
import numpy as np
import matplotlib.dates as md
import datetime as dt
# 若相比上一天的收盘价上涨,则为正成交量;若相比上一天的收盘价下跌,则为负成交量。
# 绘制OBV柱状图
# 日期转换函数
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field to an ISO ``YYYY-MM-DD`` string.

    Used as a ``converters`` callback for ``np.loadtxt``.

    Args:
        dmy: The raw date field, either UTF-8 encoded ``bytes`` or ``str``.
            Both are accepted because the type ``np.loadtxt`` hands to a
            converter depends on the NumPy version / ``encoding`` argument.

    Returns:
        The same calendar date formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If the text does not match ``'%d-%m-%Y'``.
    """
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return parsed.strftime('%Y-%m-%d')
# Load CSV columns 1, 6 and 7 as dates, closing prices and volumes; the date
# column is rewritten by dmy2ymd before being parsed as datetime64[D].
dates, closing_prices, volumes = np.loadtxt(
    'data/da_data/bhp.csv',
    delimiter=',',
    usecols=(1, 6, 7),
    unpack=True,
    dtype='M8[D], f8, f8',
    converters={1: dmy2ymd},
)

# Day-over-day change of the closing price and its sign
# (+1 rise, -1 fall, 0 unchanged).
diff_closing_prices = closing_prices[1:] - closing_prices[:-1]
sign_closing_prices = np.sign(diff_closing_prices)
# Signed volume: positive on a rising close, negative on a falling close.
# The first day has no previous close, so it is dropped.
obvs = sign_closing_prices * volumes[1:]

# Bar chart of the signed volumes.
fig = mp.figure('On-Balance Volume', facecolor='lightgray')
ax = mp.gca()
ax.set_title('On-Balance Volume', fontsize=20)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('OBV', fontsize=14)
# Major ticks on Mondays, minor ticks on every day.
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_minor_locator(md.DayLocator())
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
ax.tick_params(labelsize=10)
ax.grid(axis='y', linestyle=':')
# Convert datetime64 values to Python date objects for the bar plot,
# skipping the first day to line up with obvs.
dates = dates[1:].astype(dt.datetime)
ax.bar(dates, obvs, 1.0, color='dodgerblue',
       edgecolor='white', label='OBV')
ax.legend()
fig.autofmt_xdate()
mp.show()
# Array processing: np.piecewise gives each element the value paired with
# the condition that holds for it.
a = np.array([70, 80, 60, 30, 40])
below, equal, above = a < 60, a == 60, a > 60
d = np.piecewise(a, [below, equal, above], [-1, 0, 1])
# d -> [ 1  1  0 -1 -1]
2.矢量化
import numpy as np
import math
import matplotlib.pyplot as mp
import matplotlib.dates as md
import datetime as dt
"""numpy提供了vectorize函数,可以把处理标量的函数矢量化,
返回的函数可以直接处理ndarray数组
"""
def foo(x, y):
    """Return the Euclidean length of the vector ``(x, y)``.

    This is a scalar function (it calls into ``math``); wrap it with
    ``np.vectorize`` or ``np.frompyfunc`` to apply it to arrays.

    ``math.hypot`` is used instead of ``sqrt(x**2 + y**2)`` so that large
    inputs do not overflow while being squared.
    """
    return math.hypot(x, y)
# Plain scalar call.
x = 1
y = 4
print(foo(x, y))

# np.vectorize wraps foo so it is applied element by element to arrays.
vectorized_foo = np.vectorize(foo)
X = np.array([1, 2, 3, 4])
Y = np.array([5, 6, 7, 8])
print(vectorized_foo(X, Y))

# A scalar second argument is paired with every element of X.
Y = 5
print(vectorized_foo(X, Y))

# np.frompyfunc turns foo into a ufunc taking 2 inputs and giving 1 output.
fun = np.frompyfunc(foo, 2, 1)
fun(X, Y)  # the result is not stored or printed
# 日期转换函数
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field to an ISO ``YYYY-MM-DD`` string.

    Used as a ``converters`` callback for ``np.loadtxt``.

    Args:
        dmy: The raw date field, either UTF-8 encoded ``bytes`` or ``str``.
            Both are accepted because the type ``np.loadtxt`` hands to a
            converter depends on the NumPy version / ``encoding`` argument.

    Returns:
        The same calendar date formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If the text does not match ``'%d-%m-%Y'``.
    """
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return parsed.strftime('%Y-%m-%d')
# Load CSV columns 1, 3, 4, 5 and 6 as dates plus opening, highest, lowest
# and closing prices; the date column goes through dmy2ymd first.
(dates, opening_prices, highest_prices,
 lowest_prices, closing_prices) = np.loadtxt(
    'data/da_data/bhp.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    unpack=True,
    dtype='M8[D], f8, f8, f8, f8',
    converters={1: dmy2ymd},
)
# 定义投资策略
def profit(opening_price, highest_price, lowest_price, closing_price):
    """Return the percentage gain of a one-day "bid below the open" trade.

    The strategy bids at 99% of the opening price and values the position
    at the closing price.

    Args:
        opening_price: Opening price of the day.
        highest_price: Highest price of the day.
        lowest_price: Lowest price of the day.
        closing_price: Closing price of the day.

    Returns:
        ``(closing_price - bid) * 100 / bid`` when the bid lies within
        ``[lowest_price, highest_price]``; otherwise ``np.nan`` (no trade).
        ``np.nan`` is also returned when the bid is zero, where the
        percentage is undefined.
    """
    buying_price = opening_price * 0.99
    filled = lowest_price <= buying_price <= highest_price
    # A zero bid would make the percentage a division by zero.
    if not filled or buying_price == 0:
        return np.nan  # invalid value: no trade took place
    return (closing_price - buying_price) * 100 / buying_price
# Apply the strategy to every trading day at once.
vectorized_profit = np.vectorize(profit)
profits = vectorized_profit(
    opening_prices, highest_prices, lowest_prices, closing_prices)
print(profits)

# Drop the days on which no trade happened (profit is NaN).
nan = np.isnan(profits)
traded = ~nan
dates = dates[traded]
profits = profits[traded]
print(np.mean(profits))

# Plot the per-day profit of the remaining days.
mp.plot(dates, profits, 'o-', color='orangered', label='profits')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
3.矩阵
import numpy as np
# Creating matrix objects.
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed.
ary = np.arange(1, 9).reshape(2, 4)
print(ary.dtype)
m1 = np.matrix(ary)    # copies the data: m1 does not follow changes to ary
m2 = np.asmatrix(ary)  # no copy: m2 reflects later changes to ary
print(m1, type(m1))
print(m2, type(m2))
ary *= 2               # in-place update, visible through m2 only
print(m1)
print(m2)
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed; it parses
# the same 'row; row' string syntax.
arr = np.asmatrix('1, 2, 4; 5, 6, 7;8, 9, 10')
# Matrix multiplication: for np.matrix, * is the matrix product.
print(arr*arr)
# Inverse of a matrix via the .I attribute.
e = np.asmatrix('1 2 6; 3 5 7; 4 8 9')
print(e.I)
# np.linalg.inv computes the same inverse.
e1 = np.linalg.inv(e)
print(e1)
print(e * e.I)
# np.dot / .dot give the same result as the matrix product above.
print(np.dot(e, e.I))
print(e.dot(e.I))
# Solving the linear system A x = b with matrices.
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed.
prices = np.asmatrix('3 3.2; 3.5 3.6')
totals = np.asmatrix('118.4; 135.2')
persons = prices.I * totals
print(persons)
# The same solution through np.linalg: least squares, then an exact solve.
# rcond=None selects the machine-precision cutoff explicitly, which avoids
# the FutureWarning older NumPy versions emit when rcond is omitted.
x = np.linalg.lstsq(prices, totals, rcond=None)[0]
print(x)
x = np.linalg.solve(prices, totals)
print(x)
import functools

n = 35


# Fibonacci numbers by recursion.
@functools.lru_cache(maxsize=None)
def fibo(n):
    """Return the n-th Fibonacci number, with ``fibo(1) == fibo(2) == 1``.

    The definition is still the plain recursion; results are memoised so
    each value is computed once instead of an exponential number of times.
    Any ``n < 3`` returns 1.
    """
    return 1 if n < 3 else fibo(n - 1) + fibo(n - 2)


print(fibo(n))
# Fibonacci numbers by matrix power: the top-left entry of
# [[1, 1], [1, 0]] ** (n - 1) is the n-th Fibonacci number.
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed.
print(int((np.asmatrix('1. 1.; 1. 0.') ** (n - 1))[0, 0]))
4.通用函数
# Clipping and compressing an array.
arr = np.array([1, 2, 3, 4, 5, 6])
low, high = 3, 5

# clip: values below low become low, values above high become high.
res = np.clip(arr, low, high)
print(arr)  # the source array is left unchanged
print(res)  # [3 3 3 4 5 5]

# compress: keep only the elements selected by a boolean mask.
in_range = (arr >= low) & (arr <= high)
res1 = arr.compress(in_range)
print(res1)  # [3 4 5]

# The same mask built with np.all over the stacked conditions.
res2 = arr.compress(np.all([arr >= low, arr <= high], axis=0))
print(res2)  # [3 4 5]
# Addition and multiplication universal functions -- quick reference:
#   np.add(a, a)                   element-wise sum, same as a + a
#   np.add.reduce(a)               sum of all elements of a
#   np.add.accumulate(a)           running sums
#   np.add.outer([10, 20, 30], a)  outer sum
#   np.prod(a)                     product of all elements
#   np.cumprod(a)                  running products
#   np.outer([10, 20, 30], a)      outer product
a = np.arange(1, 5)
tens = [10, 20, 30]

b = np.add(a, a)
print(b)  # [2 4 6 8]

c = np.add.reduce(a)
print(c)  # 10

d = np.add.accumulate(a)
print(d)  # [ 1  3  6 10]

e = np.add.outer(tens, a)
print(e)
# [[11 12 13 14]
#  [21 22 23 24]
#  [31 32 33 34]]

print(np.prod(a))     # 24
print(np.cumprod(a))  # [ 1  2  6 24]
print(np.outer(tens, a))
# [[ 10  20  30  40]
#  [ 20  40  60  80]
#  [ 30  60  90 120]]
# Division and rounding universal functions -- quick reference:
#   np.divide(a, b)   true division of a by b
#   np.floor(a / b)   quotient rounded down
#   np.ceil(a / b)    quotient rounded up
#   np.trunc(a / b)   quotient with the fractional part cut off
#   np.round(a / b)   quotient rounded to the nearest integer
#                     (NumPy rounds exact halves to the even neighbour)
a = np.array([20, 20, -20, -20])
b = np.array([3, -3, 6, -6])

# True division, three equivalent spellings.
print(np.true_divide(a, b))
print(np.divide(a, b))
c = a / b
print('array:', c)

# Round down.
d = np.floor(c)
print('floor_divide:', d)  # [ 6. -7. -4.  3.]

# Round up.
e = np.ceil(c)  # [ 7. -6. -3.  4.]
print('ceil ndarray:', e)

# Truncate toward zero.
f = np.trunc(c)
print('trunc ndarray:', f)  # [ 6. -6. -3.  3.]

# Round to nearest.
g = np.around(c)
print('around ndarray:', g)  # [ 7. -7. -3.  3.]
# Bitwise universal functions -- quick reference:
#   XOR:    c = a ^ b   or  np.bitwise_xor(a, b)
#   AND:    e = a & b   or  np.bitwise_and(a, b)
#   OR:     e = a | b   or  np.bitwise_or(a, b)
#   NOT:    e = ~a      or  np.invert(a)  (alias np.bitwise_not)
#   shifts: <<  __lshift__  np.left_shift
#           >>  __rshift__  np.right_shift
a = np.array([0, -1, 2, -3, 4, -5])
b = np.array([0, 1, 2, 3, 4, 5])
print(a, b)

# The XOR of two integers is negative exactly when their signs differ.
c = a ^ b
print(c)  # [ 0 -2  0 -2  0 -2]
print(a.__xor__(b))          # special method behind the ^ operator
print(np.bitwise_xor(a, b))  # ufunc spelling of the same operation
print(np.where(c < 0)[0])  # [1 3 5]: indices where a and b differ in sign
print(1 ^ 0)  # 1
print(1 ^ -1)  # -2
print(-1 ^ 0)  # -1

# Power-of-two test with AND: for d >= 1, d & (d - 1) is 0 exactly when d
# is a power of two.
d = np.arange(1, 21)
print(d)
e = d & (d - 1)
print(d.__and__(d - 1))          # special method behind the & operator
print(np.bitwise_and(d, d - 1))  # ufunc spelling of the same operation
print(e)
5.特征值和特征向量
# 特征值和特征向量
import numpy as np
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed.
A = np.asmatrix('3 -2; 1 0')
print(A)
# eigvals[i] is the eigenvalue belonging to the column eigvecs[:, i].
eigvals, eigvecs = np.linalg.eig(A)
print(eigvals)  # the eigenvalues of A are 2 and 1
print(eigvecs)
# Sample output:
# [[0.89442719 0.70710678]
#  [0.4472136  0.70710678]]

# Rebuild the original matrix from its eigenvalues and eigenvectors:
# A = V * diag(eigvals) * V^-1. eigvecs is converted before taking .I so
# this works whether eig returned a matrix or a plain ndarray.
S = (np.asmatrix(eigvecs)
     * np.asmatrix(np.diag(eigvals))
     * np.asmatrix(eigvecs).I)
print(S)
import matplotlib.pyplot as plt
import scipy.misc as sm
# scipy.misc.imread was removed in SciPy 1.2, so the image is read with
# matplotlib (already imported as plt) and flattened to grayscale here.
original = plt.imread('data/da_data/lily.jpg').astype('float64')
if original.ndim == 3:
    # Colour image: combine R, G, B with the ITU-R 601 luma weights and
    # ignore any alpha channel.
    original = np.dot(original[..., :3], [0.299, 0.587, 0.114])

# Keep part of the features: decompose the image, zero every eigenvalue
# after the first 50, then rebuild the image from what is left.
# NOTE(review): np.linalg.eig needs a square matrix, so this assumes a
# square image (the original notes mention 512x512) -- confirm.
eigvals, eigvecs = np.linalg.eig(original)
eigvals[50:] = 0
# np.asmatrix replaces np.mat, an alias that NumPy 2.0 removed.
original2 = (np.asmatrix(eigvecs)
             * np.asmatrix(np.diag(eigvals))
             * np.asmatrix(eigvecs).I)

# Show the source image and the reconstruction side by side. Only the real
# part is displayed because the reconstruction can be complex-valued.
plt.figure('lily features')
plt.subplot(121)
plt.xticks([])
plt.yticks([])
plt.imshow(original, cmap='gray')
plt.subplot(122)
plt.xticks([])
plt.yticks([])
plt.imshow(original2.real, cmap='gray')
plt.tight_layout()
plt.show()