day3装环境来着,day4照着这位大佬的视频速成一些数字处理,虽然给了源码,还是手打了打代码,不会的函数百度百度,一晚上才看到第10P。
明天估计看看剩下的,好像会有一些简单的回归实战,之后的设想是做一做手写数字识别。
这视频感觉是真不错,他说是一天速成,其实就是对着代码讲讲,总共半个小时。很好地符合我 “以后遇到了知道怎么百度就行” 的需求。
numpy基础
import numpy
# A plain Python list and the ndarray built from it.
mylist = [1, 2, 3]
myarray = numpy.array(mylist)
# Prints <class 'list'> and then <class 'numpy.ndarray'>, one per line.
print(type(mylist), type(myarray), sep="\n")

# A nested list turns into a 2-D array.
mylist = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
mymatrix = numpy.asarray(mylist)
print(mymatrix)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]
# Arrays carry many attributes, shape being one of them.
print(mymatrix.shape)  # (3, 3)

# Two equivalent ways to get the integers 0..9 as an array.
a = numpy.arange(10)
b = numpy.array(range(10))

# The bare expressions below only display something in an interactive
# session; run as a script their results are simply discarded.
numpy.zeros(shape=5)        # five zeros
numpy.ones(shape=(3, 3))    # 3x3 block of ones
# Arithmetic with a scalar is applied to every element: array([4., 4.]).
# Multiplication behaves the same way.
3 + numpy.ones(2)
numpy.linspace(0, 10, num=3)  # array([ 0.,  5., 10.])
numpy.identity(5)             # 5x5 identity matrix

# Random arrays. Fixing the seed makes the sequence reproducible.
numpy.random.seed(4)
numpy.random.rand(3)    # three uniform samples from the half-open interval [0, 1)
numpy.random.randn(3)   # three samples from the standard normal distribution
# Ten integers from 1 to 99: the upper bound 100 is exclusive.
numpy.random.randint(low=1, high=100, size=10)
# The integers 0..24 laid out as a 5x5 grid.
a = numpy.arange(25).reshape((5, 5))
print(a)
# Largest / smallest value, followed by their positions in the flattened array.
print(numpy.max(a), numpy.min(a), numpy.argmax(a), numpy.argmin(a))

# 2-D slicing: rows 0-1 and columns 1-2 of a 3x3 matrix.
a = numpy.arange(1, 10).reshape(3, 3)
print(a[:2, 1:])
# Summing along axis 0 collapses the rows and yields one total per column.
print(numpy.sum(a, axis=0))

# Comparisons are element-wise and produce a boolean mask ...
a = numpy.arange(10)
print(a > 5)
# [False False False False False False  True  True  True  True]
# ... and indexing with that mask keeps only the matching elements.
print(a[a > 5])
# [6 7 8 9]
# Reductions, plus a ufunc that is applied to every element.
print(numpy.sum(a), numpy.mean(a), numpy.max(a), numpy.sin(a))
matplotlib基础
import numpy as np
import matplotlib.pyplot as mp
# 这个库用来简单绘图
# Top-left cell of a 2x2 grid: a line through (1, 2), (2, 3), (3, 4).
mp.subplot(221)
x = np.arange(1, 4)
y = x + 1
mp.plot(x, y)

# Top-right cell: one horizontal and one vertical segment starting at (1, 1).
mp.subplot(222)
mp.hlines(y=1, xmin=1, xmax=1.5)  # horizontal line at y=1 spanning x in [1, 1.5]
mp.vlines(x=1, ymin=1, ymax=1.5)  # vertical line at x=1 spanning y in [1, 1.5]

# Bottom-left cell: a baseline with one large labelled marker on it.
mp.subplot(223)
mp.hlines(y=0, xmin=1, xmax=3)
mp.scatter(2, 0, s=200, marker="o", color="g", edgecolors="b", label="sample points")
mp.legend()  # show the labels attached to the artists above

mp.show()
# Sample both curves first, then draw.
x = np.linspace(0, 10, 1000)
sinx = np.sin(x)
cosx = 0.5 * np.cos(0.5 * x)

mp.figure(facecolor="lightgray")
mp.title("fill", fontsize=18)
mp.grid(color="r", linestyle=":")  # red dotted grid
# Labels use matplotlib's LaTeX-like math text between the dollar signs.
mp.plot(x, sinx, label=r'$y=sin(x)$')
mp.plot(x, cosx, color="orangered", label=r'$y=\frac{1}{2} cos(\frac{x}{2})$')
mp.legend()

# fill_between(x, y1, y2, where=mask) shades the band between the two curves
# wherever the mask is true: pink where sin is on top, green where it is below.
for mask, shade in ((sinx > cosx, "pink"), (sinx < cosx, "green")):
    mp.fill_between(x, sinx, cosx, where=mask, color=shade, alpha=0.3)

mp.show()
# Grouped bar chart: two students' scores in three subjects.
xiaoming = np.array([10, 20, 30])
xiaohong = np.array([30, 20, 10])
x = np.arange(len(xiaoming))  # one slot per subject

mp.figure("Bar", facecolor="lightgray")
mp.title("Bar Chart", fontsize=18)
mp.grid(linestyle=":")

# Arguments are bar centres, bar heights, bar width. Each student's bars are
# shifted 0.2 to one side of the slot so the two 0.4-wide bars sit side by side.
mp.bar(x - 0.2, xiaoming, width=0.4, align="center", color="limegreen", label="xiaoming")
mp.bar(x + 0.2, xiaohong, width=0.4, align="center", color="orangered", label="xiaohong")

# Replace the numeric tick positions with the subject names.
mp.xticks(x, ["yuwen", "shuxue", "yingyu"])
mp.legend()
mp.show()
# Pie chart: seven wedges sized 10, 20, ..., 70.
values = np.linspace(10, 70, 7)
# Radial offset of each wedge from the centre; the last two are pulled out more.
spaces = [0.01, 0.01, 0.01, 0.01, 0.01, 0.05, 0.1]
labels = ["yuwen", "shuxue", "yingyu", "wuli", "huaxue", "zhengzhi", "lishi"]
colors = ["red", "green", "orange", "blue", "purple", "pink", "black"]

mp.figure("Chart", facecolor='lightgray')  # figure name, background colour

# Everything after the data is passed by keyword so the call does not depend
# on the positional order of pie()'s optional parameters. shadow is a real
# boolean: the string "True" only worked because any non-empty string is
# truthy (so "False" would have enabled the shadow as well).
# autopct is the format string used for the percentage printed in each wedge;
# a startangle= keyword is also available to rotate the first wedge.
mp.pie(
    values,
    explode=spaces,
    labels=labels,
    colors=colors,
    autopct="%.1f%%",
    shadow=True,
)
mp.legend()
mp.show()
线性预测
import numpy as np
# Problem: the terms 1, 2, 3, 4, 5, 6 are known; predict the 7th one.
# Model: every term is the same linear combination of the `order` terms
# immediately before it.
a = range(1, 7)
order = 3

# Row j of A is the window a[j:j+order]; the term that follows that window is
# entry j of B, so the unknown coefficients x satisfy A @ x = B.
A = np.array([a[j:j + order] for j in range(order)], dtype=float)
print(A)
B = a[order:2 * order]

# For this data A is singular (consecutive rows differ by [1, 1, 1]), so the
# system is solved in the least-squares sense instead of with linalg.solve.
x = np.linalg.lstsq(A, B, rcond=None)[0]

# np.linalg quick reference:
#   least-squares solution x of A @ x = b:  np.linalg.lstsq(A, b)
#   inverse of a square matrix:             np.linalg.inv(A)
#   pseudo-inverse:                         np.linalg.pinv(A)
#   determinant:                            np.linalg.det(A)
#   exact solution of A @ x = b:            np.linalg.solve(A, b)
#   eigenvalues only:                       np.linalg.eigvals(A)
#   eigenvalues and eigenvectors:           np.linalg.eig(A)
#   singular value decomposition:           np.linalg.svd(A)
print(x)

# The next term is predicted from the LAST `order` known terms. The original
# reused B here, which only matches that window because the sequence happens
# to have exactly 2 * order terms.
prediction = np.dot(a[-order:], x)
print(prediction)
矩阵
import numpy as np
# Inverse of a 3x3 matrix. Plain ndarrays are used throughout: np.mat was
# removed in NumPy 2.0 and np.linalg.inv works on ordinary arrays.
m = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 6]])
print(m)
print(np.linalg.inv(m))  # inverse matrix

# Solve the linear system  2x + y = 20,  3x + 2y = 35.
A = [[2, 1], [3, 2]]
B = [20, 35]
x = np.linalg.solve(A, B)
print(x)

# Eigen-decomposition: a holds the eigenvalues, the columns of b the
# corresponding eigenvectors.
A = np.array([[2, 3], [4, 5]])
a, b = np.linalg.eig(A)
print(a, b)
# Rebuild A from its decomposition, A = V @ diag(w) @ inv(V). The @ operator
# is the matrix product for ndarrays (* would multiply element-wise).
A2 = b @ np.diag(a) @ np.linalg.inv(b)
print(A2)
拟合
import numpy as np
import matplotlib.pyplot as mp
# Two paired samples.
a = [20, 24, 17, 19, 15, 22, 13]
b = [18, 22, 25, 17, 14, 20, 10]
ave_a, ave_b = np.mean(a), np.mean(b)

# Population covariance: the mean of the products of the deviations.
dev_a = np.asarray(a) - ave_a
dev_b = np.asarray(b) - ave_b
cov_ab = (dev_a * dev_b).mean()

# Dividing the covariance by both standard deviations gives the
# correlation coefficient, which is the value printed.
k = cov_ab / (np.std(a) * np.std(b))
print(k)
# Fit a degree-4 polynomial through the samples in `a`, using 1..7 as x values.
x = range(1, 8)
y = a
w = np.polyfit(x, y, deg=4)  # five coefficients, highest power first

# Evaluate the fitted polynomial on a dense grid so the curve looks smooth.
xx = np.linspace(1, 7, 1000)
yy = np.polyval(w, xx)

mp.plot(x, y)                                              # raw data joined by lines
mp.plot(x, a, color="blue", marker="o", linestyle="None")  # raw data as markers only
mp.plot(xx, yy, color="pink")                              # fitted curve
mp.show()
分布 排序 插值 积分
import numpy as np
import matplotlib.pyplot as mp
# Draw 10000 samples from a binomial distribution with 10 trials per sample
# and success probability 0.3. Each entry is an integer between 0 and 10.
a = np.random.binomial(n=10, p=0.3, size=10000)
print(a)
print(a == 3)  # boolean array, True wherever exactly 3 successes occurred
# Empirical probability of scoring exactly 3 times in 10 shots when each
# shot succeeds with probability 0.3.
print("P(3)=", (a == 3).sum() / a.size)
names = np.array(["amy", "bob", "cindy", "devil"])
score1 = np.array([10, 12, 12, 8])
score2 = np.array([4, 0, 8, 7])
# lexsort treats the LAST key as the primary one: sort ascending by score1
# and break ties with score2.
index = np.lexsort([score2, score1])
print(names[index])
# 插值
import scipy.interpolate as si
min_x, max_x = -50, 50
# Twenty coarse samples of sinc(x): the known points to interpolate between.
x = np.linspace(min_x, max_x, 20)
y = np.sinc(x)
mp.scatter(x, y, s=60, color="hotpink", marker="o")  # scatter plot of the samples

# Dense grid on which every interpolator is evaluated.
linear_x = np.linspace(min_x, max_x, 1000)
# Same data, three interpolation kinds, each drawn in its own colour.
for kind, colour in (("cubic", "red"), ("zero", "gray"), ("linear", "limegreen")):
    linear = si.interp1d(x, y, kind=kind)
    mp.plot(linear_x, linear(linear_x), color=colour)
mp.show()
# 积分
import scipy.integrate as si
def f(x):
    """Return the integrand 3*x**2 + x + 1 evaluated at x."""
    return 3 * x ** 2 + x + 1


# quad(func, a, b) integrates from the lower limit a to the upper limit b.
# The limits were previously passed as (5, -5), which flips the sign and
# reported -260 for an integrand that is positive everywhere.
r = si.quad(f, -5, 5)
print(r)
# The same integral with the integrand written inline.
r = si.quad(lambda x: (3 * x ** 2 + x + 1), -5, 5)
print(r)
# r is a tuple: (value of the integral, estimate of the absolute error).
# The value is 260 up to floating-point rounding.
三种插值方式的图