文章目录
第一章 绪论
1.1 假设空间
1.2 归纳偏好
1.2.1 NFL
1.2.2 奥卡姆剃刀
第二章 模型评估
2.1 评估方法
2.1.1 留出法
# Hold-out method: split data into disjoint train / test sets.
import numpy as np
from sklearn.model_selection import train_test_split
# Toy dataset: X has 5 samples with 2 features, y has 5 labels.
# (fixed: `range(5)` was mistakenly passed as a second argument to
#  reshape, which raises a TypeError)
X, y = np.arange(10).reshape((5, 2)), range(5)
# Hold out 1/3 of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=10)
2.1.2 交叉验证法
import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold, RepeatedKFold
# Toy data: 4 samples, 2 features each.
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
# Stratified k-fold cross-validation (keeps class proportions per fold).
kf = StratifiedKFold(n_splits=2)
# Plain k-fold cross-validation.
kf = KFold(n_splits=2)
# Repeated (p times) k-fold cross-validation. NOTE: this last assignment
# wins, so the loop below iterates over the RepeatedKFold splits only.
kf = RepeatedKFold(n_splits=2, n_repeats=2)
# (fixed: the loop body below had lost its indentation)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
2.1.3 自助法
2.2 性能度量
2.2.1 均方误差
import numpy as np
from sklearn import metrics
# Mean squared error (MSE) between ground truth and predictions.
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
mse = metrics.mean_squared_error(y_true, y_pred)
print(mse)
2.2.2 均方根误差
# Root mean squared error (RMSE): square root of the MSE.
import numpy as np
from sklearn import metrics
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
print(rmse)
2.2.3 平均绝对误差
import numpy as np
from sklearn import metrics
# Mean absolute error (MAE): average of |y_true - y_pred|.
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
mae = metrics.mean_absolute_error(y_true, y_pred)
print(mae)
2.3 准确率
import numpy as np
from sklearn.metrics import accuracy_score
# Accuracy: fraction of positions where prediction equals the true label.
y_pred = [0, 2, 1, 3, 4]
y_true = [0, 1, 2, 3, 4]
acc = accuracy_score(y_true, y_pred)
print(acc)
2.4 查准率 查全率
# Precision (查准率) on a multi-class toy example, under each averaging mode.
from sklearn.metrics import precision_score
y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# macro: unweighted mean of per-class precision.
print(precision_score(y_true, y_pred, average='macro'))
# micro: precision computed globally over all decisions.
print(precision_score(y_true, y_pred, average='micro'))
# weighted: per-class precision weighted by class support.
# (fixed: 'weighter' is not a valid option and raises ValueError)
print(precision_score(y_true, y_pred, average='weighted'))
# None: return the per-class precision values as an array.
print(precision_score(y_true, y_pred, average=None))
2.5 PR图 ROC曲线
根据学习器的预测结果按正例可能性大小对样例进行排序,并逐个把样本作为正例进行预测;或根据预测值采取不同的截断点,截断点将样本分为两部分:前一部分为正例,后一部分为反例。然后计算每个截断点处的 P、R、FPR、TPR,根据这些点画出曲线。
第三章 线性模型
3.1 线性模型回归
手动计算梯度下降,参数调整
# Linear regression fitted by hand-written gradient descent using autograd.
# (fixed: all function bodies below had lost their indentation)
import torch
import matplotlib.pyplot as plt


def Produce_X(x):
    """Append a bias column of ones: shape (n,) -> design matrix (n, 2)."""
    x0 = torch.ones(x.numpy().size)
    x = torch.stack((x, x0), dim=1)
    return x


x = torch.Tensor([85, 100, 120, 125, 150])
y = torch.Tensor([250.93, 293.97, 366.56, 400.10, 471.72])
X = Produce_X(x)
# w = (slope, intercept); autograd tracks its gradient.
w = torch.rand(2, requires_grad=True)
inputs = X
print("inputs", inputs)
target = y


def train(epochs=1, learning_rate=0.01):
    """Run plain gradient descent on the squared-error loss; return (w, loss)."""
    for epoch in range(epochs):
        # Forward pass: predictions = X @ w.
        output = inputs.mv(w)
        # Sum-of-squares loss.
        loss = (output - target).pow(2).sum()
        # Backward pass populates w.grad.
        loss.backward()
        # Manual SGD step, then reset the accumulated gradient.
        w.data -= learning_rate * w.grad
        w.grad.zero_()
        # Redraw the current fit every 80 epochs.
        if epoch % 80 == 0:
            draw(output, loss)
    return w, loss


def draw(output, loss):
    """Plot the data points and the current fitted line (visualization only)."""
    plt.cla()
    plt.scatter(x.numpy(), y.numpy())
    plt.plot(x.numpy(), output.data.numpy(), 'r-', lw=5)
    plt.text(0.5, 0, 'Loss=%s' % (loss.item()), fontdict={'size': 20, 'color': 'red'})
    plt.pause(0.005)


W, loss = train(10000, learning_rate=1e-6)
print(W)
print("final loss:", loss.item())
print("weights:", w.data)  # fixed typo: "weigts"
3.2 对数几率回归(LogisticRegression)
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = datasets.load_iris()
iris_x = iris.data
iris_y = iris.target

# 70/30 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.3, random_state=0)
print(y_train)

# Build and fit a logistic-regression (对数几率回归) classifier.
classifier = LogisticRegression(solver='lbfgs', multi_class='auto')
classifier.fit(x_train, y_train)

# Evaluate on the held-out split.
y_predict = classifier.predict(x_test)
print(accuracy_score(y_test, y_predict))
第五章 神经网络
5.1 感知机前向传播
import torch
import torch.nn as nn


# (fixed: class and method bodies below had lost their indentation)
class Perception(nn.Module):
    """Single-hidden-layer perceptron: Linear -> Sigmoid -> Linear -> Sigmoid."""

    def __init__(self, in_dim, hid_dim, out_dim):
        # Initialize the parent nn.Module.
        super(Perception, self).__init__()
        self.layer = nn.Sequential(
            # Linear layer: in_dim inputs -> hid_dim outputs.
            nn.Linear(in_dim, hid_dim),
            nn.Sigmoid(),
            # Linear layer: hid_dim inputs -> out_dim outputs
            # (the original comment wrongly repeated "in -> hid" here).
            nn.Linear(hid_dim, out_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Forward pass: run x through the sequential stack."""
        y = self.layer(x)
        return y


# Demo: a 2 -> 3 -> 2 perceptron on a random batch of 4 samples.
perception = Perception(2, 3, 2)
print(perception)
data = torch.randn(4, 2)
print(data)
output = perception(data)
print(output)
5.2 多层感知机优化
import torch
from torch import nn
from torch import optim


# (fixed: class and method bodies below had lost their indentation)
class MLP(nn.Module):
    """Three-layer perceptron with ReLU activations: in -> hid1 -> hid2 -> out."""

    def __init__(self, in_dim, hid_dim1, hid_dim2, out_dim):
        super(MLP, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(in_dim, hid_dim1),
            nn.ReLU(),
            nn.Linear(hid_dim1, hid_dim2),
            nn.ReLU(),
            nn.Linear(hid_dim2, out_dim),
            # NOTE(review): a ReLU on the logits before CrossEntropyLoss is
            # unusual (it clamps negative scores); kept to match the notes.
            # (fixed: nn.ReLU(0) relied on 0 being falsy for `inplace`)
            nn.ReLU()
        )

    def forward(self, x):
        x = self.layer(x)
        # fixed: the original ended with a bare `return`, so forward
        # returned None and the loss computation below crashed.
        return x


# One SGD training step on a random batch of 10 "images" (28*28 = 784 features).
model = MLP(28 * 28, 300, 200, 10)
data = torch.randn(10, 28 * 28)
output = model(data)
label = torch.Tensor([1, 0, 4, 7, 9, 3, 4, 5, 3, 2]).long()
print(label)
# SGD optimizer over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)
# Cross-entropy loss (expects raw scores + integer class labels).
criterion = nn.CrossEntropyLoss()
loss = criterion(output, label)
# Clear stale gradients, backpropagate, then apply the update.
optimizer.zero_grad()
loss.backward()
optimizer.step()
第六章 支持向量机
6.1 支持向量分类(SVC)
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Two tiny clusters labelled 1 and 2.
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])

# Standardize features, then fit an RBF-kernel support-vector classifier.
clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', gamma='auto'))
clf.fit(X, y)

# Predict labels for two unseen points.
print(clf.predict([[-0.8, -1], [0.8, 1]]))
第七章 卷积神经网络
7.1 卷积神经网络
import torch.nn as nn
import torch
import torch.optim as optim


# (fixed: class and method bodies below had lost their indentation)
class SimpleCNN(nn.Module):
    """Three conv+pool stages followed by a 3-layer classifier head.

    Expects (N, 3, 28, 28) input: spatial size 28 -> 14 -> 7 -> 3 after the
    pools, so the flattened feature size is 128 * 3 * 3 = 1152.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        # Stage 1: 3 -> 32 channels; 3x3 conv (stride 1, pad 1), ReLU, 2x2 max-pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Stage 2: 32 -> 64 channels.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Stage 3: 64 -> 128 channels.
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head: 1152 -> 512 -> 128 -> 10 class scores.
        self.layer4 = nn.Sequential(
            nn.Linear(1152, 512),
            nn.ReLU(True),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # Flatten everything but the batch dimension for the linear head.
        # (fixed: renamed from `input`, which shadowed the builtin,
        #  and removed the leftover debug print of its size)
        flattened = conv3.view(conv3.size(0), -1)
        out = self.layer4(flattened)
        return out


# One SGD training step on a random batch.
model = SimpleCNN()
# Dataset shape: [batch size, channels, height, width].
data = torch.randn(10, 3, 28, 28)
# One integer class label per sample.
label = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Forward pass through the model.
output = model(data)
# Compute the loss.
loss = criterion(output, label)
# Clear stale gradients, backpropagate, then apply the update.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print("损失为:")
print(loss)