机器学习复习

第一章 绪论

1.1 假设空间

1.2 归纳偏好

1.2.1 NFL

1.2.2 奥卡姆剃刀

第二章 模型评估

2.1 评估方法

2.1.1 留出法

# Hold-out method: split the data once into a train set and a test set.
import numpy as np
from sklearn.model_selection import train_test_split

# Toy dataset: X has 5 samples with 2 features each, y has 5 labels.
# BUG FIX: the original passed `range(5)` as a second argument to
# reshape() — `np.arange(10).reshape((5,2), range(5))` raises a TypeError.
# X and y must be created separately.
X, y = np.arange(10).reshape((5, 2)), np.arange(5)
# Reserve 33% of the samples for testing; fix random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=10)

2.1.2 交叉验证法

import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold, RepeatedKFold

X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
# BUG FIX: the original rebound the same name `kf` three times, so the first
# two splitters were silently discarded; give each splitter its own name.
# Stratified k-fold: preserves per-class ratios in every fold (needs >=2
# samples per class to actually split — this toy y has one sample per class).
skf = StratifiedKFold(n_splits=2)
# Plain k-fold cross-validation.
kf = KFold(n_splits=2)
# p-times repeated k-fold (here: 2 folds, repeated 2 times).
rkf = RepeatedKFold(n_splits=2, n_repeats=2)
# The original loop effectively used the last-assigned (repeated) splitter;
# keep that behavior here.
for train_index, test_index in rkf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

2.1.3 自助法

2.2 性能度量

2.2.1 均方误差

import numpy as np
from sklearn import metrics

# Mean squared error: average of the squared prediction errors.
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
mse = metrics.mean_squared_error(y_true, y_pred)
print(mse)

2.2.2 均方根误差

# Root mean squared error: square root of the MSE, in the units of y.
import numpy as np
from sklearn import metrics

y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
print(rmse)

2.2.3 平均绝对误差

import numpy as np
from sklearn import metrics

# Mean absolute error: average of |y_true - y_pred|.
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.5, 5.0, 8.0, 4.5, 1.0])
mae = metrics.mean_absolute_error(y_true, y_pred)
print(mae)

2.3 准确率

import numpy as np
from sklearn.metrics import accuracy_score

# Two of the five predictions mismatch (indices 1 and 2) -> accuracy 0.6.
y_pred = [0, 2, 1, 3, 4]
y_true = [0, 1, 2, 3, 4]
acc = accuracy_score(y_true, y_pred)
print(acc)

2.4 查准率 查全率

(原文此处为插图,图片在转载时丢失)

# Precision under the different multi-class averaging schemes.
from sklearn.metrics import precision_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# 'macro': unweighted mean of per-class precision.
print(precision_score(y_true, y_pred, average='macro'))
# 'micro': global precision over all instances.
print(precision_score(y_true, y_pred, average='micro'))
# BUG FIX: 'weighter' is not a valid averaging mode and raises ValueError;
# the correct name is 'weighted' (per-class precision weighted by support).
print(precision_score(y_true, y_pred, average='weighted'))
# average=None returns the per-class precision scores as an array.
print(precision_score(y_true, y_pred, average=None))


2.5 PR图 ROC曲线

根据学习器的预测结果,按"是正例的可能性"从大到小对样例进行排序,然后逐个把样本作为正例进行预测;等价地,对预测值采取不同的截断点,每个截断点把样本分为两部分:前一部分判为正例,后一部分判为反例。对每个截断点分别计算 P、R、FPR、TPR,再根据这些点画出 P-R 曲线或 ROC 曲线。

第三章 线性模型

3.1 线性回归模型

手动实现梯度下降并调整参数

import torch
import matplotlib.pyplot as plt
def Produce_X(x):
    """Append a constant bias column of ones to the 1-D tensor *x*.

    Returns a tensor of shape (len(x), 2): column 0 is x, column 1 is
    all ones (the intercept term for the linear model).
    """
    bias = torch.ones(x.numpy().size)
    return torch.stack((x, bias), dim=1)
# Training data: feature values (x) and regression targets (y).
x = torch.Tensor([85,100,120,125,150])
y = torch.Tensor([250.93,293.97,366.56,400.10,471.72])
# Design matrix with an appended bias column (see Produce_X).
X = Produce_X(x)
# Weight vector (slope, intercept), randomly initialized and tracked by autograd.
w = torch.rand(2,requires_grad=True)

inputs = X
print("inputs",inputs)
target = y
# Training loop (manual gradient descent on the global weights `w`).
def train(epochs=1, learning_rate=0.01):
    """Run `epochs` steps of gradient descent; return the final (w, loss)."""
    for step in range(epochs):
        # Forward pass: predictions are X @ w.
        pred = inputs.mv(w)
        print("output", pred)
        # Sum-of-squares loss against the target values.
        loss = (pred - target).pow(2).sum()
        # Backward pass, then one manual descent step on the raw data
        # (clear the accumulated gradient afterwards).
        loss.backward()
        w.data -= learning_rate * w.grad
        w.grad.zero_()
        # Redraw the current fit every 80 epochs.
        if step % 80 == 0:
            draw(pred, loss)
    return w, loss
# Plotting helper (visualisation only — not part of the learning logic).
def draw(output,loss):
    # Clear the axes, then redraw the data points and the current fitted line.
    plt.cla()
    plt.scatter(x.numpy(),y.numpy())
    plt.plot(x.numpy(),output.data.numpy(),'r-',lw=5)
    # Annotate the figure with the current loss value.
    plt.text(0.5,0,'Loss=%s'%(loss.item()),fontdict={'size':20,'color':'red'})
    # Brief pause so the figure window refreshes (simple animation effect).
    plt.pause(0.005)

# Train for 10000 epochs with a tiny learning rate (the inputs are large,
# so a bigger rate would make the sum-of-squares loss diverge).
W, loss = train(10000, learning_rate=1e-6)
print(W)
print("final loss:", loss.item())
# FIX: corrected the misspelled output label ("weigts" -> "weights").
print("weights:", w.data)

3.2 对数几率回归(LogisticRegression)

from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the iris data set (150 samples, 4 features, 3 classes).
iris = datasets.load_iris()
features, labels = iris.data, iris.target
# Hold out 30% of the samples for testing; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=0)
print(y_train)
# Fit a logistic-regression classifier on the training split.
classifier = LogisticRegression(solver='lbfgs', multi_class='auto')
classifier.fit(x_train, y_train)
# Score the classifier on the held-out split.
y_predict = classifier.predict(x_test)
print(accuracy_score(y_test, y_predict))

第五章 神经网络

5.1 感知机前向传播

import torch
import torch.nn as nn
# Perceptron definition
class Perception(nn.Module):
    """Two-layer perceptron: Linear -> Sigmoid -> Linear -> Sigmoid."""

    def __init__(self, in_dim, hid_dim, out_dim):
        # Initialize the parent nn.Module machinery first.
        super(Perception, self).__init__()
        # Stack the layers: hidden linear map, sigmoid activation,
        # output linear map, sigmoid activation.
        self.layer = nn.Sequential(
            nn.Linear(in_dim, hid_dim),
            nn.Sigmoid(),
            nn.Linear(hid_dim, out_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Forward pass: run x through the sequential stack."""
        return self.layer(x)
# Build a 2-input, 3-hidden, 2-output perceptron and print its structure.
perception = Perception(2, 3, 2)
print(perception)

# Four random samples with two features each; run them through the network.
data = torch.randn(4, 2)
print(data)
output = perception(data)
print(output)

5.2 多层感知机优化

import torch
from torch import nn
from torch import optim
class MLP(nn.Module):
    """Multilayer perceptron with two hidden layers (e.g. 784 -> 300 -> 200 -> 10)."""

    def __init__(self, in_dim, hid_dim1, hid_dim2, out_dim):
        super(MLP, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(in_dim, hid_dim1),
            nn.ReLU(),
            nn.Linear(hid_dim1, hid_dim2),
            nn.ReLU(),
            nn.Linear(hid_dim2, out_dim),
            # FIX: the original `nn.ReLU(0)` passed 0 as the `inplace` flag;
            # use the default constructor.
            # NOTE(review): a ReLU on the logits before CrossEntropyLoss is
            # unusual (it clamps negative logits to 0) — kept for parity with
            # the original architecture, but consider removing it.
            nn.ReLU(),
        )

    def forward(self, x):
        # FIX: the original had a bare `return` after `x = self.layer(x)`,
        # so forward() returned None and the downstream loss computation
        # crashed. Return the network output.
        return self.layer(x)
# Initialize the network: 28*28 inputs (a flattened image), 10 classes.
model = MLP(28*28, 300, 200, 10)
# A batch of 10 random flattened "images".
data = torch.randn(10, 28*28)
output = model(data)
# Integer class labels, one per sample.
label = torch.Tensor([1,0,4,7,9,3,4,5,3,2]).long()
print(label)
# SGD optimizer over the model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)
# Cross-entropy loss (expects raw logits and integer class labels).
criterion = nn.CrossEntropyLoss()
loss = criterion(output, label)
# One optimization step: clear stale gradients, backpropagate, update weights.
# FIX: dropped the `o = ...` / `l = ...` / `ol = ...` assignments — these
# three calls all return None, so binding their results was dead code.
optimizer.zero_grad()
loss.backward()
optimizer.step()

第六章 支持向量机

6.1 支持向量分类(SVC)

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Two linearly separable clusters: class 1 in the lower-left, class 2 upper-right.
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
# Standardize features, then fit a support vector classifier with an RBF kernel.
clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', gamma='auto'))
clf.fit(X, y)
# Predict two new points, one near each cluster.
print(clf.predict([[-0.8, -1], [0.8, 1]]))

第七章 卷积神经网络

7.1 卷积神经网络

import torch.nn as nn
import torch
import torch.optim as optim
class SimpleCNN(nn.Module):
    """Small CNN for 10-class classification of 3x28x28 images.

    Three conv+pool stages (channels 3 -> 32 -> 64 -> 128, spatial size
    28 -> 14 -> 7 -> 3), then a 3-layer classifier head on the flattened
    128*3*3 = 1152 features.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        # Stage 1: 3 -> 32 channels (3x3 kernel, stride 1, padding 1),
        # ReLU, then halve the spatial size with a 2x2 max-pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Stage 2: 32 -> 64 channels.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Stage 3: 64 -> 128 channels.
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head: 1152 -> 512 -> 128 -> 10 logits.
        self.layer4 = nn.Sequential(
            nn.Linear(1152, 512),
            nn.ReLU(True),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten to (batch, 1152) for the fully connected head.
        # FIX: renamed the local `input` (it shadowed the builtin) and
        # removed the leftover debug `print(input.size())`.
        flat = out.view(out.size(0), -1)
        return self.layer4(flat)

model = SimpleCNN()
# A batch of 10 random RGB images: [count, channels, height, width].
data = torch.randn(10, 3, 28, 28)
# One class label per image.
label = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Forward pass (invokes SimpleCNN.forward), then compute the loss.
output = model(data)
loss = criterion(output, label)
# One SGD update step.
optimizer.zero_grad()   # clear stale gradients
loss.backward()         # backpropagate
optimizer.step()        # apply the weight update

print("损失为:")
print(loss)

  • 1
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值