PyTorch (1): Fully Connected Networks

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
from torch.optim import SGD,Adam
from torchviz import make_dot
import torch.utils.data as Data
import hiddenlayer as hl
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler,MinMaxScaler

Download the dataset file spambase.data locally from:
https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/
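A minimal way to fetch the file programmatically (a sketch using the standard library; the destination path is just an example):

import urllib.request

# Download spambase.data from the UCI repository into the current directory
url="https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
urllib.request.urlretrieve(url,"spambase.data")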

# Read the data with pandas. header=None tells read_csv that the raw file has no header row,
# so integer column indices are generated automatically (unless you supply column names yourself).
spam=pd.read_csv(r"D:\design_cov\spambase.data",header=None)
# Inspect the first rows
spam.head()
      0     1     2    3     4     5     6     7     8     9  ...    48     49   50     51     52     53     54   55    56  57
0  0.00  0.64  0.64  0.0  0.32  0.00  0.00  0.00  0.00  0.00  ...  0.00  0.000  0.0  0.778  0.000  0.000  3.756   61   278   1
1  0.21  0.28  0.50  0.0  0.14  0.28  0.21  0.07  0.00  0.94  ...  0.00  0.132  0.0  0.372  0.180  0.048  5.114  101  1028   1
2  0.06  0.00  0.71  0.0  1.23  0.19  0.19  0.12  0.64  0.25  ...  0.01  0.143  0.0  0.276  0.184  0.010  9.821  485  2259   1
3  0.00  0.00  0.00  0.0  0.63  0.00  0.31  0.63  0.31  0.63  ...  0.00  0.137  0.0  0.137  0.000  0.000  3.537   40   191   1
4  0.00  0.00  0.00  0.0  0.63  0.00  0.31  0.63  0.31  0.63  ...  0.00  0.135  0.0  0.135  0.000  0.000  3.537   40   191   1

5 rows × 58 columns

# iloc[rows, cols]: the first slice selects rows, the second selects columns
x=spam.iloc[:,0:57].values  # all rows, columns 0-56: the 57 feature columns
y=spam.iloc[:,57].values    # column 57: the target label
print(x)
print(len(x),len(x[0]))
print(y)
print(len(y))
[[0.000e+00 6.400e-01 6.400e-01 ... 3.756e+00 6.100e+01 2.780e+02]
 [2.100e-01 2.800e-01 5.000e-01 ... 5.114e+00 1.010e+02 1.028e+03]
 [6.000e-02 0.000e+00 7.100e-01 ... 9.821e+00 4.850e+02 2.259e+03]
 ...
 [3.000e-01 0.000e+00 3.000e-01 ... 1.404e+00 6.000e+00 1.180e+02]
 [9.600e-01 0.000e+00 0.000e+00 ... 1.147e+00 5.000e+00 7.800e+01]
 [0.000e+00 0.000e+00 6.500e-01 ... 1.250e+00 5.000e+00 4.000e+01]]
4601 57
[1 1 1 ... 0 0 0]
4601
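Before splitting, it is also worth checking the class balance, e.g. with numpy (already imported above):

# Count samples per class: index 0 = non-spam, index 1 = spam
print(np.bincount(y))
print(np.bincount(y)/len(y))  # class proportions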
## train_test_split() randomly splits sample data into a training set and a test set.
# Advantage: the split is random and objective, avoiding human bias (manual slicing also works).
# train_X,test_X,train_y,test_y = train_test_split(train_data,train_target,test_size=0.3,random_state=123)
#
# Parameters:
# train_data: the sample data to split
# train_target: the labels of the sample data
# test_size: fraction of the samples used for testing (an integer means an absolute sample count)
# random_state: random seed; fixing it makes the split reproducible, while omitting it gives a different split on every run
train_x,test_x,train_y,test_y=train_test_split(x,y,test_size=0.25,random_state=123)
print(len(train_x))     # number of rows
print(len(train_x[0]))  # number of columns
3450
57

In machine learning practice we often need to bring data of different scales onto a common scale, or map differently distributed data onto a particular distribution; these needs are collectively called making the data "dimensionless".
For distance-based models such as K-nearest neighbors and KMeans clustering, rescaling improves model accuracy by preventing a feature with a very large value range from dominating the distance computation.
Decision trees and tree ensembles are a notable exception: they handle data of any scale well, so no rescaling is needed for them.
Rescaling can be linear or nonlinear. Linear rescaling includes centering (zero-centered or mean-subtraction) and scaling. Centering subtracts a fixed value from every record, shifting the data to a new location; scaling divides by a fixed value, compressing the data into a fixed range (taking logarithms is also a form of scaling).

preprocessing.MinMaxScaler

When the data x is centered on its minimum and then scaled by the range (maximum minus minimum), the data is shifted by the minimum and squeezed into [0,1]. This process is called normalization (also known as Min-Max Scaling).
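In formula form this is x_scaled = (x - min) / (max - min), computed column by column. A minimal sketch on toy data (not the spam features) verifying the manual formula against MinMaxScaler:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

data=np.array([[1.0,10.0],[2.0,20.0],[4.0,40.0]])
# Manual min-max scaling: shift by the column minimum, divide by the column range
manual=(data-data.min(axis=0))/(data.max(axis=0)-data.min(axis=0))
# The same transformation via sklearn
sk=MinMaxScaler(feature_range=(0,1)).fit_transform(data)
print(np.allclose(manual,sk))  # True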

Several scaler preprocessing methods:
fit(): calculates the parameters (e.g. μ and σ) and saves them as internal objects.
Explanation: in short, it computes statistics intrinsic to the training set X, such as its mean, variance, maximum, and minimum.
transform(): applies the transformation to a particular dataset using these calculated parameters.
Explanation: building on fit, it performs the standardization, dimensionality reduction, normalization, etc. (depending on the specific tool, e.g. PCA or StandardScaler).
fit_transform(): joins the fit() and transform() methods to transform a dataset.
Explanation: fit_transform is the combination of fit and transform, covering both the fitting and the conversion.
Both transform() and fit_transform() apply some uniform treatment to the data (e.g. standardizing to N(0,1), scaling (mapping) the data to a fixed interval, normalization, regularization).
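The practical consequence: call fit (or fit_transform) only on the training set, then reuse the stored statistics to transform the test set, so no test-set information leaks into preprocessing. A small sketch of what fit stores (data_min_ and data_max_ are real MinMaxScaler attributes; demo_scaler is just an illustrative name):

demo_scaler=MinMaxScaler()
demo_scaler.fit(train_x)          # learns per-column min and max from the training data only
print(demo_scaler.data_min_[:5])  # column minima seen during fit
print(demo_scaler.data_max_[:5])  # column maxima seen during fit
# transform() then applies (x - data_min_) / (data_max_ - data_min_) using these stored values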

# Normalize the data to [0,1] with MinMaxScaler
scales=MinMaxScaler(feature_range=(0,1))
train_x=scales.fit_transform(train_x)
test_x=scales.transform(test_x)
train_x
array([[0.        , 0.        , 0.        , ..., 0.00054471, 0.00030036,
        0.00044192],
       [0.        , 0.        , 0.0745098 , ..., 0.00177304, 0.00330396,
        0.00795455],
       [0.        , 0.        , 0.04901961, ..., 0.00312846, 0.00280336,
        0.01313131],
       ...,
       [0.18527919, 0.        , 0.07058824, ..., 0.00245574, 0.00530637,
        0.03011364],
       [0.        , 0.        , 0.        , ..., 0.00131094, 0.00750901,
        0.01243687],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00025253]])
test_x
array([[0.        , 0.        , 0.        , ..., 0.00059464, 0.00070084,
        0.00296717],
       [0.        , 0.        , 0.        , ..., 0.00236042, 0.0015018 ,
        0.00448232],
       [0.        , 0.        , 0.        , ..., 0.00077167, 0.00140168,
        0.00227273],
       ...,
       [0.        , 0.        , 0.        , ..., 0.00085066, 0.00070084,
        0.00189394],
       [0.        , 0.06582633, 0.18431373, ..., 0.02315025, 0.02442932,
        0.02001263],
       [0.        , 0.03221289, 0.09019608, ..., 0.00104948, 0.00120144,
        0.00688131]])
# Convert the numpy arrays to tensors
x_train_nots=torch.from_numpy(train_x.astype(np.float32))
y_train=torch.from_numpy(train_y.astype(np.int64))
x_test_nots=torch.from_numpy(test_x.astype(np.float32))
y_test=torch.from_numpy(test_y.astype(np.int64))
# Bundle features and labels together with TensorDataset
train_data=Data.TensorDataset(x_train_nots,y_train)
print(x_train_nots)
print(x_train_nots.size())
print(y_train.size())
print(x_test_nots.size())
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0005, 0.0003, 0.0004],
        [0.0000, 0.0000, 0.0745,  ..., 0.0018, 0.0033, 0.0080],
        [0.0000, 0.0000, 0.0490,  ..., 0.0031, 0.0028, 0.0131],
        ...,
        [0.1853, 0.0000, 0.0706,  ..., 0.0025, 0.0053, 0.0301],
        [0.0000, 0.0000, 0.0000,  ..., 0.0013, 0.0075, 0.0124],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0003]])
torch.Size([3450, 57])
torch.Size([3450])
torch.Size([1151, 57])
train_note_loader=Data.DataLoader(
    dataset=train_data,  # data source
    batch_size=64,       # number of samples in each mini-batch
    shuffle=True,        # reshuffle the data before splitting it into batches
    num_workers=0        # number of worker processes used to load batches (0 = load in the main process)
)
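A quick sanity check that the loader yields batches of the expected shape:

# Pull a single batch: features should be [64, 57], labels [64]
b_x,b_y=next(iter(train_note_loader))
print(b_x.shape,b_y.shape)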
class MLPclassifica(nn.Module):
    def __init__(self):
        super(MLPclassifica, self).__init__()
        self.hidden1=nn.Sequential(
            nn.Linear(57,30,True),
            nn.ReLU()
        )
        self.hidden2=nn.Sequential(
            nn.Linear(30,10),
            nn.ReLU()
        )
        self.classifica=nn.Sequential(
            nn.Linear(10,2),  # two outputs so that CrossEntropyLoss can be used
            nn.Sigmoid()      # note: CrossEntropyLoss expects raw logits, so this Sigmoid is normally omitted
        )

    def forward(self,x):
        fc1=self.hidden1(x)
        fc2=self.hidden2(fc1)
        output=self.classifica(fc2)
        return output

mlpc=MLPclassifica()
optimizer=Adam(mlpc.parameters(),lr=0.01)
loss_func=nn.CrossEntropyLoss()  # classification loss (covers the two-class case here)
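nn.CrossEntropyLoss applies log-softmax internally followed by the negative log-likelihood, which is why it expects raw scores; the Sigmoid at the end of the model is unusual (training still works, but the output layer is typically left as plain logits). A minimal sketch of the equivalence on dummy scores:

import torch.nn.functional as F

logits=torch.randn(4,2)          # raw scores for 4 samples, 2 classes
targets=torch.tensor([0,1,1,0])
# CrossEntropyLoss == log_softmax followed by NLL loss
ce=F.cross_entropy(logits,targets)
nll=F.nll_loss(F.log_softmax(logits,dim=1),targets)
print(torch.allclose(ce,nll))  # True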
history1=hl.History()
canvas1=hl.Canvas()
print_step=25
mlpc
MLPclassifica(
  (hidden1): Sequential(
    (0): Linear(in_features=57, out_features=30, bias=True)
    (1): ReLU()
  )
  (hidden2): Sequential(
    (0): Linear(in_features=30, out_features=10, bias=True)
    (1): ReLU()
  )
  (classifica): Sequential(
    (0): Linear(in_features=10, out_features=2, bias=True)
    (1): Sigmoid()
  )
)
# Visualize the computation graph with torchviz on a dummy input
x=torch.randn(1,57).requires_grad_(True)
y=mlpc(x)
make_dot(y,params=dict(mlpc.named_parameters()))
# One warm-up pass over the training data (effectively epoch 0) to verify that training runs
for step,(b_x,b_y) in enumerate(train_note_loader):
    output=mlpc(b_x)
    train_loss=loss_func(output,b_y)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    niter=0*len(train_note_loader)+step+1  # global iteration counter (epoch index 0)
    if niter%print_step==0:
        output=mlpc(x_test_nots)
        _, pre_lab = torch.max(output,1)  # argmax of each row = predicted class
        test_accuracy=accuracy_score(y_test,pre_lab)
        print(test_accuracy)
        history1.log(niter,train_loss=train_loss,test_accuracy=test_accuracy)

        with canvas1:
            canvas1.draw_plot(history1['train_loss'])
            canvas1.draw_plot(history1['test_accuracy'])

[Figure: hiddenlayer canvas showing the train_loss and test_accuracy curves from the warm-up pass]

for epoch in range(15):
    for step,(b_x,b_y) in enumerate(train_note_loader):
        output=mlpc(b_x)
        train_loss=loss_func(output,b_y)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        niter=epoch*len(train_note_loader)+step+1
        if niter%print_step==0:
            output=mlpc(x_test_nots)
            _, pre_lab = torch.max(output,1)  # argmax of each row = predicted class
            test_accuracy=accuracy_score(y_test,pre_lab)
            history1.log(niter,train_loss=train_loss,test_accuracy=test_accuracy)
            print('niter:-',niter,test_accuracy)
            with canvas1:
                canvas1.draw_plot(history1['train_loss'])
                canvas1.draw_plot(history1['test_accuracy'])

[Figure: train_loss and test_accuracy curves over the 15 training epochs]
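confusion_matrix and classification_report are imported at the top but never used; after training, a final evaluation pass might look like this (wrapped in torch.no_grad() so no gradients are tracked):

with torch.no_grad():
    output=mlpc(x_test_nots)
    _,pre_lab=torch.max(output,1)
print(accuracy_score(y_test,pre_lab))
print(confusion_matrix(y_test,pre_lab))       # rows: true class, columns: predicted class
print(classification_report(y_test,pre_lab))  # precision / recall / F1 per class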

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.datasets import fetch_california_housing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD,Adam
import torch.utils.data as Data
from matplotlib import pyplot as plt
import seaborn as sns
# Use the California housing dataset bundled with sklearn
housedata=fetch_california_housing()
X_train,X_test,Y_train,Y_test=train_test_split(housedata.data,housedata.target,test_size=0.3,random_state=42)
scale=StandardScaler()
train_x=scale.fit_transform(X_train)
test_x=scale.transform(X_test)
# Inspect the data as a 2-D table
# Approach 1: concatenate the scaled features and the target into one DataFrame
a=pd.concat([pd.DataFrame(train_x),pd.DataFrame(Y_train)],axis=1)
print(a)
              0         1         2         3         4         5         6  \
0      0.133506  0.509357  0.181060 -0.273850 -0.184117 -0.010825 -0.805682   
1     -0.532218 -0.679873 -0.422630 -0.047868 -0.376191 -0.089316 -1.339473   
2      0.170990 -0.362745  0.073128 -0.242600 -0.611240 -0.044800 -0.496645   
3     -0.402916 -1.155565  0.175848 -0.008560 -0.987495 -0.075230  1.690024   
4     -0.299285  1.857152 -0.259598 -0.070993  0.086015 -0.066357  0.992350   
...         ...       ...       ...       ...       ...       ...       ...   
14443  1.308827  0.509357  0.281603 -0.383849 -0.675265 -0.007030 -0.875918   
14444 -0.434100  0.350793  0.583037  0.383154  0.285105  0.063443 -0.763541   
14445 -0.494787  0.588640 -0.591570 -0.040978  0.287736  0.017201 -0.758858   
14446  0.967171 -1.076283  0.390149 -0.067164  0.306154  0.004821  0.903385   
14447 -0.683202  1.857152 -0.829656 -0.087729  1.044630 -0.081672  0.992350   

              7        0  
0      0.780934  1.93800  
1      1.245270  1.69700  
2     -0.277552  2.59800  
3     -0.706938  1.36100  
4     -1.430902  5.00001  
...         ...      ...  
14443  0.810891  2.29200  
14444  1.075513  0.97800  
14445  0.601191  2.22100  
14446 -1.186252  2.83500  
14447 -1.415923  3.25000  

[14448 rows x 9 columns]
# Approach 2: build a DataFrame with the dataset's real feature names
# housedatadf=pd.DataFrame(data=train_x)
housedatadf=pd.DataFrame(data=train_x,columns=housedata.feature_names)
housedatadf['target']=Y_train
print(housedatadf)
         MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  \
0      0.133506  0.509357  0.181060  -0.273850   -0.184117 -0.010825   
1     -0.532218 -0.679873 -0.422630  -0.047868   -0.376191 -0.089316   
2      0.170990 -0.362745  0.073128  -0.242600   -0.611240 -0.044800   
3     -0.402916 -1.155565  0.175848  -0.008560   -0.987495 -0.075230   
4     -0.299285  1.857152 -0.259598  -0.070993    0.086015 -0.066357   
...         ...       ...       ...        ...         ...       ...   
14443  1.308827  0.509357  0.281603  -0.383849   -0.675265 -0.007030   
14444 -0.434100  0.350793  0.583037   0.383154    0.285105  0.063443   
14445 -0.494787  0.588640 -0.591570  -0.040978    0.287736  0.017201   
14446  0.967171 -1.076283  0.390149  -0.067164    0.306154  0.004821   
14447 -0.683202  1.857152 -0.829656  -0.087729    1.044630 -0.081672   

       Latitude  Longitude   target  
0     -0.805682   0.780934  1.93800  
1     -1.339473   1.245270  1.69700  
2     -0.496645  -0.277552  2.59800  
3      1.690024  -0.706938  1.36100  
4      0.992350  -1.430902  5.00001  
...         ...        ...      ...  
14443 -0.875918   0.810891  2.29200  
14444 -0.763541   1.075513  0.97800  
14445 -0.758858   0.601191  2.22100  
14446  0.903385  -1.186252  2.83500  
14447  0.992350  -1.415923  3.25000  

[14448 rows x 9 columns]
# Feature correlation heatmap
datacor=np.corrcoef(housedatadf.values,rowvar=0)
datacor=pd.DataFrame(data=datacor,columns=housedatadf.columns,index=housedatadf.columns)
plt.figure(figsize=(8,6))
ax=sns.heatmap(datacor,square=True,annot=True,fmt=".3f",linewidths=.5,cmap="YlGnBu")
plt.show()

[Figure: correlation heatmap of the eight features and the target]
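To read off which features correlate most strongly with the target, one can also sort the target column of the correlation matrix (a small convenience, not part of the original heatmap):

# Absolute correlation of each feature with the target, strongest first
print(datacor['target'].abs().sort_values(ascending=False))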

train_xt=torch.from_numpy(train_x.astype(np.float32))
train_yt=torch.from_numpy(Y_train.astype(np.float32))
test_xt=torch.from_numpy(test_x.astype(np.float32))
test_yt=torch.from_numpy(Y_test.astype(np.float32))
train_data=Data.TensorDataset(train_xt,train_yt)
test_data=Data.TensorDataset(test_xt,test_yt)
train_loader=Data.DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    num_workers=0
)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.hidden1=nn.Linear(8,100,bias=True)
        self.hidden2=nn.Linear(100,100)
        self.hidden3=nn.Linear(100,50)
        self.predict=nn.Linear(50,1)

    def forward(self,x):
        x=F.relu(self.hidden1(x))
        x=F.relu(self.hidden2(x))
        x=F.relu(self.hidden3(x))
        output=self.predict(x)
        return output
mlpreg=MLP()
print(mlpreg)
MLP(
  (hidden1): Linear(in_features=8, out_features=100, bias=True)
  (hidden2): Linear(in_features=100, out_features=100, bias=True)
  (hidden3): Linear(in_features=100, out_features=50, bias=True)
  (predict): Linear(in_features=50, out_features=1, bias=True)
)
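As a sanity check, the parameter count follows directly from the layer sizes: (8·100+100) + (100·100+100) + (100·50+50) + (50·1+1) = 16,101.

# Total trainable parameters: 900 + 10100 + 5050 + 51 = 16101
print(sum(p.numel() for p in mlpreg.parameters() if p.requires_grad))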
optimizer=SGD(mlpreg.parameters(),lr=0.01)
loss_func=nn.MSELoss()
train_loss_all=[]
for epoch in range(30):
    train_loss=0
    train_num=0
    for step,(b_x,b_y) in enumerate(train_loader):
        output=mlpreg(b_x)
        # output has shape [batch,1] while b_y has shape [batch]; squeeze to avoid unwanted broadcasting in MSELoss
        loss=loss_func(output.squeeze(-1),b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss+=loss.item()*b_x.size(0)
        train_num+=b_x.size(0)
        # accumulate the summed loss and sample count to compute the epoch-average loss
        print(epoch,loss.item(),b_x.size(0))
    train_loss_all.append(train_loss/train_num)

0 1.2354446649551392 64
0 1.1876583099365234 64
0 1.2846877574920654 64  ... (output truncated)
plt.figure(figsize=(10,6))
plt.plot(train_loss_all,'ro-',label="Train loss")
plt.legend()
plt.grid()
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

[Figure: epoch-average training loss over 30 epochs]

pre_y=mlpreg(test_xt)
pre_y=pre_y.data.numpy()
mae=mean_absolute_error(Y_test,pre_y)
print("mae:",mae)
mae: 0.8968341607985509
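mean_squared_error is imported at the top but never used; for completeness, MSE and RMSE on the same predictions can be computed as:

mse=mean_squared_error(Y_test,pre_y)
print("mse:",mse,"rmse:",np.sqrt(mse))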
index=np.argsort(Y_test)
plt.figure(figsize=(12,5))
plt.plot(np.arange(len(Y_test)),Y_test[index],'r',label="Original Y")
plt.scatter(np.arange(len(pre_y)),pre_y[index],s=3,c="b",label="Prediction")
plt.legend(loc="upper left")
plt.grid()
plt.xlabel("Index")
plt.ylabel("Y")
plt.show()

[Figure: test targets sorted in ascending order (red line) with model predictions overlaid (blue dots)]
