# ref: https://raw.githubusercontent.com/justinge/pic-go-for-xbotgo/master/credit-a.csv
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the credit dataset (653 rows x 16 columns, no header row).
# The last column is the class label encoded as -1 / 1.
data = pd.read_csv("/home/data_for_ai_justin/01learn/dataset/credit-a.csv",header=None)
data
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0 0 30.83 0.000 0 0 9 0 1.25 0 0 1 1 0 202 0.0 -1
1 1 58.67 4.460 0 0 8 1 3.04 0 0 6 1 0 43 560.0 -1
2 1 24.50 0.500 0 0 8 1 1.50 0 1 0 1 0 280 824.0 -1
3 0 27.83 1.540 0 0 9 0 3.75 0 0 5 0 0 100 3.0 -1
4 0 20.17 5.625 0 0 9 0 1.71 0 1 0 1 2 120 0.0 -1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
648 0 21.08 10.085 1 1 11 1 1.25 1 1 0 1 0 260 0.0 1
649 1 22.67 0.750 0 0 0 0 2.00 1 0 2 0 0 200 394.0 1
650 1 25.25 13.500 1 1 13 7 2.00 1 0 1 0 0 200 1.0 1
651 0 17.92 0.205 0 0 12 0 0.04 1 1 0 1 0 280 750.0 1
652 0 35.00 3.375 0 0 0 1 8.29 1 1 0 0 0 0 0.0 1
653 rows × 16 columns
# Split the frame: all columns but the last are features, the last is the label.
features = data.iloc[:, :-1]
labels = data.iloc[:, -1]
labels.unique()
# The labels arrive as -1/1 (the SVM convention); BCE expects 0/1, so remap.
labels = data.iloc[:, -1].replace(-1, 0)
labels.unique()
# array([0, 1])
# Convert to float32 tensors; the label vector is reshaped to a column
# (N, 1) so it lines up with the model's single sigmoid output.
X = torch.tensor(features.values, dtype=torch.float32)
X.shape
Y = torch.tensor(labels.values, dtype=torch.float32).reshape(-1, 1)
Y.shape
from torch import nn

# Logistic-regression model: one affine map from the 15 input features
# to a single logit, squashed through a sigmoid into a probability.
layers = [nn.Linear(15, 1), nn.Sigmoid()]
model = nn.Sequential(*layers)
model
Sequential(
(0): Linear(in_features=15, out_features=1, bias=True)
(1): Sigmoid()
)
# Binary cross-entropy: the right loss for a 0/1 target paired with a
# single sigmoid-activated output.
loss_fn = nn.BCELoss()
loss_fn
# Optimizer
opt = torch.optim.Adam(model.parameters(), lr=0.001)
batch_size = 64
epoches = 1000
# BUG FIX: was `X.size(0) // 64` — the batch size was hard-coded, so
# changing `batch_size` above would silently desynchronize the batch count.
# NOTE: integer division drops the trailing partial batch (653 % 64 = 13
# samples are never trained on).
no_batch = X.size(0) // batch_size
no_batch
for epoch in range(epoches):
    total_loss = 0
    acc = 0
    for i in range(no_batch):
        # Slice out the i-th mini-batch.
        start = i * batch_size
        end = (i + 1) * batch_size
        x = X[start:end]
        y = Y[start:end]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        total_loss += loss.item()
        opt.zero_grad()
        loss.backward()
        opt.step()
        # BUG FIX: the original line `y_pred - torch.argmax(y_pred, dim=1)`
        # was a discarded subtraction (and argmax over a single-column output
        # is always 0), and it then compared raw sigmoid probabilities to the
        # 0/1 labels, so `acc` was always ~0. Threshold at 0.5 to obtain hard
        # 0/1 predictions before comparing.
        y_label = (y_pred > 0.5).float()
        acc += (y_label == y).float().mean().item()
    # Report the epoch's summed batch loss and mean batch accuracy.
    print(epoch, "loss", total_loss, "acc", acc / no_batch)
model.state_dict()
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0 0 30.83 0.000 0 0 9 0 1.25 0 0 1 1 0 202 0.0 -1
1 1 58.67 4.460 0 0 8 1 3.04 0 0 6 1 0 43 560.0 -1
2 1 24.50 0.500 0 0 8 1 1.50 0 1 0 1 0 280 824.0 -1
3 0 27.83 1.540 0 0 9 0 3.75 0 0 5 0 0 100 3.0 -1
4 0 20.17 5.625 0 0 9 0 1.71 0 1 0 1 2 120 0.0 -1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
648 0 21.08 10.085 1 1 11 1 1.25 1 1 0 1 0 260 0.0 1
649 1 22.67 0.750 0 0 0 0 2.00 1 0 2 0 0 200 394.0 1
650 1 25.25 13.500 1 1 13 7 2.00 1 0 1 0 0 200 1.0 1
651 0 17.92 0.205 0 0 12 0 0.04 1 1 0 1 0 280 750.0 1
652 0 35.00 3.375 0 0 0 1 8.29 1 1 0 0 0 0 0.0 1
653 rows × 16 columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 653 entries, 0 to 652
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 0 653 non-null int64
1 1 653 non-null float64
2 2 653 non-null float64
3 3 653 non-null int64
4 4 653 non-null int64
5 5 653 non-null int64
6 6 653 non-null int64
7 7 653 non-null float64
8 8 653 non-null int64
9 9 653 non-null int64
10 10 653 non-null int64
11 11 653 non-null int64
12 12 653 non-null int64
13 13 653 non-null int64
14 14 653 non-null float64
15 15 653 non-null int64
dtypes: float64(4), int64(12)
memory usage: 81.8 KB
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
0 0 30.83 0.000 0 0 9 0 1.25 0 0 1 1 0 202 0.0
1 1 58.67 4.460 0 0 8 1 3.04 0 0 6 1 0 43 560.0
2 1 24.50 0.500 0 0 8 1 1.50 0 1 0 1 0 280 824.0
3 0 27.83 1.540 0 0 9 0 3.75 0 0 5 0 0 100 3.0
4 0 20.17 5.625 0 0 9 0 1.71 0 1 0 1 2 120 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
648 0 21.08 10.085 1 1 11 1 1.25 1 1 0 1 0 260 0.0
649 1 22.67 0.750 0 0 0 0 2.00 1 0 2 0 0 200 394.0
650 1 25.25 13.500 1 1 13 7 2.00 1 0 1 0 0 200 1.0
651 0 17.92 0.205 0 0 12 0 0.04 1 1 0 1 0 280 750.0
652 0 35.00 3.375 0 0 0 1 8.29 1 1 0 0 0 0 0.0
653 rows × 15 columns
array([-1, 1])
array([0, 1])
pandas.core.series.Series
torch.Size([653, 15])
torch.Size([653, 1])
Sequential(
(0): Linear(in_features=15, out_features=1, bias=True)
(1): Sigmoid()
)
BCELoss()
10
0 loss 3.319192185997963
1 loss 3.319184333086014
2 loss 3.3191766887903214
3 loss 3.3191688805818558
4 loss 3.319161057472229
5 loss 3.319153569638729
6 loss 3.3191456496715546
7 loss 3.319137752056122
8 loss 3.319130063056946
9 loss 3.3191223815083504
10 loss 3.319114476442337
11 loss 3.3191069066524506
12 loss 3.3190989196300507
13 loss 3.319091595709324
14 loss 3.319083906710148
15 loss 3.319075882434845
16 loss 3.319068267941475
17 loss 3.3190606459975243
18 loss 3.3190529569983482
19 loss 3.3190452307462692
20 loss 3.3190374821424484
21 loss 3.319029949605465
22 loss 3.319022014737129
23 loss 3.3190146163105965
24 loss 3.319006569683552
...
996 loss 3.3130557611584663
997 loss 3.3130510598421097
998 loss 3.3130463287234306
999 loss 3.313041388988495
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
OrderedDict([('0.weight',
tensor([[-1.1939e-01, -3.8938e-03, 1.4590e-02, 3.6475e-01, 9.9049e-02,
-7.4199e-03, 1.6277e-01, -1.2790e-01, 3.5805e+00, 3.5779e-01,
-1.4424e-01, -2.7521e-01, -5.7277e-02, 1.1400e-03, -1.9910e-04]])),
('0.bias', tensor([-0.7137]))])
# Full-dataset accuracy: threshold the predicted probabilities at 0.5
# and compare the resulting hard 0/1 predictions against the targets.
probs = model(X).data.numpy()
hard_preds = (probs > 0.5).astype('int')
(hard_preds == Y.numpy()).mean()
0.8683001531393568
# NOTE: study the line above carefully — make sure you understand what it does!