# PyTorch实现Logistic Regression

-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1

def loadData():
    """Load the two-feature binary-classification samples from 'testSet.txt'.

    Every non-blank line of the file must contain three whitespace-separated
    fields: two floats (the features) followed by an integer label.

    Returns:
        A tuple ``(X, y)`` of tensors created with ``torch.from_numpy``:
        ``X`` holds one ``[x1, x2]`` row per sample (float64, because it is
        built from a NumPy float array) and ``y`` holds the integer labels.

    Raises:
        OSError: if 'testSet.txt' cannot be opened.
        ValueError / IndexError: if a non-blank line is malformed.
    """
    X = []
    y = []
    # `with` closes the file even if a line fails to parse.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            X.append([float(lineArr[0]), float(lineArr[1])])
            y.append(int(lineArr[2]))
    # NOTE: the pasted source had a stray `X, y = loadData()` at this point,
    # which would recurse without end; callers invoke loadData() themselves.
    # Convert the Python lists to NumPy arrays, then to tensors.
    X = torch.from_numpy(np.array(X))
    y = torch.from_numpy(np.array(y))
    return X, y


# Re-wrap X as a torch.Tensor.
# NOTE(review): `X` is not assigned at module scope anywhere in the visible
# source, so this line looks like a stray notebook cell (presumably meant to
# run after `X, y = loadData()`) — confirm where it belongs.
X = torch.Tensor(X)


class Logistic(nn.Module):
    """Logistic-regression model: one linear layer followed by a sigmoid.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of output units of the linear layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.layer = nn.Linear(input_size, hidden_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        """Return ``sigmoid(layer(X))`` for the input batch ``X``."""
        out = self.layer(X)
        out = self.sigmoid(out)
        return out


def main(num_epochs=10000, lr=0.01):
    """Train the logistic-regression model on the data from loadData().

    Every 100 iterations the current loss and training accuracy are printed.

    Args:
        num_epochs: number of full-batch gradient-descent steps.
        lr: learning rate passed to the SGD optimizer.

    Returns:
        The trained ``Logistic`` model, so callers can read its parameters.
    """
    X, y = loadData()
    # nn.Linear parameters are float32; cast the features to match.
    X = X.float()
    # y must be a column vector so it lines up with the model output; -1 lets
    # the row count follow the data instead of hard-coding 100 samples.
    y = y.reshape(-1, 1).float()
    # m is the number of samples, n the number of features (input_size).
    m, n = X.shape
    h_size = 1
    model = Logistic(n, h_size)
    # Loss function.
    criterion = nn.MSELoss()
    # Optimizer.
    optimizer = optim.SGD(model.parameters(), lr=lr)

    for t in range(num_epochs):
        # Forward pass: predicted probabilities for every sample.
        y_pred = model(X)
        # Loss takes (input, target) in that order.
        loss = criterion(y_pred, y)
        if t % 100 == 99:
            # Threshold the probabilities at 0.5 to get hard 0/1 predictions,
            # then count how many match the labels.
            mask = y_pred.ge(0.5).float()
            correct = (mask == y).sum()
            accuracy = correct.item() / m
            print("第{}次损失为{}，正确率为{}".format(t, loss.item(), accuracy))
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        # Back-propagate to compute the gradients of the loss.
        loss.backward()
        # Apply one SGD update to the parameters.
        optimizer.step()

    return model


def plot(weights, bias):
    """Scatter the data set and draw the learned decision boundary.

    The boundary is the line ``w1 * x1 + w2 * x2 + b = 0`` solved for x2.

    Args:
        weights: 2-D array-like indexed as ``weights[0][0]`` (w1) and
            ``weights[0][1]`` (w2).
        bias: scalar bias term b.
    """
    # Load the data set and convert features and labels to NumPy arrays.
    dataMat, labelMat = loadData()
    dataArr = np.array(dataMat)
    labels = np.array(labelMat).astype(int).ravel()
    # Split the samples by label: 1 is the positive class, anything else is
    # treated as negative.
    positive = dataArr[labels == 1]
    negative = dataArr[labels != 1]
    # Create the figure and the axes everything is drawn on.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Positive samples: red squares.
    ax.scatter(positive[:, 0], positive[:, 1], s=20, c='red', marker='s', alpha=.5)
    # Negative samples: green dots.
    ax.scatter(negative[:, 0], negative[:, 1], s=20, c='green', alpha=.5)
    # x-axis sample points for the boundary line.
    x = np.arange(-3.0, 3.0, 0.1)
    # w1*x1 + w2*x2 + b = 0 with x1 = x and x2 = y.
    y = (-bias - weights[0][0] * x) / weights[0][1]
    ax.plot(x, y)
    # Title and axis labels.
    plt.title('BestFit')
    plt.xlabel('x1')
    plt.ylabel('y2')
    # Show the figure.
    plt.show()


# Pull the learned parameters of the linear layer out of the model and draw
# the resulting decision boundary.
# NOTE(review): `model` is not assigned at module scope anywhere in the visible
# source (main() builds its own local model) — confirm where it comes from.
params = model.state_dict()
weights, bias = params['layer.weight'], params['layer.bias']
plot(weights.numpy(), bias.numpy()[0])


