机器学习-了解逻辑回归的逻辑过程

最新推荐文章于 2021-03-10 20:19:26 发布

轻小說控

最新推荐文章于 2021-03-10 20:19:26 发布

阅读量562

点赞数

分类专栏：python、机器学习　　文章标签：python、机器学习、逻辑回归

本文链接：https://blog.csdn.net/Aqours/article/details/107500170

版权

python 同时被 2 个专栏收录

6 篇文章 0 订阅

订阅专栏

机器学习

3 篇文章 0 订阅

订阅专栏

机器学习-逻辑回归

预测乳腺癌案例

import numpy as np
import pandas as pd
# 机器学习
import sklearn
# 逻辑回归
from sklearn.linear_model import LogisticRegression
# 切割训练集和测试集
from sklearn.model_selection import train_test_split
# 画图工具
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False


# Column names for the data file, which is read without a header row.
names = ['id',
         'Clump Thickness',
         'Uniformity of Cell Size',
         'Uniformity of Cell Shape',
         'Marginal Adhesion',
         'Single Epithelial Cell Size',
         'Bare Nuclei',
         'Bland Chromatin',
         'Normal Nucleoli',
         'Mitoses',
         'Class']
# Missing values are written as '?' in this file. Parsing them as NaN while
# reading keeps the affected column numeric instead of leaving it as strings.
data = pd.read_csv('./data/乳腺癌分类/breast-cancer-wisconsin.data', names=names, na_values='?')

# Handy while exploring: data['Class'].value_counts() lists the distinct
# labels (i.e. how many classes there are) and data.info() shows the dtypes.

# Drop every row that contains a missing value rather than filling it with 0.
data = data.dropna(how='any')

# Features: every column between 'id' and 'Class' ('id' is left out of the
# feature set).
X = data.iloc[:, 1:10]
# Target: the last column, kept 1-D so the estimator does not have to flatten
# a column vector (which makes scikit-learn emit a DataConversionWarning).
Y = data.iloc[:, -1]
# Hold out 20% of the rows for testing; the seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
# Fit the logistic-regression classifier (fit returns the estimator itself).
lr = LogisticRegression()
result = lr.fit(X_train, Y_train)

# Accuracy on both splits.
print("训练集合上的准确率", result.score(X_train, Y_train))
print("测试集合上的准确率", result.score(X_test, Y_test))

# Predicted labels and the true labels as a flat ndarray.
y_hat = lr.predict(X_test)
Y_ = Y_test.values.reshape(-1)
# Element-wise comparison: True where the prediction is correct.
print(y_hat == Y_)
Y_arr = (y_hat == Y_)
# Cast the booleans to 0/1 so that their mean is the accuracy.
Y_arr = np.array(Y_arr, dtype=int)
print(Y_arr)
print(np.mean(Y_arr))

# Scatter plot of true versus predicted labels for each test sample.
x_len = range(len(X_test))
plt.figure()
plt.ylim(0, 6)
# 'ro' / 'go' draw markers only; the smaller red true-value markers sit on
# top (higher zorder) of the larger green prediction markers.
plt.plot(x_len, Y_test, 'ro', markersize=8, label='真实值', zorder=3)
plt.plot(x_len, y_hat, 'go', markersize=14, label='预测值', zorder=2)
# Show the labels defined above. The figure itself is displayed by the
# plt.show() call at the end of the script.
plt.legend(loc='upper left')

# 绘制ROC曲线
from sklearn import metrics
from sklearn.preprocessing import label_binarize
# Map the class labels 2 / 4 onto 0 / 1 for the ROC computation.
y_t = label_binarize(Y_test, classes=(2, 4))
print(y_t.reshape(-1))
# Signed decision scores for the test samples, used to rank them for the ROC.
Y_score = lr.decision_function(X_test)
# False-positive rate, true-positive rate and the thresholds they belong to.
fpr, tpr, th = metrics.roc_curve(y_t.reshape(-1), Y_score)
# Area under the ROC curve.
auc = metrics.auc(fpr, tpr)
print('auc面积', auc)

plt.figure()
# ROC curve in red, line width 2.
plt.plot(fpr, tpr, c='r', lw=2)
# Leave a small margin around the unit square.
plt.xlim(-0.01, 1.01)
plt.ylim(-0.01, 1.01)
# Ticks every 0.1 from 0 to 1.
plt.xticks(np.arange(0, 1.1, 0.1))
plt.yticks(np.arange(0, 1.1, 0.1))
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
plt.plot((0, 1), (0, 1), c='#a0a0a0', ls='--')
plt.xlabel('假正例率', fontsize=16)
plt.ylabel('真正例率', fontsize=16)
# The on/off flag is passed positionally: its keyword was renamed from `b`
# to `visible`, so the positional form works on old and new Matplotlib alike.
plt.grid(True, ls=':')

plt.show()

在这里插入图片描述

泰坦尼克号案例

创建泰坦尼克号GUI.py

import tkinter as tk

# 1 活
# 0 死
# pclass 1是最好的
import tkinter as tk
from Titanic import Titanic

def on_start_button_click():
    """Read the form, run the prediction and show the outcome.

    Reads the module-level entry widgets, converts every field except the
    sex field to ``float`` and writes the string returned by ``t.predict``
    into ``result_textarea``. If a numeric field cannot be converted, an
    error message is shown in the text box instead of letting the
    ``ValueError`` escape from the Tk callback.
    """
    sex_input = sex_entry.get()
    numeric_entries = (age_entry, bro_entry, parents_entry, fare_entry, p_class_entry)
    try:
        age_input, bro_input, parents_input, fare_input, p_class_input = (
            float(entry.get()) for entry in numeric_entries)
    except ValueError:
        result_str = '输入有误：除性别外的各项请填写数字'
    else:
        result_str = t.predict(sex_input, age_input, bro_input, parents_input, fare_input, p_class_input)
    # Replace whatever the text box currently shows with the new message.
    result_textarea.delete('1.0', 'end')
    result_textarea.insert('insert', result_str)

# Create the main window and the model used by the click handler.
root = tk.Tk()
t = Titanic()
root.title('泰坦尼克号预测')
# An explicit size/position could be set with a geometry string of the form
# 'WIDTHxHEIGHT+X+Y', e.g. root.geometry('300x600+500+100').

# Field captions in column 0. padx / pady are horizontal / vertical padding;
# sticky=tk.E right-aligns a caption, sticky=tk.W left-aligns it.
tk.Label(root, padx=20, pady=10, text='输入下面各个数值，来预测你在泰坦尼克号事故中是否存活下来').grid(row=0, columnspan=2)
tk.Label(root, text='性别（男/女）').grid(row=1, sticky=tk.E)
tk.Label(root, text='年龄').grid(row=2, sticky=tk.E)
tk.Label(root, text='船上的兄弟姐妹（0）').grid(row=3, sticky=tk.E)
tk.Label(root, text='父母亲（0）').grid(row=4, sticky=tk.E)
# This field feeds the model's 'Fare' feature, i.e. the ticket fare; the
# model is trained on the fare scaled to the 0-1 range.
tk.Label(root, text='票价（归一化后 0-1）').grid(row=5, sticky=tk.E)
tk.Label(root, text='仓位（1-3）').grid(row=6, sticky=tk.E)
tk.Label(root, text='Create By AzurLane').grid(row=7, sticky=tk.W)

# Input boxes in column 1, one per caption above.
sex_entry = tk.Entry(root)
sex_entry.grid(row=1, column=1, padx=10, pady=10)
age_entry = tk.Entry(root)
age_entry.grid(row=2, column=1, padx=10, pady=10)
bro_entry = tk.Entry(root)
bro_entry.grid(row=3, column=1, padx=10, pady=10)
parents_entry = tk.Entry(root)
parents_entry.grid(row=4, column=1, padx=10, pady=10)
fare_entry = tk.Entry(root)  # ticket fare
fare_entry.grid(row=5, column=1, padx=10, pady=10)
p_class_entry = tk.Entry(root)  # passenger class
p_class_entry.grid(row=6, column=1, padx=10, pady=10)


# Picture in column 2, spanning three rows. `photo` stays referenced at module
# level so the image is not garbage-collected while the window is open.
photo = tk.PhotoImage(file='./data/siki.png')
tk.Label(root, image=photo).grid(row=1, column=2, rowspan=3, padx=20)
# Text box that shows the prediction result.
result_textarea = tk.Text(root, height=6, width=30)
result_textarea.grid(row=4, column=2, rowspan=2)
# Button that triggers the prediction.
tk.Button(root, text='开始预测你的生死', command=on_start_button_click).grid(row=6, column=2, rowspan=2, ipadx=50)
# Enter the Tk event loop.
root.mainloop()

Titanic.py

# 处理数组
import numpy as np
import pandas as pd
# 机器学习
import sklearn
# 画图
import matplotlib.pyplot as plt
# 逻辑回归
from sklearn.linear_model import LogisticRegression


# 切割数据分为训练集和测试集
def split_data(data, train_size=0.8, random_state=0):
    """Shuffle ``data`` and split it into training and test parts.

    The first column is treated as the target and all remaining columns as
    the features.

    Args:
        data: DataFrame whose first column holds the target.
        train_size: Fraction of the rows that goes into the training part;
            the row count is truncated to an integer.
        random_state: Seed passed to ``DataFrame.sample`` for the shuffle.

    Returns:
        ``(x_train, y_train, x_test, y_test)``, each a DataFrame (the two
        target frames have a single column).
    """
    # Sampling the full fraction without replacement is a row shuffle.
    shuffled = data.sample(frac=1, random_state=random_state)
    cut = int(len(shuffled) * train_size)

    # Split the rows first, then separate target and features of each part.
    train_rows = shuffled.iloc[:cut, :]
    test_rows = shuffled.iloc[cut:, :]
    return (train_rows.iloc[:, 1:], train_rows.iloc[:, :1],
            test_rows.iloc[:, 1:], test_rows.iloc[:, :1])

class Titanic:
    """Logistic-regression survival model trained on ``./data/titanic.csv``.

    The constructor loads the CSV, one-hot encodes ``Sex`` and ``Pclass``,
    min-max scales ``Fare``, fits the model and prints the accuracy on the
    training and test splits. ``predict`` turns one passenger description
    into a message saying whether the model predicts survival.
    """

    # One-hot blocks appended after the four numeric features. They must
    # follow the order of the dummy columns built in __init__ -- presumably
    # (Sex_female, Sex_male) and (Pclass_1, Pclass_2, Pclass_3); verify
    # against the CSV if the data changes.
    _SEX_ONE_HOT = {'女': [1, 0], '男': [0, 1]}
    _PCLASS_ONE_HOT = {1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 1]}

    def __init__(self):
        data = pd.read_csv('./data/titanic.csv')

        # One-hot encode the two categorical columns.
        sex_one_hot = pd.get_dummies(data['Sex'], prefix='Sex')
        pclass_one_hot = pd.get_dummies(data['Pclass'], prefix='Pclass')
        # Keep the target and the numeric features; every other column
        # (e.g. the passenger name) is dropped.
        names = ['Survived', 'Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare']
        data = pd.concat([data[names], sex_one_hot, pclass_one_hot], axis=1)

        # Min-max scale the fare to [0, 1]. Done as one vectorised
        # expression so min / max are computed once, not once per row.
        fare = data['Fare']
        data['Fare'] = (fare - fare.min()) / (fare.max() - fare.min())

        # 'Survived' is the first column, which split_data uses as target.
        x_train, y_train, x_test, y_test = split_data(data)
        # Flatten the single-column targets; the estimator expects 1-D
        # labels and warns when it receives a column vector.
        y_train = y_train.values.ravel()
        y_test = y_test.values.ravel()

        # Other accepted solvers include 'liblinear', 'newton-cg', 'sag'
        # and 'saga'.
        self.lr = LogisticRegression(solver='lbfgs', max_iter=1000000)
        self.result = self.lr.fit(x_train, y_train)
        print('训练集上准确率', self.result.score(x_train, y_train))
        print('测试集上准确率', self.result.score(x_test, y_test))

    def predict(self, sex_input, age_input, bro_input, parents_input, fare_input, p_class_input):
        """Predict survival for one passenger and return a message.

        Args:
            sex_input: ``'男'`` (male) or ``'女'`` (female).
            age_input: Age.
            bro_input: Value for the 'Siblings/Spouses Aboard' feature.
            parents_input: Value for the 'Parents/Children Aboard' feature.
            fare_input: Fare; the model is trained on the min-max scaled
                fare, so a value in [0, 1] is expected.
            p_class_input: Passenger class, 1, 2 or 3 (``1.0`` etc. also
                match).

        Returns:
            The prediction message, or an error message when the sex or
            the passenger class is not one of the accepted values.
        """
        print(self.lr.coef_)
        print('预测')

        sex_code = self._SEX_ONE_HOT.get(sex_input)
        if sex_code is None:
            print("性别输入有误")
            return '性别输入有误，请输入男或女'

        pclass_code = self._PCLASS_ONE_HOT.get(p_class_input)
        if pclass_code is None:
            print("仓位输入有误")
            return '仓位输入有误，请输入1到3任意数字，1为头等仓，3为经济舱'

        # Feature order must match training: the numeric features first,
        # then the sex dummies, then the passenger-class dummies.
        x = [age_input, bro_input, parents_input, fare_input] + sex_code + pclass_code
        result = self.lr.predict([x])
        outcome = '死' if result[0] == 0 else '活'
        return '在这次泰坦尼克号中，你' + outcome + '了'

在这里插入图片描述

葡萄酒分类

葡萄酒分类GUI.py

import tkinter as tk
from Winequality import  Winequality

def on_start_button_click():
    """Read the eleven measurements, classify the wine and show the result.

    Each module-level entry widget is read and converted to ``float`` in the
    order expected by ``w.perdict``. If any field cannot be converted, an
    error message is shown in the text box instead of letting the
    ``ValueError`` escape from the Tk callback.
    """
    # Same order as the parameters of Winequality.perdict.
    entries = (fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
               chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
               pH, sulphates, alcohol)
    try:
        values = [float(entry.get()) for entry in entries]
    except ValueError:
        message = '输入有误：请在每一项中填写数字'
    else:
        message = "分类的结果为:" + str(w.perdict(*values))

    # Replace whatever the text box currently shows with the new message.
    result_textarea.delete('1.0', 'end')
    result_textarea.insert('insert', message)

# Train the model first, then build the window.
w = Winequality()
root = tk.Tk()

root.title('葡萄酒分类质量')
tk.Label(root, padx=20, pady=10, text='输入下面各个数值').grid(row=0, columnspan=2)


def _add_field(row, text):
    """Put a right-aligned caption and an input box on ``row``; return the box."""
    tk.Label(root, text=text).grid(row=row, sticky=tk.E)
    entry = tk.Entry(root)
    entry.grid(row=row, column=1, padx=10, pady=10)
    return entry


# One input per feature, in the column order of the wine-quality CSV files.
# The module-level names are the ones the click handler reads.
fixed_acidity = _add_field(1, '非挥发性酸')
volatile_acidity = _add_field(2, '挥发性酸')
citric_acid = _add_field(3, '柠檬酸')
residual_sugar = _add_field(4, '残糖')
chlorides = _add_field(5, '氯化物')
# "Free sulfur dioxide": the caption previously read "chlorine-free".
free_sulfur_dioxide = _add_field(6, '游离二氧化硫')
total_sulfur_dioxide = _add_field(7, '二氧化硫总量')
density = _add_field(8, '密度')
pH = _add_field(9, '酸碱度')
sulphates = _add_field(10, '硫酸盐')
alcohol = _add_field(11, '酒精')
tk.Label(root, text='Create By AzurLane').grid(row=12, sticky=tk.W)

# Picture in column 2. `photo` stays referenced at module level so the image
# is not garbage-collected while the window is open.
photo = tk.PhotoImage(file='./data/siki.png')
tk.Label(root, image=photo).grid(row=1, column=2, rowspan=2, padx=10, pady=5)
# Text box that shows the classification result.
result_textarea = tk.Text(root, height=7, width=40)
result_textarea.grid(row=3, column=2, rowspan=2, padx=10, pady=0)
# Button that triggers the prediction.
tk.Button(root, text='开始预测', command=on_start_button_click).grid(row=11, rowspan=2, column=2, ipadx=120)
# Enter the Tk event loop.
root.mainloop()

Winequality.py

# 处理数组
import numpy as np
import pandas as pd
# 机器学习
import sklearn
# 画图
import matplotlib as mpl
import matplotlib.pyplot as plt
# 逻辑回归
from sklearn.linear_model import LogisticRegression
# fixme 防止出现中文乱码
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
from sklearn.preprocessing import label_binarize
from sklearn import metrics


class Winequality:
    """Multi-class logistic regression over the red + white wine-quality data.

    The constructor loads both CSV files, fits the model and prints the
    accuracy on the training and test splits. The held-out split is kept on
    the instance (``test_x``, ``test_y``, ``test_num``) for the plotting
    helpers.
    """

    def __init__(self):
        # Both files use ';' as the field separator instead of the default ','.
        red_data = pd.read_csv('./data/winequality-red.csv', sep=';')
        white_data = pd.read_csv('./data/winequality-white.csv', sep=';')
        # Stack the two frames vertically.
        data = pd.concat([red_data, white_data], axis=0)

        # Show how many samples each quality grade has.
        print(data['quality'].value_counts())

        # Drop rows that contain a missing value.
        data = data.dropna(how='any')

        x_train, y_train, x_test, y_test = self.split_data(data)

        # Held-out data kept for visualization() and roc().
        self.test_num = len(x_test)
        self.test_y = y_test
        self.test_x = x_test

        # Flatten the single-column targets; the estimator expects 1-D
        # labels and warns when it receives a column vector.
        y_train_flat = y_train.values.ravel()
        y_test_flat = y_test.values.ravel()

        self.lr = LogisticRegression(max_iter=10000, solver='sag')
        self.lr.fit(x_train, y_train_flat)

        print('训练集准确率', self.lr.score(x_train, y_train_flat))
        print('测试集', self.lr.score(x_test, y_test_flat))
        # NOTE (original author's observations): accuracy was about
        # 0.545 / 0.536 at first and 0.534 / 0.528 after switching the solver
        # to 'sag', so the solver is not the limiting factor. The grades are
        # heavily imbalanced (6: 2836, 5: 2138, 7: 1079, 4: 216, 8: 193,
        # 3: 30, 9: 5), which the author suspects keeps the accuracy low.

    def split_data(self, data, train_size=0.8, random_state=0):
        """Shuffle ``data`` and split it into training and test parts.

        The last column is the target; all other columns are features.

        Args:
            data: DataFrame whose last column holds the target.
            train_size: Fraction of rows used for training (row count is
                truncated to an integer).
            random_state: Seed passed to ``DataFrame.sample``.

        Returns:
            ``(x_train, y_train, x_test, y_test)`` as DataFrames.
        """
        # Sampling the full fraction is a row shuffle.
        data_upset = data.sample(frac=1, random_state=random_state)

        x = data_upset.iloc[:, :-1]
        y = data_upset.iloc[:, -1:]

        num_train = int(len(data_upset) * train_size)
        x_train = x.iloc[:num_train, :]
        y_train = y.iloc[:num_train, :]
        x_test = x.iloc[num_train:, :]
        y_test = y.iloc[num_train:, :]
        return x_train, y_train, x_test, y_test

    def predict(self, fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol):
        """Classify one wine sample and return the predicted grade as a string.

        The arguments are the eleven feature values in the column order of
        the training data.
        """
        sample = [fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol]
        result = self.lr.predict([sample])
        return str(result[0])

    def perdict(self, fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol):
        """Misspelled historical name of ``predict``; kept for existing callers."""
        return self.predict(fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol)

    def visualization(self):
        """Scatter-plot the true and the predicted grade of every test sample."""
        x_len = range(self.test_num)
        plt.figure()
        # Smaller red true-value markers are drawn on top (higher zorder) of
        # the larger green prediction markers.
        plt.plot(x_len, self.test_y, 'ro', markersize=8, zorder=3)
        plt.plot(x_len, self.lr.predict(self.test_x), 'go', markersize=14, zorder=2)
        plt.show()

    def roc(self):
        """Plot the ROC curve over all flattened (sample, class) pairs and print its AUC."""
        # Binarise against the classes the model was actually fitted on so
        # the indicator matrix lines up column-for-column with the output of
        # decision_function, even if a grade is missing from the training
        # split.
        y_ = label_binarize(self.test_y.values.ravel(), classes=self.lr.classes_)
        y_score = self.lr.decision_function(self.test_x)

        # Flatten both matrices so every (sample, class) pair is one point.
        fpr, tpr, th = metrics.roc_curve(y_.reshape(-1), y_score.reshape(-1))

        auc = metrics.auc(fpr, tpr)
        print('auc面积', auc)
        plt.figure()
        plt.plot(fpr, tpr, c='r', lw=1)
        plt.show()

# t = Winequality()
# t.roc()

在这里插入图片描述

花分类案例

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.preprocessing import label_binarize

# KNN
from sklearn.neighbors import KNeighborsClassifier


# 改变y值
def change_data(x):
    """Translate an iris species name into its integer class code.

    Returns 0 for 'Iris-setosa', 1 for 'Iris-versicolor', 2 for
    'Iris-virginica' and -1 for any other value.
    """
    species_codes = (
        ('Iris-setosa', 0),
        ('Iris-versicolor', 1),
        ('Iris-virginica', 2),
    )
    for species, code in species_codes:
        if x == species:
            return code
    return -1

# 切割训练集和测试集
def split_data(data, train_size=0.8, random_state=2):
    """Shuffle ``data`` and split it into training and test parts.

    The last column is the target; all other columns are features.

    Args:
        data: DataFrame whose last column holds the target.
        train_size: Fraction of rows used for training (row count is
            truncated to an integer).
        random_state: Seed passed to ``DataFrame.sample``.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as DataFrames -- note that
        both feature frames come before both target frames.
    """
    # Sampling the full fraction without replacement is a row shuffle.
    shuffled = data.sample(frac=1, random_state=random_state, replace=False)
    boundary = int(len(shuffled) * train_size)

    # Split the rows first, then separate features and target of each part.
    head = shuffled.iloc[:boundary, :]
    tail = shuffled.iloc[boundary:, :]
    return head.iloc[:, :-1], tail.iloc[:, :-1], head.iloc[:, -1:], tail.iloc[:, -1:]

# The data file is read without a header row, so supply the column names.
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'cla']
data = pd.read_csv('./data/鸢尾花数据分类/iris.data', names=names)
# Replace the species names with the integer codes 0 / 1 / 2.
data['cla'] = data['cla'].apply(change_data)

# 70% of the shuffled rows for training, the rest for testing.
x_train, x_test, y_train, y_test = split_data(data, 0.7)

# Two classifiers trained on the same split: logistic regression and
# 3-nearest-neighbours.
lr = LogisticRegression()
knn = KNeighborsClassifier(n_neighbors=3)

# The targets are single-column DataFrames; hand the estimators flat 1-D
# label arrays so they do not warn about receiving a column vector.
result = lr.fit(x_train, y_train.values.ravel())
result_knn = knn.fit(x_train, y_train.values.ravel())


# Accuracy, in this order: logistic regression on train, logistic regression
# on test, KNN on train, KNN on test.
print(result.score(x_train, y_train))
print('逻辑回归', result.score(x_test, y_test))
print('knn', result_knn.score(x_train, y_train))
print(result_knn.score(x_test, y_test))


# Scatter plot of true versus predicted class for each test sample.
plt.figure()
x_len = range(len(x_test))
# Smaller red true-value markers are drawn on top (higher zorder) of the
# larger green prediction markers.
plt.plot(x_len, y_test, 'ro', markersize=8, zorder=3, label='真实值')
plt.plot(x_len, lr.predict(x_test), 'go', markersize=14, zorder=2, label='预测值')
plt.ylim(-1, 3)
plt.legend(loc='upper left')
plt.show()

# ROC curves. With three classes, both the one-hot label matrix and the
# per-class score matrix are flattened so that every (sample, class) pair
# becomes one point.
y_ = label_binarize(y_test, classes=(0, 1, 2))
# Per-class decision scores of the logistic regression.
y_score = lr.decision_function(x_test)
# Per-class probabilities of the KNN classifier.
y_score_knn = knn.predict_proba(x_test)

fpr, tpr, th = metrics.roc_curve(y_.reshape(-1), y_score.reshape(-1))
fpr_knn, tpr_knn, th_knn = metrics.roc_curve(y_.reshape(-1), y_score_knn.reshape(-1))
# Area under each curve. The KNN area must come from the KNN curve; it was
# previously computed from the logistic-regression curve by mistake.
auc = metrics.auc(fpr, tpr)
print('面积{}'.format(auc))
auc_knn = metrics.auc(fpr_knn, tpr_knn)
print('knn面积{}'.format(auc_knn))

plt.figure()
# Logistic regression in red (width 2), KNN in blue (width 1).
plt.plot(fpr, tpr, c='r', lw=2)
plt.plot(fpr_knn, tpr_knn, c='b', lw=1)
# Leave a small margin around the unit square.
plt.xlim(-0.01, 1.01)
plt.ylim(-0.01, 1.01)
# Ticks every 0.1 from 0 to 1.
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xticks(np.arange(0, 1.1, 0.1))
# Dashed y = x diagonal as a visual reference.
plt.plot((0, 1), (0, 1), c='#a0a0a0', ls='--')
plt.grid(ls=':')

plt.show()