良/恶性乳腺癌肿瘤预测

最新推荐文章于 2022-08-24 17:04:02 发布

山歌嘎子

最新推荐文章于 2022-08-24 17:04:02 发布

阅读量710

点赞数 1

分类专栏：机器学习、线性分类器　文章标签：python、算法、线性模型、学习、numpy

本文链接：https://blog.csdn.net/u012123197/article/details/72730994

版权

机器学习同时被 2 个专栏收录

13 篇文章 0 订阅

订阅专栏

线性分类器

1 篇文章 0 订阅

订阅专栏

  
  
   
   其中
  
  
  
  
   
   # Third-party dependencies used by the script.
# numpy, imported under the alias np.
import numpy as np
# pandas, imported under the alias pd.
import pandas as pd
# pyplot from matplotlib, imported under the alias plt.
import matplotlib.pyplot as plt
# The logistic-regression classifier from sklearn.
from sklearn.linear_model import LogisticRegression
  
  

  
  
   
   # Read the training data set (the data can be found in the UCI repository).
df_train = pd.read_csv('breast-cancer-train.csv')
# Read the test data set.
df_test = pd.read_csv('breast-cancer-test.csv')
  
  


  
  
   
   # Data layout: the data has four columns in total. Besides the sample id
# there are 'Clump Thickness', 'Cell Size' and 'Type'. The first two are
# numeric; 'Type' is a boolean-like label telling benign from malignant.
#
# Use 'Clump Thickness' and 'Cell Size' as the features and build the
# negative and positive sample subsets of the test set.
#
# Label encoding as the script uses it: Type == 0 selects the negative
# subset, which the plotting code draws as the benign samples; Type == 1
# selects the positive subset, drawn as the malignant samples.
# NOTE(review): the original comment here stated the opposite (malignant = 0,
# benign = 1), contradicting the plotting comments - confirm against the data.
feature_columns = ['Clump Thickness', 'Cell Size']
# A single .loc call with (row mask, column list) replaces the chained
# .loc[mask][[cols]] lookup and selects the same rows and columns.
df_test_negative = df_test.loc[df_test['Type'] == 0, feature_columns]
df_test_positive = df_test.loc[df_test['Type'] == 1, feature_columns]
  
  



  
  
   
   # Draw the benign (negative) test samples as red 'o' markers.
plt.scatter(df_test_negative['Clump Thickness'], df_test_negative['Cell Size'],
            marker='o', s=200, c='red')
# Draw the malignant (positive) test samples as black 'x' markers.
plt.scatter(df_test_positive['Clump Thickness'], df_test_positive['Cell Size'],
            marker='x', s=150, c='black')

# Label the x and y axes, then display the figure.
plt.xlabel('Clump Thickness')
plt.ylabel('Cell Size')
plt.show()
  
  



  
  
   
   # Randomly sample the intercept and coefficients of a line with numpy.
intercept = np.random.random([1])
coef = np.random.random([2])
# x positions at which the line is evaluated; reused by the later plots.
lx = np.arange(0, 12)
# Solve coef[0] * x + coef[1] * y + intercept = 0 for y.
ly = (-intercept - lx * coef[0]) / coef[1]
# Draw the random line.
plt.plot(lx, ly, c='yellow')

# Overlay the test samples with the same markers as the first figure.
plt.scatter(df_test_negative['Clump Thickness'], df_test_negative['Cell Size'],
            marker='o', s=200, c='red')
plt.scatter(df_test_positive['Clump Thickness'], df_test_positive['Cell Size'],
            marker='x', s=150, c='black')

plt.xlabel('Clump Thickness')
plt.ylabel('Cell Size')
plt.show()
  
  



  
  
   
   # Create the logistic-regression (LR) classifier.
lr = LogisticRegression()
# Learn the line's coefficients and intercept from the first 10 training samples.
lr.fit(df_train[['Clump Thickness', 'Cell Size']][:10], df_train['Type'][:10])
print("Testing accuracy(10 training samples):",
      lr.score(df_test[['Clump Thickness', 'Cell Size']], df_test['Type']))

# Turn the fitted parameters into a line over the shared x positions in lx.
intercept = lr.intercept_
coef = lr.coef_[0, :]
ly = (-intercept - lx * coef[0]) / coef[1]

plt.plot(lx, ly, c='green')

# Overlay the test samples: negative subset as red 'o', positive as black 'x'.
plt.scatter(df_test_negative['Clump Thickness'], df_test_negative['Cell Size'],
            marker='o', s=200, c='red')
plt.scatter(df_test_positive['Clump Thickness'], df_test_positive['Cell Size'],
            marker='x', s=150, c='black')

plt.xlabel('Clump Thickness')
plt.ylabel('Cell Size')
plt.show()
  
  



  
  
   
   # Learn the line's coefficients and intercept from all training samples.
lr.fit(df_train[['Clump Thickness', 'Cell Size']], df_train['Type'])
print("Testing accuracy(all training samples):",
      lr.score(df_test[['Clump Thickness', 'Cell Size']], df_test['Type']))

# Turn the refitted parameters into a line over the shared x positions in lx.
intercept = lr.intercept_
coef = lr.coef_[0, :]
ly = (-intercept - lx * coef[0]) / coef[1]
plt.plot(lx, ly, c='blue')

# Overlay the test samples: negative subset as red 'o', positive as black 'x'.
plt.scatter(df_test_negative['Clump Thickness'], df_test_negative['Cell Size'],
            marker='o', s=200, c='red')
plt.scatter(df_test_positive['Clump Thickness'], df_test_positive['Cell Size'],
            marker='x', s=150, c='black')

plt.xlabel('Clump Thickness')
plt.ylabel('Cell Size')
plt.show()

山歌嘎子

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
良/恶性乳腺癌肿瘤预测

'''Created on 2017年5月25日@author: Administrator'''#导入numpy工具包，并且更名为npimport numpy as np#导入pandas工具包，并且更名为pdimport pandas as pd#导入matplotlib工具包中的pyplot，并且更名为pltimport matplotlib.pyplot as plt#导入sklearn工
复制链接

扫一扫