毕设 整理 相关资源及代码 2021-6-21

网易 乳腺癌 600 元课程

学堂云 阉割版 乳腺癌课程

python 非官方扩展包_欧文大学
python 非官方扩展包 清华镜像
python 非官方扩展包 华中科技大学镜像

PyQt5 教程 不错
CART 剪枝
CART剪枝 知乎
非常不错的代码实现的CART全过程
威斯康辛数据视频
决策树原理视频 b站
C4.5 原理代码 知乎
ID3 算法(DNA(MATLAB详细讲解代码))
基尼系数 手写笔记

.py文件转换.exe文件
pyqt5 视频不错
网易云 购课视频
30个特征的含义
数据处理 热点图绘制
重点 决策树算法 鸢尾花数据集实战篇
随机森林 代码

对数据的预处理表格之类的

在这里插入图片描述在这里插入图片描述

'''
pycharm 代码:最终实现PyQt5前端界面,随机森林实现,但对CART 算法没有进行可视化,在Spyder代码里实现了可视化
注:数据集需要放在桌面上
'''

import PyQt5
from PySide2.QtWidgets import QApplication, QMainWindow, QPushButton,  QPlainTextEdit,QMessageBox

import csv, pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing  # 预处理
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the breast-cancer dataset from the hard-coded desktop location.
data = pd.read_csv(r'C:\Users\Administrator\Desktop\原始数据cancer.csv')

# Data cleaning.
data.drop('id', axis=1, inplace=True)  # "id" carries no predictive meaning
# The label column holds 'M' / 'B'; encode it as 1 / 0.
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

# With "id" removed, column 0 is the label and the 30 feature columns that
# follow form three consecutive groups of ten. The previous slices [12:21]
# and [22:31] each skipped the first column of their group.
# NOTE(review): assumes the CSV keeps the mean / se / worst column order.
featurs_mean = list(data.columns[1:11])
featurs_se = list(data.columns[11:21])
featurs_worst = list(data.columns[21:31])

# Feature selection: the six columns actually fed to the model.
features_remain = ['radius_mean', 'texture_mean', 'smoothness_mean', 'compactness_mean', 'symmetry_mean',
                       'fractal_dimension_mean']

# Build the training and test sets: 30% of the rows go to the test set.
# No random_state is passed, so the split is not fixed by this code.
train, test = train_test_split(data, test_size=0.3)
train_X = train[features_remain]
y_train = train['diagnosis']
test_X = test[features_remain]
y_test = test['diagnosis']

# Z-score standardisation. The scaler is fitted on the training features only
# and the same fitted scaler is then applied to the test features.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
X_train_scaled = ss.fit_transform(train_X)
x_test_scaled = ss.transform(test_X)

                         # 随机森林
# rfc = RandomForestClassifier(random_state=0) # 随机森林实例化
# rfc = rfc.fit(X_train_scaled, y_train)
# score_Random_Forest = rfc.score(x_test_scaled, y_test)
# print(score_Random_Forest)

# Automatic depth tuning: fit one tree per candidate max_depth and keep the
# depth with the best mean of training and test accuracy.
# NOTE(review): range(1, 6) yields depths 1..5, although the original comment
# described the range as "1 to 6" - confirm which was intended.
# NOTE(review): the test split takes part in choosing the depth, so the test
# accuracy printed at the end is not an independent estimate.
list_average_accuracy = []
depth = range(1, 6)
for i in depth:
    # Capping max_depth limits the complexity of the fitted tree.
    '''
    DecisionTreeClassifier 类的构造函数:分类树的实现。(不是回归树的实现:sklearn.tree.DecisionTreeRegressor 类
    参数:def __init__(self, *,criterion="gini",splitter="best",max_depth=None,
    min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.,
    max_features=None,random_state=None, max_leaf_nodes=None,min_impurity_decrease=0.,
    min_impurity_split=None, class_weight=None,    ccp_alpha=0.0):
    '''
    tree = DecisionTreeClassifier(max_depth=i, random_state=0)  # criterion is left at its default ("gini")
    tree.fit(X_train_scaled, y_train)
    accuracy_training = tree.score(X_train_scaled, y_train)
    accuracy_test = tree.score(x_test_scaled, y_test)
    average_accuracy = (accuracy_training + accuracy_test) / 2.0
    list_average_accuracy.append(average_accuracy)

max_value = max(list_average_accuracy)
# List indices start at 0 while the candidate depths start at 1, hence the +1.
# index() returns the first match, so ties resolve to the shallowest depth.
best_depth = list_average_accuracy.index(max_value) + 1

# Final decision tree, refitted at the selected depth.
best_tree = DecisionTreeClassifier(max_depth=best_depth, random_state=0)
# fit() result is bound to CART, the name the GUI handler predicts with.
CART = best_tree.fit(X_train_scaled, y_train)
accuracy_training = best_tree.score(X_train_scaled, y_train)  # score() is used here as the accuracy measure
accuracy_test = best_tree.score(x_test_scaled, y_test)

# Report the test-split accuracy of the final model.
print("分类模型在测试子集上的准确性:{:.3f}".format(accuracy_test))

def handleCalc():
    """Predict benign/malignant for the values typed into ``textEdit``.

    The text box is expected to hold one whitespace-separated row of numbers,
    one value per entry of ``features_remain``. When several non-blank lines
    are present, the last one is used (as before). The row is standardised
    with ``ss``, the scaler fitted on the training set, passed to ``CART``,
    and the outcome is shown in a message box.

    Invalid input (nothing entered, non-numeric text, wrong number of values)
    is reported in a message box instead of raising.
    """
    rows = [line.split() for line in textEdit.toPlainText().splitlines()
            if line.strip()]
    if not rows:
        # Previously this path failed with NameError on an unbound variable.
        QMessageBox.about(window, '预测', '请输入癌细胞核6个值')
        return

    try:
        values = [float(item) for item in rows[-1]]
    except ValueError:
        QMessageBox.about(window, '预测', '输入必须是数字')
        return

    expected = len(features_remain)
    if len(values) != expected:
        QMessageBox.about(window, '预测',
                          f'需要输入{expected}个值,实际输入{len(values)}个')
        return

    # Reuse the training-set scaler. Standardising the single row against its
    # own mean/variance (the old preprocessing.scale call) produced inputs on
    # a different scale from the one the tree was trained on.
    sample = pd.DataFrame([values], columns=features_remain)
    sample_scaled = ss.transform(sample)
    prediction = CART.predict(sample_scaled)[0]

    # Labels were encoded as 'B' -> 0 and 'M' -> 1 before training.
    # The old helper printed the label and returned None, so the dialog
    # displayed "None"; build the text directly instead.
    label = "良性" if prediction == 0 else "恶性"

    QMessageBox.about(window,
                      '预测',
                      f'''良性或恶性:\n{label}
                       '''
                      )

# Build the Qt application object before creating any widget.
app = QApplication([])

# Main window: 500x400, placed at (300, 300).
window = QMainWindow()
window.resize(500, 400)
window.move(300, 300)
window.setWindowTitle('癌症预测')

# Free-text input box; handleCalc() reads its content on every click.
textEdit = QPlainTextEdit(window)
textEdit.setPlaceholderText("请输入癌细胞核6个值")
textEdit.move(10,25)
textEdit.resize(300,350)

# Button that triggers the prediction.
button = QPushButton('预测', window)
button.move(380,80)
button.clicked.connect(handleCalc)

window.show()

# Start the Qt event loop.
app.exec_()


在这里插入图片描述在这里插入图片描述pycharm需要导入的包是:
在这里插入图片描述在这里插入图片描述在这里插入图片描述在这里插入图片描述

'''
spyder 代码:实现对CART算法进行可视化,主要进行数据分析,绘制决策树功能
'''
# graphviz函数绘制决策树图形,plot函数绘制因子比例
from sklearn.tree import export_graphviz 
import csv,pandas as pd
import numpy as np

from IPython.display import Image
#from sklearn.tree import export_graphviz
from sklearn.tree import DecisionTreeClassifier
#from sklearn.tree import  RandomForestClassifier

# scale 数据标准化预处理,用 决策树 分类器进行分类,但特征值还是30个
from sklearn import preprocessing # 预处理
from sklearn.svm import SVC
#from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
#cancer=load_breast_cancer()
#data=cancer.data
# Load the breast-cancer dataset from the hard-coded desktop location.
data = pd.read_csv(r'C:\Users\Administrator\Desktop\原始数据cancer.csv')
data.info()  # print dtypes and non-null counts to check for missing values
data.columns  # column names (only displayed when evaluated interactively)

# Data cleaning.
data.drop('id', axis=1, inplace=True)  # "id" carries no predictive meaning
# The label column holds 'M' / 'B'; encode it as 1 / 0.
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

# With "id" removed, column 0 is the label and the 30 feature columns that
# follow form three consecutive groups of ten. The previous slices [12:21]
# and [22:31] each skipped the first column of their group.
# NOTE(review): assumes the CSV keeps the mean / se / worst column order.
featurs_mean = list(data.columns[1:11])
featurs_se = list(data.columns[11:21])
featurs_worst = list(data.columns[21:31])
# 特征字段的筛选
import seaborn as sns #看整体良性、恶性肿瘤的诊断情况
import matplotlib.pyplot as plt
%matplotlib inline
# Class balance of the encoded diagnosis column (0 = 'B', 1 = 'M').
# The series is passed as x= because seaborn takes plot variables as keywords;
# passed positionally it is not bound to x in current releases.
sns.countplot(x=data['diagnosis'], label='Count')

# Pairwise correlation between the "mean" feature columns.
corr = data[featurs_mean].corr()
plt.figure(figsize=(14, 14))
sns.heatmap(corr, annot=True)  # annot=True writes the value into each cell
# Feature selection: the six columns actually fed to the model.
features_remain = ['radius_mean','texture_mean', 'smoothness_mean','compactness_mean','symmetry_mean', 'fractal_dimension_mean'] 
# Build the training and test sets.
train,test = train_test_split(data,test_size = 0.3)  # 30% of the rows become the test set; no random_state is passed
train_X = train[features_remain]   # selected feature columns of the training rows
y_train = train['diagnosis']
test_X = test[features_remain]
y_test = test['diagnosis']
# Standardisation.

from sklearn.preprocessing import StandardScaler
ss = StandardScaler()      # Z-score standardisation
X_train_scaled = ss.fit_transform(train_X)  # fit the scaler on the training features and transform them
x_test_scaled = ss.transform(test_X)  # apply the already-fitted scaler to the test features


# Automatic depth tuning: fit one tree per candidate max_depth and keep the
# depth with the best mean of training and test accuracy.
# NOTE(review): range(1,6) yields depths 1..5, although the original comment
# described the range as "1 to 6" - confirm which was intended.
# NOTE(review): the test split takes part in choosing the depth, so the test
# accuracy printed at the end is not an independent estimate.
list_average_accuracy=[]
depth=range(1,6)
for i in depth:
    # Capping max_depth limits the complexity of the fitted tree.
    tree= DecisionTreeClassifier(max_depth=i,random_state=0)
    tree.fit(X_train_scaled,y_train)
    accuracy_training=tree.score(X_train_scaled,y_train)
    accuracy_test=tree.score(x_test_scaled,y_test)
    average_accuracy=(accuracy_training+accuracy_test)/2.0
    print("average_accuracy:",average_accuracy)
    list_average_accuracy.append(average_accuracy)


max_value=max(list_average_accuracy)
# List indices start at 0 while the candidate depths start at 1, hence the +1.
# index() returns the first match, so ties resolve to the shallowest depth.
best_depth=list_average_accuracy.index(max_value)+1
print("best_depth:",best_depth)

# Final decision tree, refitted at the selected depth.
best_tree= DecisionTreeClassifier(max_depth=best_depth,random_state=0)
best_tree.fit(X_train_scaled,y_train)
accuracy_training=best_tree.score(X_train_scaled,y_train)  # score() is used here as the accuracy measure
accuracy_test=best_tree.score(x_test_scaled,y_test)


# Report the test-split accuracy of the final model.
print("CART模型在测试子集上的准确性:{:.3f}".format(best_tree.score(x_test_scaled,y_test)))

#绘图,显示因子重要性
#n_features=data.info().shape[1]
#plt.barh(range(n_features),best_tree.feature_importances_,align='center')
#plt.yticks(np.arange(n_features),features_remain)#变换y轴的刻度
#plt.title("Decision Tree:")#设置标题
#plt.xlabel('Feature Importance')#设置x轴
#plt.ylabel('Feature')#设置y轴
#plt.show()#显示图像
# Write the fitted tree to a Graphviz .dot file; render it afterwards from a
# command prompt, e.g. `dot -Tpng cancertree1.dot -o cancertree1.png`.
# class_names must be listed in ascending label order. 'B' was encoded as 0
# and 'M' as 1, so benign comes first; the previous order swapped the two
# labels in the rendered tree.
export_graphviz(best_tree, out_file="cancertree1.dot", class_names=['benign', 'malignant'], feature_names=features_remain, impurity=False, filled=True)

在这里插入图片描述在这里插入图片描述
在这里插入图片描述

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值