鸢尾花python经验分享_Python 机器学习-鸢尾花分类

本文使用Python对鸢尾花数据集进行分析,涵盖了数据导入、描述统计、可视化以及多种机器学习算法(如逻辑回归、线性判别分析、KNN、决策树、朴素贝叶斯、SVM)的训练和交叉验证评估。最终使用SVM模型在验证数据集上进行预测,并输出准确率、混淆矩阵和分类报告。
摘要由CSDN通过智能技术生成

'''

#Python 机器学习-鸢尾花分类

'''

#导入类库

from pandas import read_csv

from pandas.plotting import scatter_matrix

from matplotlib import pyplot

from sklearn.model_selection import train_test_split

from sklearn.model_selection import KFold

from sklearn.model_selection import cross_val_score

from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.neighbors import KNeighborsClassifier

from sklearn.naive_bayes import GaussianNB

from sklearn.svm import SVC

# Load the data set.
# Column labels are supplied explicitly through `names`; when `names` is
# passed without `header`, read_csv treats the first line of the file as data.
filename = 'iris.data.csv'
# Four flower measurements followed by the species label. The features are
# "sepal" length/width (the original labels were misspelled "separ").
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(filename, names=names)

# Report the dimensions of the data set (rows, columns).
row_count, column_count = dataset.shape
print('数据纬度:行%s,列%s' % (row_count, column_count))

# Preview the first ten rows.
first_rows = dataset.head(10)
print(first_rows)

# Descriptive statistics of the data.
summary = dataset.describe()
print(summary)

# Number of rows belonging to each class.
class_counts = dataset.groupby('class').size()
print(class_counts)

# Box plots: separate subplots laid out on a 2x2 grid, with no shared axes.
box_options = dict(subplots=True, layout=(2, 2), sharex=False, sharey=False)
dataset.plot(kind='box', **box_options)
pyplot.show()

# Histograms.
dataset.hist()
pyplot.show()

# Scatter-plot matrix.
scatter_matrix(dataset)
pyplot.show()

# Separate out a validation data set.
array = dataset.values
X = array[:, 0:4]  # first four columns: the feature values
Y = array[:, 4]    # fifth column: the class label
validation_size = 0.2  # fraction of the rows held back for validation
seed = 7               # fixed seed so the split is reproducible
# A parenthesised call is used instead of a backslash continuation: a
# backslash followed by a blank line leaves the assignment without a
# right-hand side and is a SyntaxError.
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

# Spot-check several algorithms with 10-fold cross-validation on the
# training split, printing "name:mean(std)" of the accuracy for each one.
models = {
    'LR': LogisticRegression(),
    'LDA': LinearDiscriminantAnalysis(),
    'KNN': KNeighborsClassifier(),
    'CART': DecisionTreeClassifier(),
    'NB': GaussianNB(),
    'SVM': SVC(),
}

# Per-model arrays of fold accuracies, in the same order as `models`.
results = []

# `random_state` only has an effect when `shuffle=True`; current scikit-learn
# raises ValueError if a seed is given while shuffling is off. With an integer
# seed the same folds are produced on every split() call, so one splitter can
# be shared by all models.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

for key in models:
    cv_results = cross_val_score(
        models[key],
        X_train,
        Y_train,
        cv=kfold,
        scoring='accuracy',
    )
    results.append(cv_results)
    print('%s:%f(%f)' % (key, cv_results.mean(), cv_results.std()))

# Compare the algorithms with a box plot of their cross-validation scores.
fig = pyplot.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(1, 1, 1)
# One box per entry in `results`, labelled with the matching model key.
ax.boxplot(results)
ax.set_xticklabels(list(models))
pyplot.show()

# Evaluate the algorithm on the validation data set: fit an SVC on the
# training split, predict the validation split, then print the accuracy,
# the confusion matrix and the classification report, in that order.
svm = SVC()
svm.fit(X_train, Y_train)
predictions = svm.predict(X_validation)

for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值