iris_逻辑回归二元分类_决策树

import matplotlib.pyplot as plt
import sklearn.datasets as sd
import pandas as pd

# Load the iris dataset and show which fields the returned object exposes.
iris = sd.load_iris()
print(iris.keys())

# Arrange the data as a DataFrame: one column per feature, named after
# iris.feature_names, with the class label appended as the LAST column.
# Later sections rely on that position when slicing with iloc[:, :-1] /
# iloc[:, -1].
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['target'] = iris.target

# Sepal visualisation: length vs. width, points coloured by class label.
# Each plot gets its own figure so the second scatter does not draw on
# top of the first one.
plt.figure()
plt.scatter(data['sepal length (cm)'],
            data['sepal width (cm)'],
            c=data['target'],
            cmap='brg')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.colorbar()

# Petal visualisation: length vs. WIDTH. The y-axis previously repeated
# the petal length column, which collapsed the plot onto a diagonal line.
plt.figure()
plt.scatter(data['petal length (cm)'],
            data['petal width (cm)'],
            c=data['target'],
            cmap='brg')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.colorbar()
# NOTE(review): no plt.show() is called here; outside a notebook the
# figures are only displayed once the caller invokes it.

# Binary classification with logistic regression.
import sklearn.linear_model as lm
import sklearn.model_selection as ms
import sklearn.metrics as sm

# Work on the last 100 rows of the full frame only.
sub_data = data.tail(100)

# Features are every column but the last; the label is the last column.
x, y = sub_data.iloc[:, :-1], sub_data.iloc[:, -1]

# Hold out 10% of the rows for testing; the seed is fixed so the split
# is reproducible.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.1, random_state=7
)

# Build the model and train it in one step (fit returns the estimator).
model = lm.LogisticRegression(solver='liblinear').fit(train_x, train_y)

# Predict the held-out rows.
pred_test_y = model.predict(test_x)

# Evaluation: true labels, predicted labels, and the share of matches.
print('真实值', test_y.values)
print('预测值', pred_test_y)
n_correct = (test_y == pred_test_y).sum()
print('准确率', n_correct / test_y.size)

# Multiclass classification on the full dataset.
# Features are every column but the last; the label is the last column.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 10% for testing; stratify=y keeps the class proportions the
# same in the train and test parts.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y,
    test_size=0.1,
    random_state=7,
    stratify=y,
)

model = lm.LogisticRegression(solver='liblinear')

# 5-fold cross-validation. cross_val_score returns one score per fold, so
# the mean below really is an average score. (The earlier call used
# cross_val_predict, which returns predicted class labels; averaging those
# yields a meaningless number.)
scores = ms.cross_val_score(model,
                            x,
                            y,
                            cv=5,
                            scoring='f1_weighted')
print('交叉验证均值得分:', scores.mean())

# Train on the training split and predict the held-out rows.
model.fit(train_x, train_y)

pred_test_y = model.predict(test_x)

# Fraction of held-out rows predicted correctly.
print((test_y == pred_test_y).sum() / test_y.size)


print('真实值', test_y.values)
print('预测值', pred_test_y)


# Evaluation metrics; average='macro' takes the unweighted mean of the
# per-class values.
import sklearn.metrics as sm
print('三个类别的查准率均值', sm.precision_score(test_y, pred_test_y, average='macro'))
print('三个类别的召回率', sm.recall_score(test_y, pred_test_y, average='macro'))
print('三个类别的f1得分均值:', sm.f1_score(test_y, pred_test_y, average='macro'))
# Accuracy
print('精度:', sm.accuracy_score(test_y, pred_test_y))

print('混淆矩阵\n', sm.confusion_matrix(test_y, pred_test_y))


print('分类报告', sm.classification_report(test_y, pred_test_y))


# Decision-tree classification, reusing x / y and the train/test split
# produced by the multiclass section above.
import sklearn.tree as st

model = st.DecisionTreeClassifier(max_depth=4,
                                  min_samples_split=5)

# 5-fold cross-validation with the weighted F1 score. The result used to
# be computed and then dropped; report its mean so the work is not wasted.
scores = ms.cross_val_score(model, x, y,
                            cv=5,
                            scoring='f1_weighted')
print('交叉验证均值得分:', scores.mean())

# Train on the training split and report per-class metrics on the
# held-out rows.
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
print(sm.classification_report(test_y, pred_test_y))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值