import matplotlib.pyplot as plt
import sklearn.datasets as sd
import pandas as pd
# Load the iris data set bundled with scikit-learn and list the fields
# the returned object exposes.
iris = sd.load_iris()
print(iris.keys())

# Build a DataFrame with one column per feature name and append the class
# codes as a trailing 'target' column.
data = pd.DataFrame(
    iris.data,
    columns=iris.feature_names,
).assign(target=iris.target)
# Sepal scatter plot: length on x, width on y, points coloured by class code.
# A new figure is opened first so this plot does not share axes with the
# petal plot that follows.
plt.figure()
plt.scatter(data['sepal length (cm)'],
            data['sepal width (cm)'],
            c=data['target'],
            cmap='brg')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.colorbar()

# Petal scatter plot: length on x, width on y (the y-axis previously repeated
# the length column, which collapsed every point onto the diagonal).
plt.figure()
plt.scatter(data['petal length (cm)'],
            data['petal width (cm)'],
            c=data['target'],
            cmap='brg')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.colorbar()
# NOTE: nothing here calls plt.show() or plt.savefig(); rendering the two
# figures is left to whatever environment runs this script.
# Binary classification with logistic regression.
import sklearn.linear_model as lm
import sklearn.model_selection as ms
import sklearn.metrics as sm

# Work on the last 100 rows of the frame only.
# NOTE(review): presumably these rows hold exactly two of the classes
# (the section is labelled as a binary problem) - confirm against the data.
sub_data = data.iloc[-100:]

# Inputs are every column except 'target'; the label is the 'target' column.
x = sub_data.drop(columns='target')
y = sub_data['target']

# Hold out 10% of the rows for testing; the fixed seed makes the split
# reproducible.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.1, random_state=7
)

# Fit a liblinear logistic-regression model on the training rows, then
# predict the held-out rows.
model = lm.LogisticRegression(solver='liblinear')
pred_test_y = model.fit(train_x, train_y).predict(test_x)

# Report true labels, predicted labels and the fraction predicted correctly.
print('真实值', test_y.values)
print('预测值', pred_test_y)
n_correct = (test_y == pred_test_y).sum()
print('准确率', n_correct / test_y.size)
# Multiclass classification: use every row of the frame.
# Inputs are all columns except 'target'; the label is the 'target' column.
x = data.drop(columns='target')
y = data['target']

# Hold out 10% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both parts; the seed fixes the split.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.1, random_state=7, stratify=y
)
# Logistic-regression model for the multiclass problem.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# 'liblinear' solver for more than two classes - confirm against the
# installed version before upgrading.
model = lm.LogisticRegression(solver='liblinear')

# 5-fold cross-validation over the full data set.
# cross_val_score returns one score per fold, so the mean below is a real
# average score. (cross_val_predict, used here before, returns the predicted
# label of every sample, and the mean of class codes is not a score.)
scores = ms.cross_val_score(model,
                            x,
                            y,
                            cv=5,
                            scoring='f1_weighted')
print('交叉验证均值得分:', scores.mean())
import sklearn.metrics as sm

# Train on the training split and predict the held-out rows.
pred_test_y = model.fit(train_x, train_y).predict(test_x)

# Fraction of held-out rows predicted correctly, followed by the raw
# true and predicted labels.
n_correct = (test_y == pred_test_y).sum()
print(n_correct / test_y.size)
print('真实值', test_y.values)
print('预测值', pred_test_y)

# Evaluation metrics: macro-averaged precision, recall and F1.
for label, metric in (
    ('三个类别的查准率均值', sm.precision_score),
    ('三个类别的召回率', sm.recall_score),
    ('三个类别的f1得分均值:', sm.f1_score),
):
    print(label, metric(test_y, pred_test_y, average='macro'))

# Overall accuracy, confusion matrix and the per-class report.
print('精度:', sm.accuracy_score(test_y, pred_test_y))
print('混淆矩阵\n', sm.confusion_matrix(test_y, pred_test_y))
print('分类报告', sm.classification_report(test_y, pred_test_y))
# Decision-tree classification.
import sklearn.tree as st

# Tree limited to depth 4; a node needs at least 5 samples to be split.
model = st.DecisionTreeClassifier(max_depth=4,
                                  min_samples_split=5)

# 5-fold cross-validation with the weighted F1 score. The mean is printed
# so the computed scores are actually reported instead of being discarded.
scores = ms.cross_val_score(model, x, y,
                            cv=5,
                            scoring='f1_weighted')
print('交叉验证均值得分:', scores.mean())

# Train on the training split and report per-class metrics on the held-out
# rows.
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
print(sm.classification_report(test_y, pred_test_y))
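# iris: logistic-regression binary classification and decision tree
# (web-page footer captured with the code: "latest recommended article published 2024-02-01 16:28:25")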