题目需求:
我们使用鸢尾花数据集,通过散点图研究两个特征(花萼长度作为 x 轴,花萼宽度作为 y 轴)与目标值(分类类别)之间的关系。
第一部分:导包
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
import pandas as pd
第二部分:加载数据集
# Load the iris dataset from sklearn.datasets.
iris = datasets.load_iris()
# Integer class code -> species name.
species_map = {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}
# Build the DataFrame: one column per feature, plus a 'species' column
# holding the mapped species name for each sample.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target, index=iris_df.index).map(species_map)
第三部分:绘制散点图
# Apply seaborn's "whitegrid" style.
sns.set(style="whitegrid")
# Scatter plot: sepal length on x, sepal width on y, one colour per species.
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='species',
    palette=['orange', 'green', 'blue'],
    s=100,  # marker size
)
# Title and axis labels.
ax.set_title('Sepal Width vs. Sepal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
# Show the figure.
plt.show()
第四部分:代码汇总
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
import pandas as pd
# Load the iris dataset from sklearn.datasets.
iris = datasets.load_iris()
# Integer class code -> species name.
species_map = {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}
# Build the DataFrame: one column per feature, plus a 'species' column
# holding the mapped species name for each sample.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target, index=iris_df.index).map(species_map)
# Apply seaborn's "whitegrid" style.
sns.set(style="whitegrid")
# Scatter plot: sepal length on x, sepal width on y, one colour per species.
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='species',
    palette=['orange', 'green', 'blue'],
    s=100,  # marker size
)
# Title and axis labels.
ax.set_title('Sepal Width vs. Sepal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
# Show the figure.
plt.show()
第五部分:效果展示
第六部分:三维特征空间
根据输出结果可以看到,在二维散点图中 'Iris-versicolor' 和 'Iris-virginica' 的数据点大量重叠,难以区分。为了更好地把这两类区分开,可以引入第三个特征,将二维散点图扩展到三维空间,从而更清楚地展示数据点之间的差异。第三个特征可以选择“花瓣长度(petal length)”或“花瓣宽度(petal width)”,以增强区分度。
(1)使用Sepal Length+Sepal Width+Petal Length这三个特征
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn import datasets
import pandas as pd
# Load the iris dataset.
iris = datasets.load_iris()
# Build the DataFrame: one column per feature, plus a 'species' name column.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = iris.target
species_map = {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}
iris_df['species'] = iris_df['species'].map(species_map)
# Species -> colour, defined once (same assignment as the 2D plot's palette).
species_colors = {'Iris-setosa': 'orange', 'Iris-versicolor': 'green', 'Iris-virginica': 'blue'}
# Create the 3D axes.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# Axes: sepal length, sepal width, petal length.
# One scatter call per species so that each species gets its own legend entry.
for species_name, color in species_colors.items():
    subset = iris_df[iris_df['species'] == species_name]
    ax.scatter(subset['sepal length (cm)'],
               subset['sepal width (cm)'],
               subset['petal length (cm)'],
               color=color,
               s=100,
               label=species_name)
# Axis labels and title.
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
ax.set_zlabel('Petal Length (cm)')
ax.set_title('3D Scatter Plot of Iris Dataset')
# Without a legend the colours cannot be matched to species.
ax.legend(title='species')
# Show the figure.
plt.show()
(2)使用平面分割
生成的平面基于每个类别(Setosa、Versicolor 和 Virginica)的平均花瓣长度(mean):每个平面都是水平面 z = 该类别花瓣长度的均值。需要注意,这些平面只是标示各类别花瓣长度平均水平的参考面,并不是真正的分类决策边界。
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import numpy as np
from sklearn import datasets
# Load the iris dataset into a DataFrame with a 'species' name column.
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = iris.target
species_map = {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}
iris_df['species'] = iris_df['species'].map(species_map)
# Create the 3D axes.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# Species -> colour. Kept identical to the earlier figures
# (setosa orange, versicolor green, virginica blue) so that a colour
# always denotes the same species throughout the article.
colors = {'Iris-setosa': 'orange', 'Iris-versicolor': 'green', 'Iris-virginica': 'blue'}
# One scatter call per species so that each species gets its own legend entry.
for species_name, color in colors.items():
    subset = iris_df[iris_df['species'] == species_name]
    ax.scatter(subset['sepal length (cm)'],
               subset['sepal width (cm)'],
               subset['petal length (cm)'],
               color=color,
               s=100,
               label=species_name)
# Grid over the observed sepal length/width range; shared by all three planes.
x = np.linspace(iris_df['sepal length (cm)'].min(), iris_df['sepal length (cm)'].max(), 10)
y = np.linspace(iris_df['sepal width (cm)'].min(), iris_df['sepal width (cm)'].max(), 10)
X, Y = np.meshgrid(x, y)
# Each plane is horizontal, placed at the mean petal length of its species
# (Setosa, Versicolor, Virginica) and drawn in that species' colour.
mean_petal_length = iris_df.groupby('species')['petal length (cm)'].mean()
for species_name, color in colors.items():
    Z = np.ones_like(X) * mean_petal_length[species_name]
    ax.plot_surface(X, Y, Z, color=color, alpha=0.3)
# Axis labels and title.
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
ax.set_zlabel('Petal Length (cm)')
ax.set_title('3D Scatter Plot with Species Planes')
# Legend built from the labelled scatter collections only.
ax.legend(title='species')
# Show the figure.
plt.show()
好啦,希望能够帮助到大家!