1. Iris数据集每个维度画盒图
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
def dreaw(filename):
    """Draw one colour-filled box plot per Iris feature, then save and show it.

    The CSV at *filename* is read, its ``class`` column is dropped, and the
    remaining columns are paired with four fixed feature names. The figure is
    saved as ``Iris数据集盒图`` (Matplotlib picks the default image format)
    and then displayed.

    :param filename: path of the data set, relative to the working directory
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    # Font file used for the Chinese title; the path is relative to the working directory.
    font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=30)
    data = ["sepal length in cm", "sepal width in cm", "petal length in cm", "petal width in cm"]
    palette = ['#9999ff', '#ef476f', '#ffd166', '#118AD5']

    # One array per CSV column, keyed by its display name (zip stops at the shorter input).
    arr = list(data_arr.T)
    data1 = pd.DataFrame(dict(zip(data, arr)))

    plt.figure(figsize=(10, 8))
    plt.grid()
    f = plt.boxplot(
        data1,
        patch_artist=True,  # boxes must be patches for the face colours set below
        showmeans=True,  # draw the mean as a marker
        flierprops={'marker': 'o', 'markerfacecolor': 'red', 'color': 'black'},  # outliers
        meanprops={'marker': 'D', 'markerfacecolor': 'indianred'},  # mean marker
        medianprops={'linestyle': '--', 'color': 'orange'},  # median line
    )
    # A separate loop name keeps the palette list from being overwritten by one of its items.
    for box, face in zip(f['boxes'], palette):
        box.set(color='black', linewidth=2)
        box.set(facecolor=face)
    # Label the boxes through xticks rather than boxplot's `labels=` keyword,
    # which newer Matplotlib releases deprecate; boxes sit at positions 1..N.
    plt.xticks(range(1, len(f['boxes']) + 1), list(data1.columns), rotation=20)
    plt.suptitle("Iris数据集盒图", fontproperties=font_set, fontweight='bold')
    plt.savefig("Iris数据集盒图")
    plt.show()
# Script entry point: draw the box plots for 'iris.csv' (path relative to the working directory).
dreaw('iris.csv')
运行图示:
代码分析:先将列名数组 data 与各列数据数组 arr 组合成字典(data: arr),再把该字典转换为 DataFrame 类型,最后调用 matplotlib 库中的 boxplot 函数绘制盒图。
2. Iris数据集找分位数画直方图
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
import math
from pylab import mpl
def dreaw(filename):
    """Plot every Iris feature as sorted bars split at its quartile ranks.

    The CSV at *filename* is read and its ``class`` column dropped. For each
    remaining column the values are sorted ascending and drawn as bars at
    rank positions 1..n in one of four side-by-side subplots. The bars are
    split into three groups at ranks ``ceil(0.25 * n)`` and ``ceil(0.75 * n)``;
    a red and a blue vertical line mark those two ranks. The figure is saved
    as ``Iris数据集分位数直方图`` and then displayed.

    Note: this sets ``font.sans-serif`` in Matplotlib's global rcParams.

    :param filename: path of the data set, relative to the working directory
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # font used for the Chinese labels
    font_set = FontProperties(size=20)  # axis labels
    font_set1 = FontProperties(size=15)  # subplot titles
    hear = ["sepal length in cm", "sepal width in cm", "petal length in cm", "petal width in cm"]
    fig = plt.figure(figsize=(25, 5), dpi=100)

    for i, num in enumerate(data_arr.T):
        ordered = np.sort(num)
        n = len(ordered)
        n1 = int(math.ceil(n * 0.25))  # rank of the 25% cut
        n2 = int(math.ceil(n * 0.75))  # rank of the 75% cut
        # Split the sorted values and their 1-based ranks at the same two cuts.
        value_groups = np.split(ordered, [n1, n2])
        rank_groups = np.split(np.arange(1, n + 1), [n1, n2])

        fig.add_subplot(1, 4, i + 1)
        top = num.max()
        plt.plot([n1, n1], [0, top], color='r')
        plt.plot([n2, n2], [0, top], color='b')
        for ranks, values in zip(rank_groups, value_groups):
            plt.bar(ranks, values)

        plt.xlabel('株数', fontproperties=font_set)
        plt.ylabel('量化', fontproperties=font_set)
        # The numbers in the title are the rank positions n1/n2 where the lines are drawn.
        plt.title('{}(25%分位数{},75%分位数{})'.format(hear[i], n1, n2), fontproperties=font_set1)
        # Labels follow drawing order: two lines, then the three bar groups.
        # The last group lies above the 75% rank, hence 'x>75%'.
        plt.legend(labels=['25%分位数', '75%分位数', 'x<25%', 'x>25%&&x<75%', 'x>75%'], loc='upper center')

    plt.savefig("Iris数据集分位数直方图")
    plt.show()
# Script entry point: draw the quartile bar charts for 'iris.csv' (path relative to the working directory).
dreaw('iris.csv')
运行图示:
3. Iris数据集每一维属性画饼图
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
def dreawCsv(filename):
    """Draw one pie chart per Iris feature showing how often each value occurs.

    The CSV at *filename* is read and its ``class`` column dropped. For every
    remaining column the distinct values are counted and drawn as wedges
    ordered by value, each labelled with its value and its percentage share.
    Each chart is saved as ``Iris数据集第{k}列数据饼图`` (k is the 1-based
    column number) and then displayed.

    :param filename: path of the data set, relative to the working directory
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    # Font file used for the Chinese title; the path is relative to the working directory.
    font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=100)

    for i, num in enumerate(data_arr.T):
        # Count first, then order by value: this keeps every count attached to
        # its own value, so wedge sizes and wedge labels cannot drift apart.
        counts = pd.Series(num).value_counts().sort_index()

        plt.figure(figsize=(36, 36))
        _, l_texl, p_text = plt.pie(
            counts.values,
            labels=counts.index.tolist(),
            radius=1,
            autopct='%.2f%%',
        )
        for t in l_texl:  # value labels outside the wedges
            t.set_size(60)
        for t in p_text:  # percentage texts inside the wedges
            t.set_size(35)
        plt.axis('equal')
        plt.legend()
        plt.suptitle("第{}列数据饼图".format(i + 1), fontproperties=font_set, fontweight='bold')
        plt.savefig('Iris数据集第{}列数据饼图'.format(i + 1))
        plt.show()
# Script entry point: draw the pie charts for 'iris.csv' (path relative to the working directory).
dreawCsv('iris.csv')
运行图示:
4. Flame数据集画散点图
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
def dreawCsv(filename):
    """Draw one scatter plot per column of the data set, then save and show it.

    The CSV at *filename* is read and every column is plotted against the
    0-based row index. Points are coloured by their own value with the
    ``brg`` colormap, and a colour bar is added. Each figure is saved as
    ``第{k}列数据散点图`` (k is the 1-based column number) and then displayed.

    :param filename: path of the data set, relative to the working directory
    :return: None
    """
    df = pd.read_csv(filename)
    data_arr = np.array(df)
    # Font file used for the Chinese title; the path is relative to the working directory.
    font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=100)
    x = range(len(data_arr))  # row index, shared by every column

    for i, num in enumerate(data_arr.T):
        plt.figure(figsize=(25, 15), dpi=100)
        # Visibility is passed positionally: the old `b=` keyword was renamed
        # to `visible=` in newer Matplotlib, and the positional form works in both.
        plt.grid(True, color='y', linestyle='--', linewidth=2)
        plt.scatter(x, num, c=num, cmap='brg')
        plt.colorbar()
        plt.suptitle("第{}列数据散点图".format(i + 1), fontproperties=font_set, fontweight='bold')
        plt.savefig('第{}列数据散点图'.format(i + 1))
        plt.show()
# Script entry point: draw the scatter plots for 'Flame.csv' (path relative to the working directory).
dreawCsv('Flame.csv')
运行图示: