数据可视化

1. Iris数据集每个维度画盒图

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd


def dreaw(filename):
    """Draw a colour-filled box plot for every feature column of the dataset.

    Reads the CSV at ``filename``, drops its ``class`` column, and draws the
    remaining columns side by side as box plots in one figure. The figure is
    saved as ``Iris数据集盒图`` (no extension given, so matplotlib's default
    save format applies) and then displayed.

    :param filename: path of the dataset CSV (relative paths are resolved
        against the current working directory)
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    # Font file used for the Chinese figure title; the path is relative to the
    # working directory and is expected to exist inside the virtualenv.
    font_set = FontProperties(
        fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF",
        size=30,
    )
    data = ["sepal length in cm", "sepal width in cm", "petal length in cm", "petal width in cm"]
    colors = ['#9999ff', '#ef476f', '#ffd166', '#118AD5']

    # One 1-D array per column; zip() pairs each with its display name and
    # silently stops at the shorter of the two sequences.
    arr = list(data_arr.T)
    data1 = pd.DataFrame(dict(zip(data, arr)))

    plt.figure(figsize=(10, 8))
    plt.grid()
    f = plt.boxplot(
        data1,
        patch_artist=True,  # boxes are patches so they can be filled with a colour
        showmeans=True,  # draw the mean as a marker
        flierprops={'marker': 'o', 'markerfacecolor': 'red', 'color': 'black'},  # outlier markers
        meanprops={'marker': 'D', 'markerfacecolor': 'indianred'},  # mean marker
        medianprops={'linestyle': '--', 'color': 'orange'},  # median line
    )
    # Use a separate loop name so the colour list itself is never rebound.
    for box, face_color in zip(f['boxes'], colors):
        box.set(color='black', linewidth=2)
        box.set(facecolor=face_color)

    # Boxes are drawn at x = 1..N. Labelling them through xticks avoids the
    # boxplot ``labels`` keyword, which newer matplotlib releases deprecate.
    column_names = list(data1.columns)
    plt.xticks(range(1, len(column_names) + 1), column_names, rotation=20)
    plt.suptitle("Iris数据集盒图", fontproperties=font_set, fontweight='bold')
    plt.savefig("Iris数据集盒图")
    plt.show()


# Script entry: draw the box plots for 'iris.csv' (path relative to the working directory).
dreaw('iris.csv')

运行图示:

代码分析:将data与arr两个数组构建成字典(data:arr),然后将字典转换为DataFrame类型,然后利用 matplotlib 库中的 boxplot 函数进行画图

2. Iris数据集找分位数画直方图

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
import math
from pylab import mpl


def dreaw(filename):
    """Plot each feature's sorted values as bars, split at the 25% and 75% ranks.

    Reads the CSV at ``filename`` and drops its ``class`` column. For every
    remaining column the values are sorted ascending and drawn as bars against
    their rank (1..n) in one of four side-by-side subplots. The bars are
    coloured in three groups (up to the 25% rank, between the 25% and 75%
    ranks, above the 75% rank) and a vertical line marks each of the two
    ranks. The figure is saved as ``Iris数据集分位数直方图`` and then displayed.

    The layout and the title names are fixed for four feature columns.

    :param filename: path of the dataset CSV (relative paths are resolved
        against the current working directory)
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # sans-serif font for the Chinese labels
    font_set = FontProperties(size=20)
    font_set1 = FontProperties(size=15)
    hear = ["sepal length in cm", "sepal width in cm", "petal length in cm", "petal width in cm"]
    fig = plt.figure(figsize=(25, 5), dpi=100)

    for i, num in enumerate(data_arr.T):
        num3 = sorted(num)
        n = len(num3)
        # 1-based ranks of the 25% and 75% quantile elements.
        n1 = int(math.ceil(n * 0.25))
        n2 = int(math.ceil(n * 0.75))
        # Split the sorted values and their ranks into the same three segments.
        value_parts = np.split(num3, [n1, n2])
        rank_parts = np.split(np.arange(1, n + 1), [n1, n2])

        fig.add_subplot(1, 4, i + 1)
        top = num.max()
        line25, = plt.plot([n1, n1], [0, top], color='r')
        line75, = plt.plot([n2, n2], [0, top], color='b')
        bars = [plt.bar(x, y) for x, y in zip(rank_parts, value_parts)]

        plt.xlabel('株数', fontproperties=font_set)
        plt.ylabel('量化', fontproperties=font_set)
        # Show the quantile values (the elements at ranks n1 and n2), not the ranks.
        plt.title(
            '{}(25%分位数{},75%分位数{})'.format(hear[i], num3[n1 - 1], num3[n2 - 1]),
            fontproperties=font_set1,
        )
        # Pass the handles explicitly so every label is bound to the artist it
        # describes; the last segment holds the values above the 75% rank.
        plt.legend(
            handles=[line25, line75, *bars],
            labels=['25%分位数', '75%分位数', 'x<25%', 'x>25%&&x<75%', 'x>75%'],
            loc='upper center',
        )
    plt.savefig("Iris数据集分位数直方图")
    plt.show()


# Script entry: draw the quantile bar charts for 'iris.csv' (path relative to the working directory).
dreaw('iris.csv')
运行图示:

3. iris数据每一维属性做一个饼图

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd

def dreawCsv(filename):
    """Draw one pie chart per feature column showing how often each value occurs.

    Reads the CSV at ``filename`` and drops its ``class`` column. For every
    remaining column a new figure is created with one pie slice per distinct
    value, sized by that value's number of occurrences and labelled with the
    value itself. Each figure is saved as ``Iris数据集第{n}列数据饼图`` (``n`` is
    the 1-based column number) and then displayed.

    :param filename: path of the dataset CSV (relative paths are resolved
        against the current working directory)
    :return: None
    """
    df = pd.read_csv(filename).drop(labels=['class'], axis=1)
    data_arr = np.array(df)
    # Font file used for the Chinese figure title; the path is relative to the
    # working directory and is expected to exist inside the virtualenv.
    font_set = FontProperties(
        fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF",
        size=100,
    )
    for i, column in enumerate(data_arr.T):
        # Count occurrences and order them by value, so that counts and labels
        # come from the same index and every slice carries its own value.
        counts = pd.Series(column).value_counts().sort_index()
        plt.figure(figsize=(36, 36))
        _, label_texts, pct_texts = plt.pie(
            counts.values,
            labels=list(counts.index),
            radius=1,
            autopct='%.2f%%',
        )
        for text in label_texts:  # value labels outside the pie
            text.set_size(60)
        for text in pct_texts:  # percentage labels inside the slices
            text.set_size(35)
        plt.axis('equal')
        plt.legend()
        plt.suptitle("第{}列数据饼图".format(i + 1), fontproperties=font_set, fontweight='bold')
        plt.savefig('Iris数据集第{}列数据饼图'.format(i + 1))
        plt.show()


# Script entry: draw the pie charts for 'iris.csv' (path relative to the working directory).
dreawCsv('iris.csv')

运行图示:

4. Flame数据集画散点图

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd


def dreawCsv(filename):
    """Draw one scatter plot per column of the dataset, value against row index.

    Reads the CSV at ``filename`` and, for every column, creates a new figure
    that plots the column's values against the row index. Points are coloured
    by their own value with the ``brg`` colormap and a colorbar is added. Each
    figure is saved as ``第{n}列数据散点图`` (``n`` is the 1-based column number)
    and then displayed.

    :param filename: path of the dataset CSV (relative paths are resolved
        against the current working directory)
    :return: None
    """
    df = pd.read_csv(filename)
    data_arr = np.array(df)
    # Font file used for the Chinese figure title; the path is relative to the
    # working directory and is expected to exist inside the virtualenv.
    font_set = FontProperties(
        fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF",
        size=100,
    )
    # The x axis is the row index; it is the same for every column.
    x = range(data_arr.shape[0])
    for i, num in enumerate(data_arr.T):
        plt.figure(figsize=(25, 15), dpi=100)
        # Pass the on/off flag positionally: its keyword was renamed from
        # ``b`` to ``visible`` between matplotlib releases.
        plt.grid(True, color='y', linestyle='--', linewidth=2)
        plt.scatter(x, num, c=num, cmap='brg')
        plt.colorbar()
        # Title matches the saved file name (scatter plot, not pie chart).
        plt.suptitle("第{}列数据散点图".format(i + 1), fontproperties=font_set, fontweight='bold')
        plt.savefig('第{}列数据散点图'.format(i + 1))
        plt.show()


# Script entry: draw the scatter plots for 'Flame.csv' (path relative to the working directory).
dreawCsv('Flame.csv')

运行图示:

  • 12
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Ashpyxia

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值