# Script: plot a histogram of the 'data' column of ./data.csv.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

path7 = './data.csv'  # train.csv
titanic = pd.read_csv(path7)

# Print basic summary statistics (count, mean, std, quartiles, ...).
print(titanic.describe())

# Sort the values from the largest to the smallest (no slicing is done here;
# the whole column is plotted).
df = titanic['data'].sort_values(ascending=False)

# Create the bin edges with numpy: 0, 4000, 8000, ..., 100000.
# np.arange excludes its stop value, so the stop is pushed one step past
# 100000 to make the last bin end at 100000 instead of 96000.
binsVal = np.arange(0, 100000 + 4000, 4000)

# Create the plot.
plt.hist(df, bins=binsVal)

# Set the title and labels.
plt.xlabel('data')  # x-axis label
plt.ylabel('Frequency')  # y-axis label
plt.title('Fare Payed Histrogram')

# Show the plot.
plt.show()
# Normal-distribution histogram (正态分布直方图)
# -*- coding:utf-8 -*-
# Script: plot a density-normalised histogram of the first CSV column and
# overlay the normal (Gaussian) curve with the same mean and standard deviation.
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import os

x = []
# Read the first comma-separated field of every line as an integer.
with open('G:/Test/5-25/data.csv', 'r', newline='') as f:
    for line in f:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which would otherwise make int() raise ValueError.
            continue
        x.append(int(line.split(',')[0]))

mu = np.mean(x)
sigma = np.std(x)
# x = mu + sigma * np.random.randn(10000)  # would generate normally distributed x values around the mean

num_bins = 50  # 50 bars

# Histogram: density=True normalises the bars so that the total area is 1
# (probability density); green bars, alpha (opacity) 0.5.
# `density` replaces the `normed` keyword, which was removed from matplotlib.
# Returns the bar heights, the bin edges, and the bar patch objects.
n, bins, patches = plt.hist(x, num_bins, density=True, facecolor='green', alpha=0.5)

# Gaussian probability density evaluated at the bin edges; this only draws the
# fitted curve. Computed with NumPy because matplotlib.mlab.normpdf was
# removed from matplotlib.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
plt.plot(bins, y, 'r--')

plt.xlabel('Smarts')
plt.ylabel('Probability')
# NOTE(review): the title hard-codes mu=100 / sigma=15 while mu and sigma are
# computed from the data above — confirm whether the title should show them.
plt.title(r'Histogram of IQ: $\mu=100$ $\sigma=15$')
plt.subplots_adjust(left=0.15)  # left margin
plt.show()
# Pie chart (饼图)
# Script: pie chart of how many rows of ./data.csv fall into each 'class' value.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

path7 = './data.csv'  # train.csv
titanic = pd.read_csv(path7)

# Count the rows belonging to each class value (1 to 4).
class1 = (titanic['class'] == 1).sum()
class2 = (titanic['class'] == 2).sum()
class3 = (titanic['class'] == 3).sum()
class4 = (titanic['class'] == 4).sum()

# Put the counts into a list called proportions.
proportions = [class1, class2, class3, class4]

# Create a pie chart.
plt.pie(
    # using proportions
    proportions,
    # wedge labels
    labels=['class1', 'class2', 'class3', 'class4'],
    # with no shadows
    shadow=False,
    # wedge colors
    colors=['blue', 'red', 'green', 'yellow'],
    # offset of each wedge from the centre; only the first (blue) wedge is
    # pulled out
    explode=(0.15, 0, 0, 0),
    # start drawing at an angle of 90 degrees
    startangle=90,
    # show each share as a percentage with one decimal place
    autopct='%1.1f%%',
)

# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')

# Set the title.
# NOTE(review): the title says "Sex Proportion" but the data plotted is the
# 'class' column — confirm the intended title.
plt.title("Sex Proportion")

# View the plot.
plt.tight_layout()
plt.show()