对于离散数据:
np.random.random_integers(low, high, size):返回随机的整数,位于闭区间 [low, high](该函数自 NumPy 1.11 起已弃用,并在 NumPy 2.0 中被移除,建议改用 np.random.randint(low, high + 1, size))
np.random.randint(low, high, size):返回随机的整数,位于半开区间 [low, high)
a.value_counts():统计 a 中各个离散值的频数,返回值也是 Series 类型(索引为取值,默认按频数从高到低排序)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pylab import *
# Script: draw a bar chart of how often each face appears in 100 simulated
# rolls of a six-sided die.

# Use the SimHei font so the Chinese title and axis label render correctly.
# plt.rcParams is the same settings object as matplotlib.rcParams, and `plt`
# is imported explicitly (no reliance on the pylab star import for `mpl`).
plt.rcParams['font.sans-serif'] = ['SimHei']

# np.random.random_integers was deprecated in NumPy 1.11 and removed in
# NumPy 2.0. randint excludes its upper bound, so high=7 keeps the closed
# range [1, 6].
a = np.random.randint(1, 7, size=100)
a = pd.Series(a)  # build a Series first so value_counts() is available

# Frequency of each distinct value; the result is a Series indexed by value
# and ordered by descending count.
b = a.value_counts()
print(b)

x = list(b.index)  # the distinct values, in frequency order
print(x)
y = b.tolist()  # the matching counts, in the same order
print(y)

x = np.array(x)
y = np.array(y)
# Reorder both arrays by face value so the bars run 1..6 from left to right.
# The permutation is computed once and applied to both arrays, which keeps
# every count paired with its value.
order = np.argsort(x)
x = x[order]
y = y[order]

width = 0.5
plt.bar(range(len(y)), y, width, color='c', tick_label=x)
plt.title("100个[1,6]之间随机数的频数分布", fontsize=18)
plt.ylabel('频数', fontsize=14)
plt.ylim(0, 30)
plt.show()
运行结果:a=pd.Series(a) 的内容(左列为索引,右列为生成的随机数)
0 3
1 1
2 3
3 2
4 2
..
95 5
96 5
97 6
98 3
99 1
b=a.value_counts()
1 19
5 18
2 18
4 16
6 15
3 14
b.index:[1, 5, 2, 4, 6, 3]
b[i]:[19, 18, 18, 16, 15, 14]
对于连续数据:区间自动划分(value_counts 的 bins 参数)和手动划分(pd.cut)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pylab import *
# Script: let pandas split 5000 random integers from [1, 100] into five
# bins automatically and plot the count that falls into each bin.

# Use the SimHei font so the Chinese axis labels render correctly.
# plt.rcParams is the same settings object as matplotlib.rcParams.
plt.rcParams['font.sans-serif'] = ['SimHei']

# np.random.random_integers was removed in NumPy 2.0; randint excludes its
# upper bound, so high=101 keeps the closed range [1, 100].
a = np.random.randint(1, 101, size=5000)
a = pd.Series(a)

# bins=5 asks value_counts to group the values into five intervals;
# sort=False keeps the intervals in ascending order instead of ordering
# them by count.
b = a.value_counts(bins=5, sort=False)
x = b.index
print(x)
y = b.tolist()

# Pass the interval labels as plain strings so the tick text does not depend
# on how matplotlib converts Interval objects.
tick_labels = [str(interval) for interval in x]
plt.bar(range(len(y)), y, width=0.4, color='c', tick_label=tick_labels)
plt.xlabel('区间', fontsize=14)
plt.ylabel('频数', fontsize=14)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pylab import *
# Script: group 5000 random integers from [1, 100] into hand-chosen bins
# with pd.cut and plot the count that falls into each bin.

# Use the SimHei font so the Chinese axis labels render correctly.
# plt.rcParams is the same settings object as matplotlib.rcParams.
plt.rcParams['font.sans-serif'] = ['SimHei']

# np.random.random_integers was removed in NumPy 2.0; randint excludes its
# upper bound, so high=101 keeps the closed range [1, 100].
a = np.random.randint(1, 101, size=5000)
a = pd.Series(a)

# Left-closed bin edges. The last edge is 101 rather than 100: with
# right=False the final bin would otherwise be [80, 100) and every value
# equal to 100 would fall outside all bins and become NaN. The previous
# version worked around that by pasting hard-coded counts from an earlier
# run, so the chart never matched the data that was actually generated.
fanwei = list(range(0, 100, 20)) + [101]  # [0, 20, 40, 60, 80, 101]
bin_labels = ['[0, 20)', '[20, 40)', '[40, 60)', '[60, 80)', '[80, 100]']
fenzu = pd.cut(a, fanwei, right=False, labels=bin_labels)  # bin of each value
print(fenzu)

# Count per bin. value_counts() orders by frequency, so sort_index() restores
# the ascending bin order (pd.cut produces an ordered categorical).
pinshu = fenzu.value_counts().sort_index()
print(pinshu)
print(type(pinshu))

x = list(pinshu.index)
y = pinshu.tolist()

plt.bar(range(len(y)), y, width=0.4, color='c', tick_label=x)
plt.xlabel('区间', fontsize=14)
plt.ylabel('频数', fontsize=14)
plt.show()