# npr.seed fixes the random seed so every run draws the same numbers.
# npr.randint(low, high, shape) draws an integer array of the given shape
# with values in [low, high).
import numpy.random as npr


def printf(x):
    """Print ``x`` followed by a 60-dash separator line."""
    print(x)
    print("-" * 60)


npr.seed(10)
t = npr.randint(0, 30, (3, 4))
printf(t)

# Copy vs. alias:
# ``ndarray.copy()`` allocates new storage, so writes to the copy do not
# reach the source array; plain assignment only binds a second name to the
# same array object, so a write through one name is visible through the other.
a = t.copy()
# Every value drawn above is below 30, so this mask selects all elements.
a[a <= 40] = 0
printf(a)
printf(t)  # ``t`` is unchanged: ``a`` owns its own data

b = a  # alias, not a copy
a[a == 0] = 1
printf(b)  # shows the ones written through ``a``
numpy 数组 nan 的计数 np.count_nonzero
# Counting NaN values: NaN is the only value that compares unequal to
# itself (np.nan != np.nan), so ``t != t`` is True exactly at the NaN
# positions and np.count_nonzero of that mask is the number of NaNs.
import numpy as np
import numpy.random as npr


def printf(x):
    """Print ``x`` followed by a 60-dash separator line."""
    print(x)
    print("-" * 60)


t = npr.randint(0, 30, (3, 5))
printf(t)

# Convert to float before assigning NaN: the array returned by randint is
# an integer array and cannot store NaN.
t = t.astype("float")
t[t < 10] = np.nan
printf(t)

print(np.count_nonzero(t != t))
应用：用列平均值替换数组中的 nan 值
# Replace every NaN in an array with the mean of its column.
import numpy as np


def printf(x):
    """Print ``x`` followed by a 60-dash separator line."""
    print(x)
    print("-" * 60)


def change(t1):
    """Fill the NaN entries of 2-D array ``t1`` in place with column means.

    For each column, the mean is taken over that column's non-NaN values
    and written into its NaN positions. Columns without NaN are left
    untouched. A column made up entirely of NaN has no defined mean and is
    also left untouched (it stays NaN) instead of averaging an empty slice.

    Args:
        t1: 2-D float ndarray; modified in place.

    Returns:
        None.
    """
    for col in range(t1.shape[1]):
        column = t1[:, col]  # basic slice -> view, so writes reach ``t1``
        missing = np.isnan(column)
        if not missing.any() or missing.all():
            continue
        column[missing] = column[~missing].mean()


# Related reductions, each taking an ``axis`` argument:
#   t.mean(axis=x)  mean
#   t.max(axis=x)   maximum
#   t.sum(axis=x)   sum

if __name__ == "__main__":
    t = np.array(range(15)).reshape(3, 5).astype("float")
    t[2, 3:] = np.nan
    printf(t)
    change(t)
    printf(t)
numpy 数组提取文件数据然后做直方图
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mtb
def printf(x):
    """Print ``x`` followed by a 60-dash separator line."""
    print(x)
    print("-" * 60)


# Select the SimHei font so the Chinese labels below can be rendered, and
# disable the Unicode minus glyph in tick labels.
mtb.rcParams['font.sans-serif'] = ["SimHei"]
mtb.rcParams["axes.unicode_minus"] = False
plt.figure(figsize=(15, 5), dpi=80)

us_file_path = "F:/All date/US_video_data_numbers.csv"
uk_file_path = "F:/All date/GB_video_data_numbers.csv"
t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int")
t2 = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# Last column of each file; presumably the comment count, judging by the
# x-axis label below -- confirm against the CSV layout.
us = t1[:, -1]
uk = t2[:, -1]

# Outliers must be removed, otherwise a few huge values stretch the axis.
us = us[us <= 10000]
# Filter the GB data by its own values (previously this line re-filtered
# ``us`` and discarded the GB data).
uk = uk[uk <= 10000]

d = 500  # bin width
lowest, highest = int(us.min()), int(us.max())
num = (highest - lowest) // d
# num + 2 edges give num + 1 bins, so the last bin reaches past the maximum.
plt.hist(us, [lowest + i * d for i in range(num + 2)], color="#FF7F50")
plt.xticks(range(lowest, highest + 2 * d, d))
plt.yticks(range(1, 1000, 50))

# Axis labels and title.
plt.xlabel("评论数量")
plt.ylabel("人数")
plt.title("美国油管前1000评论数量统计")

# Grid lines; ``alpha`` sets their opacity.
plt.grid(alpha=0.3, color="#000000")

# Show the figure.
plt.show()