目录
一.什么是numpy
NumPy是Python中做科学计算的基础库,重在数值计算,也是大部分Python科学计算库的基础,多用于在大型、多维数组上执行数值运算。
二.numpy相关问题
1.如何选择一行(列)或者多行(列)的数据?
2.如何给选取的行或者列赋值?
3.如何把大于10的值替换为10?
4.np.where如何使用?
5.np.clip如何使用?
6.如何转置(交换轴)?
7.读取和保存数据为csv
8.np.nan和np.inf是什么?
9.常用的统计函数你记得几个?
10.标准差反映出数据的什么信息?
三.numpy数组的创建
import numpy as np
import random
# An ndarray built from a plain Python list.
t1 = np.array([1, 2, 3])
for shown in (t1, type(t1)):
    print(shown)

# The integers 0..9.
t2 = np.arange(10)
print(t2)

# arange(start, stop, step): 4, 6, 8 (stop is excluded).
t3 = np.arange(start=4, stop=10, step=2)
for shown in (t3, type(t3), t3.dtype):
    print(shown)

# An explicit dtype: "i1" is the one-byte signed integer, i.e. int8.
t4 = np.arange(1, 4, dtype=np.int8)
print(t4.dtype)

# Zeros and ones read as booleans.
t5 = np.array([1, 1, 0, 1, 0, 0], dtype=np.bool_)
print(t5)
print(t5.dtype)

# astype returns a converted copy; here bool -> int8.
t6 = t5.astype(np.int8)
print(t6)
print(t6.dtype)

# Ten random floats drawn with the standard-library generator.
t7 = np.array([random.random() for _ in range(10)])
print(t7)
print(t7.dtype)

# The same values rounded to two decimal places.
t8 = t7.round(2)
print(t8)
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy数组的创建.py
[1 2 3]
<class 'numpy.ndarray'>
[0 1 2 3 4 5 6 7 8 9]
[4 6 8]
<class 'numpy.ndarray'>
int64
int8
[ True True False True False False]
bool
[1 1 0 1 0 0]
int8
[0.78969119 0.68313482 0.21280596 0.88104515 0.15284712 0.58684843
0.95809889 0.88654589 0.16578486 0.29570551]
float64
[0.79 0.68 0.21 0.88 0.15 0.59 0.96 0.89 0.17 0.3 ]
进程已结束,退出代码0
四.numpy索引,切片与计算
import numpy as np
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load the comma-separated US file as an integer array (dtype sets the
# element type). Passing unpack=True instead would return the transpose:
# t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int", unpack=True)
t2 = np.loadtxt(us_file_path, dtype="int", delimiter=",")
# print(t1)
# print("*" * 50)
# print(t2)

# --- Selecting rows ---
# print(t2[0])   # a single row
# print(t2[2:])  # a run of consecutive rows
wanted_rows = [2, 8, 10]
print(t2[wanted_rows])  # several non-adjacent rows

# --- Selecting by row and column together ---
print(t2[1])  # all columns of row 1
# print(t2[2:, :])
# print(t2[:, 0])
# print(t2[:, 2:])
# print(t2[:, [0, 3]])
# print(t2[2, 3])
# print(type(t2[2, 3]))
# print(t2[2:5, 1:4])

# Two index lists are paired element-wise: picks (0,0), (2,1) and (2,3).
pick_rows = [0, 2, 2]
pick_cols = [0, 1, 3]
print(t2[pick_rows, pick_cols])
# A 4x6 grid of the integers 0..23 used by the remaining examples.
t2 = np.arange(24).reshape(4, 6)

# Boolean indexing, np.where and clip examples (uncomment to try them):
# print(t2)
# print(t2 < 10)
# t2[t2 < 10] = 3
# print(t2)
# print(t2[t2 > 20])
# print(t2 > 20)
# t2[t2 > 20] = 20
# print(t2)
#
# print(np.where(t2 <= 3, 100, 300))
# print(t2)
# print(t2.clip(10, 18))
# print(t2)

# NaN is a float value, so the integer grid is converted before storing one.
t2 = t2.astype(float)
# print(t2)
t2[3, 3] = np.nan
#
# print(np.nan == np.nan)
# print(np.nan != np.nan)
# print(t2)
# t2[:, 0] = 0
# print(t2)
# print(np.count_nonzero(t2))

# NaN is the only value that differs from itself, so this mask is True
# exactly at the NaN positions.
print(t2 != t2)
# print(np.count_nonzero(t2 != t2))
# print(np.isnan(t2))
# print(np.count_nonzero(np.isnan(t2)))
# t2[np.isnan(t2)] = 0  # replace NaN with 0
print(t2)

t3 = np.arange(12).reshape(3, 4)
# print(np.sum(t3))
print(np.sum(t3, axis=0))

# Statistics along axis 0 give one result per column; the column that
# holds the NaN yields NaN for each of them.
print(np.sum(t2, axis=0))     # sum
print(np.mean(t2, axis=0))    # mean
print(np.median(t2, axis=0))  # median
print(t2.max(axis=0))
print(t2.min(axis=0))
print(np.ptp(t2, axis=0))     # range: maximum minus minimum
print(t2.std(axis=0))         # standard deviation
# The ndarray.ptp method no longer exists in NumPy 2.0, so the function
# form is used here as well; it prints the same range a second time.
print(np.ptp(t2, axis=0))
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy索引,切片与计算.py
[[5845909 576597 39774 170708]
[1338533 69687 678 5643]
[ 859289 34485 726 1914]]
[7860119 185853 26679 0]
[4394029 576597 170708]
[[False False False False False False]
[False False False False False False]
[False False False False False False]
[False False False True False False]]
[[ 0. 1. 2. 3. 4. 5.]
[ 6. 7. 8. 9. 10. 11.]
[12. 13. 14. 15. 16. 17.]
[18. 19. 20. nan 22. 23.]]
[12 15 18 21]
[36. 40. 44. nan 52. 56.]
[ 9. 10. 11. nan 13. 14.]
[ 9. 10. 11. nan 13. 14.]
[18. 19. 20. nan 22. 23.]
[ 0. 1. 2. nan 4. 5.]
[18. 18. 18. nan 18. 18.]
[6.70820393 6.70820393 6.70820393 nan 6.70820393 6.70820393]
[18. 18. 18. nan 18. 18.]
进程已结束,退出代码0
五.numpy数组的形状
import numpy as np
# A 1-D array of twelve integers; its shape is the one-element tuple (12,).
t1 = np.arange(12)
print(t1)
print(t1.shape)

# Three levels of nesting give three axes: shape (2, 2, 3).
t3 = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
    ]
)
print(t3.shape)

t4 = np.arange(12)
print(t4)
print(t4.reshape((3, 4)))
print("*" * 50)
print(t4.reshape((2, 2, 3)))
print(t4)  # reshape did not modify t4 itself

t5 = t4.reshape((3, 4))
print("*" * 50)
print(t5.reshape((3, 4)))
print(t5.reshape((12, 1)))  # twelve rows, one column
# Both spellings below ask for a flat, 1-D result.
print(t5.reshape((12,)))
print(t5.reshape(12))
print(t5.shape)
# Flattening without hard-coding the element count.
row_count, col_count = t5.shape
print(t5.reshape(row_count * col_count))
print(t5.reshape((row_count * col_count,)))
print(t5.flatten())
print(t5)

# Arithmetic with a scalar is broadcast to every element.
print(t5 + 2)
print(t5 / 2)
# print(t5 / 0)  # nan comes from 0/0, inf (infinity) from the other elements

# Same shape on both sides: the operation is applied position by position.
t6 = np.arange(100, 112).reshape((3, 4))
print(t6)
print(t6 + t5)

# Different shapes: the smaller operand has to line up with the rows
# (length 4 here) or with the columns (shape (3, 1) below).
t7 = np.arange(4)
print(t5 + t7)
t8 = np.arange(3).reshape((3, 1))
print(t8)
print(t5 + t8)
print(t5 * t8)
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy数组的形状.py
[ 0 1 2 3 4 5 6 7 8 9 10 11]
(12,)
(2, 2, 3)
[ 0 1 2 3 4 5 6 7 8 9 10 11]
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
**************************************************
[[[ 0 1 2]
[ 3 4 5]]
[[ 6 7 8]
[ 9 10 11]]]
[ 0 1 2 3 4 5 6 7 8 9 10 11]
**************************************************
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[ 0]
[ 1]
[ 2]
[ 3]
[ 4]
[ 5]
[ 6]
[ 7]
[ 8]
[ 9]
[10]
[11]]
[ 0 1 2 3 4 5 6 7 8 9 10 11]
[ 0 1 2 3 4 5 6 7 8 9 10 11]
(3, 4)
[ 0 1 2 3 4 5 6 7 8 9 10 11]
[ 0 1 2 3 4 5 6 7 8 9 10 11]
[ 0 1 2 3 4 5 6 7 8 9 10 11]
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[ 2 3 4 5]
[ 6 7 8 9]
[10 11 12 13]]
[[0. 0.5 1. 1.5]
[2. 2.5 3. 3.5]
[4. 4.5 5. 5.5]]
[[100 101 102 103]
[104 105 106 107]
[108 109 110 111]]
[[100 102 104 106]
[108 110 112 114]
[116 118 120 122]]
[[ 0 2 4 6]
[ 4 6 8 10]
[ 8 10 12 14]]
[[0]
[1]
[2]]
[[ 0 1 2 3]
[ 5 6 7 8]
[10 11 12 13]]
[[ 0 0 0 0]
[ 4 5 6 7]
[16 18 20 22]]
进程已结束,退出代码0
六.numpy读取数据与转置
import numpy as np
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# The same file is read twice with identical options except for unpack:
# with unpack=True loadtxt hands back the transposed array.
shared_options = {"delimiter": ",", "dtype": "int"}
t1 = np.loadtxt(us_file_path, unpack=True, **shared_options)
t2 = np.loadtxt(us_file_path, **shared_options)

print(t1)
print("*" * 50)
print(t2)
# A 4x6 grid, followed by three equivalent ways of transposing it:
# the transpose() method, the .T attribute, and swapping axes 1 and 0.
t2 = np.arange(24).reshape((4, 6))
print(t2)
for flipped in (t2.transpose(), t2.T, t2.swapaxes(1, 0)):
    print(flipped)
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy读取数据与转置.py
[[4394029 7860119 5845909 ... 142463 2162240 515000]
[ 320053 185853 576597 ... 4231 41032 34727]
[ 5931 26679 39774 ... 148 1384 195]
[ 46245 0 170708 ... 279 4737 4722]]
**************************************************
[[4394029 320053 5931 46245]
[7860119 185853 26679 0]
[5845909 576597 39774 170708]
...
[ 142463 4231 148 279]
[2162240 41032 1384 4737]
[ 515000 34727 195 4722]]
[[ 0 1 2 3 4 5]
[ 6 7 8 9 10 11]
[12 13 14 15 16 17]
[18 19 20 21 22 23]]
[[ 0 6 12 18]
[ 1 7 13 19]
[ 2 8 14 20]
[ 3 9 15 21]
[ 4 10 16 22]
[ 5 11 17 23]]
[[ 0 6 12 18]
[ 1 7 13 19]
[ 2 8 14 20]
[ 3 9 15 21]
[ 4 10 16 22]
[ 5 11 17 23]]
[[ 0 6 12 18]
[ 1 7 13 19]
[ 2 8 14 20]
[ 3 9 15 21]
[ 4 10 16 22]
[ 5 11 17 23]]
进程已结束,退出代码0
七.numpy中的随机方法
import numpy as np
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Read both CSV files as integer arrays (dtype sets the element type).
uk_data = np.loadtxt(uk_file_path, dtype="int", delimiter=",")
us_data = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# One-column integer arrays with as many rows as the matching data set:
# all zeros for the US rows, all ones for the UK rows.
zeros_data = np.zeros((len(us_data), 1), dtype=int)
ones_data = np.ones((len(uk_data), 1), dtype=int)

for shown in (uk_data, us_data, zeros_data, ones_data):
    print(shown)

# Appending those columns (horizontal stack) and then joining the two
# data sets row-wise (vertical stack):
# us_data = np.hstack((us_data, zeros_data))
# uk_data = np.hstack((uk_data, ones_data))
# final_data = np.vstack((us_data, uk_data))
# print(final_data)
# print(np.eye(10))
# 10x10 identity matrix: ones on the diagonal, zeros elsewhere.
t = np.eye(N=10)
print(t)
# Along axis 0: the position of the largest value in each column.
print(t.argmax(axis=0))
# t[t == 1] = -1
# print(np.argmax(t, axis=0))
# print(np.argmin(t, axis=1))  # along axis 1: position of the smallest value in each row

# Random sampling helpers.
print(np.random.rand(2, 3))                      # floats in [0, 1)
print(np.random.randn(2, 3))                     # normally distributed values
print(np.random.randint(10, 20, size=(4, 5)))    # random integers
print(np.random.uniform(10, 20, size=(4, 5)))    # uniform over [low, high): closed left, open right
# np.random.seed(10)  # fix the seed so the draws repeat
# print(np.random.randint(10, 20, (4, 5)))
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy中的随机方法.py
[[7426393 78240 13548 705]
[ 494203 2651 1309 0]
[ 142819 13119 151 1141]
...
[ 109222 4840 35 212]
[ 626223 22962 532 1559]
[ 99228 1699 23 135]]
[[4394029 320053 5931 46245]
[7860119 185853 26679 0]
[5845909 576597 39774 170708]
...
[ 142463 4231 148 279]
[2162240 41032 1384 4737]
[ 515000 34727 195 4722]]
[[0]
[0]
[0]
...
[0]
[0]
[0]]
[[1]
[1]
[1]
...
[1]
[1]
[1]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
[0 1 2 3 4 5 6 7 8 9]
[[0.34394023 0.8508696 0.21094773]
[0.35183376 0.99188736 0.92579012]]
[[ 0.3666301 -1.49328513 -0.69215783]
[-2.60521752 0.40758821 0.80665866]]
[[14 11 17 14 19]
[10 11 14 11 15]
[14 10 11 12 14]
[18 13 11 11 19]]
[[16.64175442 17.18289735 14.08841347 16.93362721 18.67962872]
[19.34447383 12.09131172 11.56044524 11.34433511 14.402885 ]
[18.56743927 18.4704144 13.77114997 16.7272933 14.97226484]
[14.60955368 11.11351948 15.64157451 11.5891424 16.89661637]]
进程已结束,退出代码0
八.numpy中在nan位置填充平均值
import numpy as np
def fill_nsarray(t1):
    """Replace each NaN in a 2-D array with the mean of its column.

    The mean is taken over the non-NaN entries of the same column. The
    array is modified in place and also returned, so both
    ``fill_nsarray(a)`` and ``a = fill_nsarray(a)`` work.

    Args:
        t1: Two-dimensional NumPy array; needs a float dtype to hold NaN.

    Returns:
        The same array object ``t1`` with its NaN entries filled.

    A column consisting only of NaN has no mean to fill with; it is left
    untouched (still NaN) instead of averaging an empty slice, which would
    emit a RuntimeWarning and write NaN back anyway.
    """
    for col_index in range(t1.shape[1]):
        column = t1[:, col_index]  # a view: writing to it updates t1
        nan_mask = np.isnan(column)
        # Nothing to fill, or nothing to average from.
        if not nan_mask.any() or nan_mask.all():
            continue
        column[nan_mask] = column[~nan_mask].mean()
    return t1
if __name__ == "__main__":
    # Demo input: the floats 0..23 laid out as 4 rows x 6 columns, with
    # row 1 blanked out to NaN from column 2 onwards.
    t1 = np.arange(24, dtype="float").reshape(4, 6)
    t1[1, 2:] = np.nan
    print(t1)

    # Show the same array after the NaN cells have been filled.
    t1 = fill_nsarray(t1)
    print(t1)
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy中填充nan和youtube数据.py
[[ 0. 1. 2. 3. 4. 5.]
[ 6. 7. nan nan nan nan]
[12. 13. 14. 15. 16. 17.]
[18. 19. 20. 21. 22. 23.]]
[[ 0. 1. 2. 3. 4. 5.]
[ 6. 7. 12. 13. 14. 15.]
[12. 13. 14. 15. 16. 17.]
[18. 19. 20. 21. 22. 23.]]
进程已结束,退出代码0
九.numpy实例1
import numpy as np
from matplotlib import pyplot as plt
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Integer table read from the US file (dtype sets the element type).
t_us = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# Last column only, restricted to values of at most 5000.
# NOTE(review): the variable name suggests this column holds comment
# counts - confirm against the data set description.
last_column = t_us[:, -1]
t_us_comments = last_column[last_column <= 5000]

largest = t_us_comments.max()
smallest = t_us_comments.min()
print(largest, smallest)

# Target bin width and the resulting number of bins.
# NOTE: floor division, so the bins are exactly `d` wide only when the
# value range is a multiple of `d`.
d = 50
bin_nums = (largest - smallest) // d

# Draw the histogram, write it to disk, then display it.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_us_comments, bins=bin_nums)
plt.savefig("./numpy实例1")
plt.show()
/Users/lichengxiang/opt/anaconda3/bin/python /Users/lichengxiang/Desktop/python/数据分析/numpy实例1.py
4995 0
进程已结束,退出代码0
十.numpy实例2
import numpy as np
from matplotlib import pyplot as plt
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Integer table read from the GB file (dtype sets the element type).
t_uk = np.loadtxt(uk_file_path, dtype="int", delimiter=",")

# Keep only the rows whose column 1 value is at most 500000.
within_limit = t_uk[:, 1] <= 500000
t_uk = t_uk[within_limit]

# x values come from column 1, y values from the last column.
# NOTE(review): the names suggest like counts and comment counts -
# confirm against the data set description.
t_uk_like = t_uk[:, 1]
t_uk_comment = t_uk[:, -1]

# Draw the scatter plot, write it to disk, then display it.
plt.figure(figsize=(20, 8), dpi=80)
plt.scatter(t_uk_like, t_uk_comment)
plt.savefig("./numpy实例2")
plt.show()