2.python数据分析与展示------Numpy数据存取与函数

1.数据的csv文件存取

CSV(Comma-Separated Values,逗号分隔值)是一种常见的文件格式,用来存储批量数据


csv文件:

                 np.savetxt(fname, array, fmt='%.18e', delimiter=' ')

•fname : 文件名或文件对象;文件名以.gz结尾时会自动以gzip压缩格式保存
•array : 存入文件的数组
•fmt: 写入文件的格式,例如:%d %.2f %.18e

•delimiter : 分割字符串,默认是单个空格

import numpy as np
a=np.arange(100).reshape(5,20)
np.savetxt('a.csv',a,fmt='%d',delimiter=',')
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
np.savetxt('b.csv',a,fmt='%.1f',delimiter=',')
0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0
20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0,39.0
40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0
60.0,61.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0,75.0,76.0,77.0,78.0,79.0
80.0,81.0,82.0,83.0,84.0,85.0,86.0,87.0,88.0,89.0,90.0,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0
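np.loadtxt(fname, dtype=float, delimiter=None, unpack=False)
•fname : 文件、字符串或产生器,可以是.gz或.bz2的压缩文件
•dtype: 数据类型,可选,默认为float(np.float别名已在NumPy 1.24中移除);文件内容是0.0这样的浮点格式时,直接指定整数dtype在NumPy 1.23及以上版本会触发DeprecationWarning,可先按float读入再用astype转换

•delimiter : 分割字符串,默认是任意空白字符

•unpack : 如果True,读入属性将分别写入不同变量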

b=np.loadtxt('b.csv',delimiter=',')
print(b)
# [[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
#   18. 19.]
#  [20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36. 37.
#   38. 39.]
#  [40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54. 55. 56. 57.
#   58. 59.]
#  [60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71. 72. 73. 74. 75. 76. 77.
#   78. 79.]
#  [80. 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. 91. 92. 93. 94. 95. 96. 97.
#   98. 99.]]
b=np.loadtxt('b.csv',dtype=np.int32,delimiter=',')
print(b)
# [[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
#  [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39]
#  [40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59]
#  [60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79]
#  [80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]]

CSV的局限性

CSV只能有效存储一维和二维数组

np.savetxt() np.loadtxt()只能有效存取一维和二维数组

2.多维数据的存取

a.tofile(fid, sep='', format='%s')

•fid : 文件对象或文件名字符串
•sep: 数据分割字符串,如果是空串,写入文件为二进制

•format : 写入数据的格式(仅在sep非空、以文本方式写入时生效)

a =np.arange(100).reshape(5,10,2)
a.tofile("b.dat",sep=",",format='%d')
# 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,\
# 26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
# 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,\
# 73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99

np.fromfile(file, dtype=float, count=-1, sep='')

•file : 文件对象或文件名字符串
•dtype: 读取的数据类型
•count : 读入元素个数,-1表示读入整个文件
•sep: 数据分割字符串,如果是空串,按二进制方式读取文件

a =np.arange(100).reshape(5,10,2)
a.tofile("b.dat",sep=",",format='%d')
c =np.fromfile('b.dat',dtype=np.int32,sep=',')
print(c)
# [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
#  24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
#  48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
#  72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
#  96 97 98 99]
c =np.fromfile('b.dat',dtype=np.int32,sep=',').reshape(5,10,2)
print(c)
# [[[ 0  1]
#   [ 2  3]
#   [ 4  5]
#   [ 6  7]
#   [ 8  9]
#   [10 11]
#   [12 13]
#   [14 15]
#   [16 17]
#   [18 19]]
#
#  [[20 21]
#   [22 23]
#   [24 25]
#   [26 27]
#   [28 29]
#   [30 31]
#   [32 33]
#   [34 35]
#   [36 37]
#   [38 39]]
#
#  [[40 41]
#   [42 43]
#   [44 45]
#   [46 47]
#   [48 49]
#   [50 51]
#   [52 53]
#   [54 55]
#   [56 57]
#   [58 59]]
#
#  [[60 61]
#   [62 63]
#   [64 65]
#   [66 67]
#   [68 69]
#   [70 71]
#   [72 73]
#   [74 75]
#   [76 77]
#   [78 79]]
#
#  [[80 81]
#   [82 83]
#   [84 85]
#   [86 87]
#   [88 89]
#   [90 91]
#   [92 93]
#   [94 95]
#   [96 97]
#   [98 99]]]
a =np.arange(100).reshape(5,10,2)
a.tofile("b.dat",format='%d')
c=np.fromfile("b.dat",dtype=np.int32).reshape(5,10,2)
print(c)
# [[[ 0  1]
#   [ 2  3]
#   [ 4  5]
#   [ 6  7]
#   [ 8  9]
#   [10 11]
#   [12 13]
#   [14 15]
#   [16 17]
#   [18 19]]
# ...
# [[80 81]
#   [82 83]
#   [84 85]
#   [86 87]
#   [88 89]
#   [90 91]
#   [92 93]
#   [94 95]
#   [96 97]
#   [98 99]]]
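注意:
该方法需要读取时知道存入文件时数组的维度和元素类型,a.tofile()和np.fromfile()需要配合使用,可以通过元数据文件来存储额外信息。
例如以二进制方式写入时,np.arange(100)的默认整数类型在多数平台上是int64,此时用dtype=np.int32读取会得到200个元素,reshape(5,10,2)会报错;应在创建数组时指定dtype=np.int32,或读取时使用dtype=a.dtype。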
3.Numpy便捷文件存取

np.save(fname, array) np.savez(fname, array)

             •fname: 文件名,以.npy为扩展名,压缩扩展名为.npz

             •array : 数组变量

np.load(fname)

            •fname: 文件名,以.npy为扩展名,压缩扩展名为.npz

a=np.arange(100).reshape(5,10,2)
np.save("a.npy",a)
# a.npy是二进制文件,用文本编辑器打开时只有文件头可读:
# \x93NUMPY(魔数及版本号)
# {'descr': '<i4', 'fortran_order': False, 'shape': (5, 10, 2), }
# 文件头之后是数组的原始二进制数据,显示为乱码
b=np.load("a.npy")
print(b)
4.Numpy的随机函数

NumPy的random子库 

                   np.random.rand()

                   np.random.randn()

                   np.random.randint()


import numpy as np
a =np.random.rand(3,4,5)
print(a)
# [[[0.11923456 0.0080324  0.23576131 0.71490196 0.39313982]
#   [0.26944438 0.30595366 0.89433112 0.76073646 0.54988195]
#   [0.58136344 0.82684317 0.08892499 0.96461801 0.10869441]
#   [0.48035625 0.58082037 0.97235769 0.83626598 0.77352449]]
#
#  [[0.05019939 0.69692701 0.5197847  0.14322148 0.80999927]
#   [0.09298827 0.49460859 0.9621793  0.38776821 0.37452434]
#   [0.13729232 0.50410236 0.68394837 0.87087505 0.33721868]
#   [0.32834593 0.7600151  0.8137906  0.03984698 0.81580278]]
#
#  [[0.82580339 0.03564352 0.55698346 0.44198408 0.69197987]
#   [0.78329794 0.8449475  0.68260885 0.35188764 0.13075481]
#   [0.11841985 0.07254895 0.34286141 0.86560175 0.2005601 ]
#   [0.41852062 0.63877623 0.33749892 0.98977597 0.61811358]]]
sn =np.random.randn(3,4,5)
print(sn)
# [[[-1.29145587 -0.02309264  1.02447127  0.51065452 -0.01289186]
#   [ 0.16930873 -0.80807135 -1.33109108 -0.29476181  3.0812429 ]
#   [ 0.26575456 -1.01242421 -2.10002667 -0.38129533  0.11584166]
#   [-1.5870124   1.2326421  -0.38786647 -1.26054727  1.38201501]]
#
#  [[ 0.0727602  -0.50899469  0.21616575 -0.32233134  0.35653899]
#   [ 1.15315991 -0.08415659 -0.01346529 -1.6210397  -0.18992538]
#   [-0.06780673  0.20946401 -0.42592983  0.22779739  1.27193371]
#   [ 2.9091403  -0.55126307 -0.36063733 -0.32533772 -0.22111197]]
#
#  [[ 0.14910911  0.27918515  2.09298654  0.1967028  -0.45330462]
#   [-0.56676479 -0.14943735 -0.17003379 -1.1706462   0.56048001]
#   [ 0.2485423  -1.02440498 -0.84386213  0.47372249 -0.33259582]
#   [ 1.16471167 -0.20821131  1.50395877  0.9741344  -0.27076424]]]
b =np.random.randint(100,200,(3,4))
print(b)
# [[122 102 149 133]
#  [168 185 139 162]
#  [163 143 173 122]]

np.random.seed(10)
print(np.random.randint(100,200,(3,4)))
#运行两次结果一样
# [[109 115 164 128]
#  [189 193 129 108]
#  [173 100 140 136]]

 
a=np.random.randint(100,200,(3,4))
print(a)
# [[176 178 105 120]
#  [178 176 155 130]
#  [111 110 168 198]]
np.random.shuffle(a)
print(a)
np.random.shuffle(a)
print(a)
# [[148 155 198 115]
#  [150 115 117 147]
#  [146 117 198 114]]
# [[150 115 117 147]
#  [148 155 198 115]
#  [146 117 198 114]]
print(a)
print(np.random.permutation(a))
print(a)
# [[163 155 163 173]
#  [174 108 165 194]
#  [135 158 121 196]]
# [[174 108 165 194]
#  [163 155 163 173]
#  [135 158 121 196]]
# [[163 155 163 173]
#  [174 108 165 194]
#  [135 158 121 196]]
b=np.random.randint(100,200,(8,))
print(b)
# [102 128 152 157 176 137 145 172]
print(np.random.choice(b,(3,2)))
# [[116 116]
#  [168 137]
#  [162 116]]
print(np.random.choice(b,(3,2),replace=False))
# [[141 194]
#  [130 119]
#  [116 134]]
print(np.random.choice(b,(3,2),p=b/np.sum(b)))
# [[113 162]
#  [113 149]
#  [151 113]]
 
u =np.random.uniform(0,10,(3,4))
print(u)
# [[5.46036254 6.12551993 5.53542549 7.72189327]
#  [3.43793947 9.94257227 3.15125202 3.60695433]
#  [5.67253129 5.95136365 0.79214474 9.264223  ]]
n =np.random.normal(10,5,(3,4))
print(n)
# [[ 2.53432016 12.77204898 15.57069479 14.18012267]
#  [12.62327262 16.08612479  9.45541684  7.99660799]
#  [ 8.9902546  17.84417588  7.42343768  9.52946522]]
5. Numpy的统计函数

NumPy直接提供的统计类函数 

                   np.std()

                   np.var()

                   np.average()                

import numpy as np
a =np.arange(15).reshape(3,5)
print(a)
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]]
print(np.sum(a))
# 105
print(np.mean(a,axis=1))
# [ 2.  7. 12.]
print(np.mean(a,axis=0))
# [5. 6. 7. 8. 9.]
print(np.average(a,axis=0,weights=[10,5,1]))
# [2.1875 3.1875 4.1875 5.1875 6.1875]
#4.1875=(2*10+7*5+12*1)/(10+5+1)
print(np.std(a))
# 4.320493798938574
print(np.var(a))
# 18.666666666666668
   
b=np.arange(15,0,-1).reshape(3,5)
print(b)
# [[15 14 13 12 11]
#  [10  9  8  7  6]
#  [ 5  4  3  2  1]]
print(np.max(b))
#15
print(np.argmax(b))
#0
#扁平化后的下标
print(np.unravel_index(np.argmax(b),b.shape))
#重塑成多维下标
#(0, 0)
print(np.ptp(b))
#14
print(np.median(b))
#8.0
6.Numpy的梯度函数           
  
import numpy as np
a =np.random.randint(0,20,(5))
print(a)
#[ 9 18  1  3 12]
print(np.gradient(a))
# [ 9.  -4.  -7.5  5.5  9. ]
#-4=(1-9)/2  存在两侧值
#9=(12-3)/1 只有一侧值
c=np.random.randint(0,50,(3,5))
print(c)
print(np.gradient(c))
#[[22 11 18  0 15]
#  [19 23 16 30 24]
#  [24 36 20 24 40]]
#最外层维度的梯度
# [array([[-3. , 12. , -2. , 30. ,  9. ],
#        [ 1. , 12.5,  1. , 12. , 12.5],
#        [ 5. , 13. ,  4. , -6. , 16. ]]), 
# 第二层维度的梯度
# array([[-11. ,  -2. ,  -5.5,  -1.5,  15. ],
#        [  4. ,  -1.5,   3.5,   4. ,  -6. ],
#        [ 12. ,  -2. ,  -6. ,  10. ,  16. ]])]
      
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值