Python数据分析_第04课:NumPy基础_笔记


GitHub: https://github.com/RealEmperor/Python-for-Data-Analysis

numpy

import numpy as np
from numpy.random import randn

# Universal functions (ufuncs): element-wise operations on arrays
arr = np.arange(10)
# Element-wise square root; as a bare expression, a notebook echoes the result
np.sqrt(arr)
array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])
# Element-wise exponential (e ** x) of arr
np.exp(arr)
array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

np.maximum 元素级最大值

# Two samples of 8 values each, drawn from the standard normal distribution
x = randn(8)
y = randn(8)
print(x)
print(y)
# Element-wise maximum: at each position keep the larger of the two values
np.maximum(x, y)
[-1.03760196 -1.0035245  -0.19109603  2.27398057 -0.51605815 -1.25481649
 -1.95118717 -0.09423245]
[-1.26195712 -0.70857631 -0.18729477  2.58847014  2.46277713 -1.04523397
  1.13501218  1.3499591 ]





array([-1.03760196, -0.70857631, -0.18729477,  2.58847014,  2.46277713,
       -1.04523397,  1.13501218,  1.3499591 ])

np.modf 按元素返回数组的小数部分和整数部分

# Standard-normal samples scaled by 5
arr = randn(7) * 5
print(arr)
# Split each element into its fractional and integral parts,
# returned as a (fractional, integral) pair of arrays
np.modf(arr)
[ 8.01175821  3.46248512 -4.11785287  1.34226648  0.40194097  5.81213218
 -0.40446832]





(array([ 0.01175821,  0.46248512, -0.11785287,  0.34226648,  0.40194097,
         0.81213218, -0.40446832]), array([ 8.,  3., -4.,  1.,  0.,  5., -0.]))

np.meshgrid 从坐标向量返回坐标矩阵

### Data processing using arrays
# Vectorization
points = np.arange(-5, 5, 0.01)  # 1000 equally spaced points
# Build coordinate matrices from the coordinate vectors:
# every row of ys holds a single value of points (see the printed output)
xs, ys = np.meshgrid(points, points)
print(ys)

[[-5.   -5.   -5.   ..., -5.   -5.   -5.  ]
 [-4.99 -4.99 -4.99 ..., -4.99 -4.99 -4.99]
 [-4.98 -4.98 -4.98 ..., -4.98 -4.98 -4.98]
 ..., 
 [ 4.97  4.97  4.97 ...,  4.97  4.97  4.97]
 [ 4.98  4.98  4.98 ...,  4.98  4.98  4.98]
 [ 4.99  4.99  4.99 ...,  4.99  4.99  4.99]]
import matplotlib.pyplot as plt

# Distance of every grid point from the origin, computed in one vectorized expression
z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
# Render the distance values as a grayscale image with a colour scale beside it
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on modern Python. The text passed is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

plt.draw()
[[ 7.07106781  7.06400028  7.05693985 ...,  7.04988652  7.05693985
   7.06400028]
 [ 7.06400028  7.05692568  7.04985815 ...,  7.04279774  7.04985815
   7.05692568]
 [ 7.05693985  7.04985815  7.04278354 ...,  7.03571603  7.04278354
   7.04985815]
 ..., 
 [ 7.04988652  7.04279774  7.03571603 ...,  7.0286414   7.03571603
   7.04279774]
 [ 7.05693985  7.04985815  7.04278354 ...,  7.03571603  7.04278354
   7.04985815]
 [ 7.06400028  7.05692568  7.04985815 ...,  7.04279774  7.04985815
   7.05692568]]

np.where

# Express conditional logic as array operations
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

# Pure-Python baseline: walk the three arrays in lockstep and pick the value
# from xarr where the flag is set, otherwise the value from yarr.
triples = zip(cond, xarr, yarr)
result = [from_x if flag else from_y for flag, from_x, from_y in triples]
print(result)
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]
# Vectorized form: take from xarr where cond is True, otherwise from yarr
result = np.where(cond, xarr, yarr)
print(result)
[ 1.1  2.2  1.3  1.4  2.5]
# np.where also accepts scalars in place of either value array
arr = randn(4, 4)
print(arr)
# Replace positive entries with 2 and all other entries with -2
print(np.where(arr > 0, 2, -2))
print(np.where(arr > 0, 2, arr))  # set only positive values to 2
[[-0.09677059 -0.78473401 -0.00841639  1.39892368]
 [-1.14999224  0.33586593 -0.1844864   0.47664971]
 [-0.67508722  0.56130304 -0.8018509   0.07338623]
 [ 0.10375292  1.44174994  0.42788598 -0.66850794]]
[[-2 -2 -2  2]
 [-2  2 -2  2]
 [-2  2 -2  2]
 [ 2  2  2 -2]]
[[-0.09677059 -0.78473401 -0.00841639  2.        ]
 [-1.14999224  2.         -0.1844864   2.        ]
 [-0.67508722  2.         -0.8018509   2.        ]
 [ 2.          2.          2.         -0.66850794]]
"""
# 多条件一般表示方法
# Not to be executed
result = []
for i in range(n):
    if cond1[i] and cond2[i]:
        result.append(0)
    elif cond1[i]:
        result.append(1)
    elif cond2[i]:
        result.append(2)
    else:
        result.append(3)

# 多条件where表示方法
# Not to be executed
np.where(cond1 & cond2, 0,
         np.where(cond1, 1,
                  np.where(cond2, 2, 3)))

# Not to be executed
result = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * ~(cond1 | cond2)
"""
'\n# 多条件一般表示方法\n# Not to be executed\nresult = []\nfor i in range(n):\n    if cond1[i] and cond2[i]:\n        result.append(0)\n    elif cond1[i]:\n        result.append(1)\n    elif cond2[i]:\n        result.append(2)\n    else:\n        result.append(3)\n\n# 多条件where表示方法\n# Not to be executed\nnp.where(cond1 & cond2, 0,\n         np.where(cond1, 1,\n                  np.where(cond2, 2, 3)))\n\n# Not to be executed\nresult = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * ~(cond1 | cond2)\n'

数学与统计方法

randn 标准正态分布数据

# Mathematical and statistical methods

arr = np.random.randn(5, 4)  # standard normal data
# Aggregate over the whole array, via the array method and the top-level function
print(arr.mean())
print(np.mean(arr))
print(arr.sum())

# Reduce along one axis: axis=1 gives one mean per row (5 values),
# axis 0 gives one sum per column (4 values)
print(arr.mean(axis=1))
print(arr.sum(0))

arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
# Cumulative sum down the rows (axis 0), then cumulative product along each row (axis 1)
print(arr.cumsum(0))
print(arr.cumprod(1))
0.299738473867
0.299738473867
5.99476947734
[ 0.33172725 -0.49981575  0.35973217  0.39621625  0.91083245]
[ 4.58629248  2.22968175 -1.88744743  1.06624268]
[[ 0  1  2]
 [ 3  5  7]
 [ 9 12 15]]
[[  0   0   0]
 [  3  12  60]
 [  6  42 336]]

用于布尔型数组的方法

# Methods for boolean arrays
arr = randn(100)
(arr > 0).sum()  # number of positive values (each True counts as 1)

bools = np.array([False, False, True, False])
# any(): is at least one element True?  all(): is every element True?
print(bools.any())
print(bools.all())
True
False

排序

# Sorting
arr = randn(8)
print(arr)
# ndarray.sort() sorts in place, in ascending order
arr.sort()
print(arr)

arr = randn(5, 3)
print(arr)
# Sort along axis 1: each row is sorted independently, in place
arr.sort(1)
print(arr)

[-0.17018254  1.29292169  1.87999871 -0.25529225  1.1058983  -0.27456269
 -1.17911236  0.30155365]
[-1.17911236 -0.27456269 -0.25529225 -0.17018254  0.30155365  1.1058983
  1.29292169  1.87999871]
[[-0.31552106  0.95227657  0.08006334]
 [ 0.86493167  0.66028869  0.56929258]
 [-1.30046025 -1.03020373 -0.80371581]
 [-0.74412785  0.2413104  -0.81418268]
 [-1.16001837 -0.70517682 -0.5816708 ]]
[[-0.31552106  0.08006334  0.95227657]
 [ 0.56929258  0.66028869  0.86493167]
 [-1.30046025 -1.03020373 -0.80371581]
 [-0.81418268 -0.74412785  0.2413104 ]
 [-1.16001837 -0.70517682 -0.5816708 ]]

5%分位数

# Approximate a quantile by sorting ascending and reading the element
# located 5% of the way into the array
large_arr = randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]  # 5% quantile
-1.7061490455426676

np.unique 唯一化 以及其他的集合逻辑

# 唯一化以及其他的集合逻辑
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值