Python数据分析_第04课：NumPy基础_笔记

最新推荐文章于 2023-08-17 10:00:00 发布

RealEmperor

最新推荐文章于 2023-08-17 10:00:00 发布

阅读量767

点赞数

分类专栏： Python数据分析 | 文章标签： Python

本文链接：https://blog.csdn.net/weixin_42018258/article/details/99454097

版权

这篇博客介绍了Python数据分析中NumPy库的基础使用，包括元素级最大值、小数和整数部分分离、坐标矩阵生成、条件判断操作，以及数学统计方法如标准正态分布、排序、唯一化等。此外，还探讨了线性代数中的随机数生成，并通过实例展示了如何利用NumPy进行历史股价分析，涉及股票收益率、日期处理、波动幅度、移动平均线和趋势线的计算。

摘要由CSDN通过智能技术生成

GitHub: https://github.com/RealEmperor/Python-for-Data-Analysis

numpy

import numpy as np
from numpy.random import randn

#通用函数
arr = np.arange(10)
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])

np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

np.maximum 元素级最大值

x = randn(8)
y = randn(8)
print(x)
print(y)
# 元素级最大值
np.maximum(x, y)

[-1.03760196 -1.0035245  -0.19109603  2.27398057 -0.51605815 -1.25481649
 -1.95118717 -0.09423245]
[-1.26195712 -0.70857631 -0.18729477  2.58847014  2.46277713 -1.04523397
  1.13501218  1.3499591 ]





array([-1.03760196, -0.70857631, -0.18729477,  2.58847014,  2.46277713,
       -1.04523397,  1.13501218,  1.3499591 ])

np.modf 按元素返回数组的小数部分和整数部分

arr = randn(7) * 5
print(arr)
# 按元素返回数组的小数部分和整数部分
np.modf(arr)

[ 8.01175821  3.46248512 -4.11785287  1.34226648  0.40194097  5.81213218
 -0.40446832]





(array([ 0.01175821,  0.46248512, -0.11785287,  0.34226648,  0.40194097,
         0.81213218, -0.40446832]), array([ 8.,  3., -4.,  1.,  0.,  5., -0.]))

np.meshgrid 从坐标向量返回坐标矩阵

###利用数组进行数据处理
# 向量化
points = np.arange(-5, 5, 0.01)  # 1000 equally spaced points
# 从坐标向量返回坐标矩阵
xs, ys = np.meshgrid(points, points)
print(ys)

[[-5.   -5.   -5.   ..., -5.   -5.   -5.  ]
 [-4.99 -4.99 -4.99 ..., -4.99 -4.99 -4.99]
 [-4.98 -4.98 -4.98 ..., -4.98 -4.98 -4.98]
 ..., 
 [ 4.97  4.97  4.97 ...,  4.97  4.97  4.97]
 [ 4.98  4.98  4.98 ...,  4.98  4.98  4.98]
 [ 4.99  4.99  4.99 ...,  4.99  4.99  4.99]]

import matplotlib.pyplot as plt

z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

plt.draw()

[[ 7.07106781  7.06400028  7.05693985 ...,  7.04988652  7.05693985
   7.06400028]
 [ 7.06400028  7.05692568  7.04985815 ...,  7.04279774  7.04985815
   7.05692568]
 [ 7.05693985  7.04985815  7.04278354 ...,  7.03571603  7.04278354
   7.04985815]
 ..., 
 [ 7.04988652  7.04279774  7.03571603 ...,  7.0286414   7.03571603
   7.04279774]
 [ 7.05693985  7.04985815  7.04278354 ...,  7.03571603  7.04278354
   7.04985815]
 [ 7.06400028  7.05692568  7.04985815 ...,  7.04279774  7.04985815
   7.05692568]]

np.where

# 将条件逻辑表达为数组运算
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

result = [(x if c else y)
          for x, y, c in zip(xarr, yarr, cond)]
print(result)

[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

result = np.where(cond, xarr, yarr)
print(result)

[ 1.1  2.2  1.3  1.4  2.5]

arr = randn(4, 4)
print(arr)
print(np.where(arr > 0, 2, -2))
print(np.where(arr > 0, 2, arr))  # set only positive values to 2

[[-0.09677059 -0.78473401 -0.00841639  1.39892368]
 [-1.14999224  0.33586593 -0.1844864   0.47664971]
 [-0.67508722  0.56130304 -0.8018509   0.07338623]
 [ 0.10375292  1.44174994  0.42788598 -0.66850794]]
[[-2 -2 -2  2]
 [-2  2 -2  2]
 [-2  2 -2  2]
 [ 2  2  2 -2]]
[[-0.09677059 -0.78473401 -0.00841639  2.        ]
 [-1.14999224  2.         -0.1844864   2.        ]
 [-0.67508722  2.         -0.8018509   2.        ]
 [ 2.          2.          2.         -0.66850794]]

"""
# 多条件一般表示方法
# Not to be executed
result = []
for i in range(n):
    if cond1[i] and cond2[i]:
        result.append(0)
    elif cond1[i]:
        result.append(1)
    elif cond2[i]:
        result.append(2)
    else:
        result.append(3)

# 多条件where表示方法
# Not to be executed
np.where(cond1 & cond2, 0,
         np.where(cond1, 1,
                  np.where(cond2, 2, 3)))

# Not to be executed
result = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * ~(cond1 | cond2)
"""

'\n# 多条件一般表示方法\n# Not to be executed\nresult = []\nfor i in range(n):\n    if cond1[i] and cond2[i]:\n        result.append(0)\n    elif cond1[i]:\n        result.append(1)\n    elif cond2[i]:\n        result.append(2)\n    else:\n        result.append(3)\n\n# 多条件where表示方法\n# Not to be executed\nnp.where(cond1 & cond2, 0,\n         np.where(cond1, 1,\n                  np.where(cond2, 2, 3)))\n\n# Not to be executed\nresult = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * ~(cond1 | cond2)\n'

数学与统计方法

randn 标准正态分布数据

# 数学与统计方法

arr = np.random.randn(5, 4)  # 标准正态分布数据
print(arr.mean())
print(np.mean(arr))
print(arr.sum())

print(arr.mean(axis=1))
print(arr.sum(0))

arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print(arr.cumsum(0))
print(arr.cumprod(1))

0.299738473867
0.299738473867
5.99476947734
[ 0.33172725 -0.49981575  0.35973217  0.39621625  0.91083245]
[ 4.58629248  2.22968175 -1.88744743  1.06624268]
[[ 0  1  2]
 [ 3  5  7]
 [ 9 12 15]]
[[  0   0   0]
 [  3  12  60]
 [  6  42 336]]

用于布尔型数组的方法

# 用于布尔型数组的方法
arr = randn(100)
(arr > 0).sum()  # 正值的数量

bools = np.array([False, False, True, False])
print(bools.any())
print(bools.all())

True
False

排序

# 排序
arr = randn(8)
print(arr)
arr.sort()
print(arr)

arr = randn(5, 3)
print(arr)
arr.sort(1)
print(arr)

[-0.17018254  1.29292169  1.87999871 -0.25529225  1.1058983  -0.27456269
 -1.17911236  0.30155365]
[-1.17911236 -0.27456269 -0.25529225 -0.17018254  0.30155365  1.1058983
  1.29292169  1.87999871]
[[-0.31552106  0.95227657  0.08006334]
 [ 0.86493167  0.66028869  0.56929258]
 [-1.30046025 -1.03020373 -0.80371581]
 [-0.74412785  0.2413104  -0.81418268]
 [-1.16001837 -0.70517682 -0.5816708 ]]
[[-0.31552106  0.08006334  0.95227657]
 [ 0.56929258  0.66028869  0.86493167]
 [-1.30046025 -1.03020373 -0.80371581]
 [-0.81418268 -0.74412785  0.2413104 ]
 [-1.16001837 -0.70517682 -0.5816708 ]]

5%分位数

large_arr = randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]  # 5%分位数

-1.7061490455426676

np.unique 唯一化以及其他的集合逻辑

# 唯一化以及其他的集合逻辑
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], 
      dtype='<U4')

ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

array([1, 2, 3, 4])

sorted

最低0.47元/天解锁文章

RealEmperor

关注

0
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
Python数据分析_第04课：NumPy基础_笔记

numpyimport numpy as npfrom numpy.random import randn#通用函数arr = np.arange(10)np.sqrt(arr)array([ 0. , 1. , 1.41421356, 1.73205081, 2. , 2.23606798, 2.44948974...
复制链接

扫一扫