Numpy & Pandas (数据处理教程)

最新推荐文章于 2024-07-04 16:04:39 发布

@小半

最新推荐文章于 2024-07-04 16:04:39 发布

阅读量292

点赞数

分类专栏： python数据处理　文章标签： python

本文链接：https://blog.csdn.net/weixin_53331942/article/details/115652920

版权

python数据处理专栏收录该内容

2 篇文章 0 订阅

订阅专栏

numpy属性

import numpy as np

# Build a 2-D array (a matrix) from a nested list with np.array().
array = np.array([[1, 2, 3], [2, 3, 4]])

# Print, one item per line: the matrix itself, its number of dimensions
# (ndim), its shape, and its total element count (size).
print(array, array.ndim, array.shape, array.size, sep="\n")

在这里插入图片描述

numpy的创建 array

import numpy as np
# Arrays are usually built from a list. Note: the printed array has no commas.
# Use the dtype argument to choose the element type. np.int_ is used here;
# the bare alias np.int no longer exists in NumPy >= 1.24.
a = np.array([1, 2, 3], dtype=np.int_)
# Inspect the element type
print(a.dtype)

import numpy as np
# To create a matrix filled entirely with zeros,
# pass the shape (number of rows, number of columns) as a tuple.
a = np.zeros((3, 4))
print(a)

在这里插入图片描述

import numpy as np
# To create an ordered sequence, use arange(start, stop, step):
# here the values start at 1, stop before 12, and advance in steps of 2.
a = np.arange(1, 12, 2)
print(a)

在这里插入图片描述

import numpy as np
# To create an ordered matrix with a specific number of rows and columns,
# reshape the arange() result: the 12 values become 3 rows x 4 columns.
a = np.arange(12).reshape((3, 4))
print(a)

import numpy as np
# linspace(1, 10, 6) produces 6 evenly spaced values from 1 to 10;
# they are then reshaped into 3 rows x 2 columns.
a = np.linspace(1, 10, 6).reshape((3, 2))
print(a)

在这里插入图片描述

numpy的基础运算(1)

import numpy as np
a = np.array([12, 32, 13, 21])
b = np.arange(4)
# Element-wise subtraction of two arrays
c = a-b
print(c)  # [12 31 11 18]
# Element-wise addition of two arrays
c = a+b  # [12 33 15 24]
print(c)
# Square every element of b; note that the power operator in Python is **
c = b**2
print(c)   # [0 1 4 9]
# Sine of every element with np.sin(); np.cos() and similar functions work the same way
c = np.sin(a)
print(c)  # [-0.53657292  0.55142668  0.42016704  0.83665564]
# Element-wise comparison: yields a boolean array that is True where b < 3
print(b < 3)   # [ True  True  True False]

import numpy as np
m = np.array([[1, 2],
              [2, 3]])
n = np.arange(4).reshape((2, 2))
# The ordinary * operator multiplies element by element
c = m*n
print(c)
'''[[0 2]
    [4 9]]
'''
# For a true matrix product, use np.dot() instead
c_dot = np.dot(m, n)
print(c_dot)
'''[[ 4  7]  
    [ 6 11]]
'''

# Generate a matrix of random values
import numpy as np
a = np.random.random((2, 4))
print(a)
'''
[[0.67842665 0.05748582 0.20033281 0.15301078]
 [0.29284424 0.70485296 0.45562099 0.08003088]]
'''
# np.sum() adds elements up; np.max() and np.min() return the largest and smallest.
# axis=1 reduces along each row, giving one result per row (here: 2 sums),
# not a result for "the first row" only.
print(np.sum(a, axis=1))

numpy的基础运算(2)

import numpy as np
# 4x3 matrix counting down from 14 to 3:
# [[14 13 12]
#  [11 10  9]
#  [ 8  7  6]
#  [ 5  4  3]]
A = np.arange(14, 2, -1).reshape((4, 3))
# np.argmin() returns the index (in the flattened matrix) of the smallest element.
# A holds integers, so the NaN-aware np.nanargmin() variant is not needed.
print(np.argmin(A))  # 11
# np.argmax() returns the index (in the flattened matrix) of the largest element
print(np.argmax(A))  # 0
# np.mean(A) is the average of all elements; axis=0 averages each column,
# axis=1 averages each row (see the last two lines of this snippet)
print(np.mean(A))  # 8.5

# np.median(A) is the median of all elements
print(np.median(A))  # 8.5
# np.cumsum(A) is the running total over the flattened matrix
print(np.cumsum(A))   # [ 14  27  39  50  60  69  77  84  90  95  99 102]
# np.diff(A) is the difference between neighbouring elements within each row;
# every row of A decreases by 1, so each of the 4 rows gives [-1 -1]
print(np.diff(A))
# np.nonzero(A) returns the row indices and the column indices of the non-zero elements
print(np.nonzero(A))
# (array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]), array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]))
# (on some platforms each array is printed with an extra dtype=... suffix)
# np.sort(A) sorts each row in ascending order:
# [[12 13 14] [ 9 10 11] [ 6  7  8] [ 3  4  5]]
print(np.sort(A))
# np.transpose(A) swaps rows and columns
print(np.transpose(A))
'''
[[14 11  8  5]
 [13 10  7  4]
 [12  9  6  3]]
'''
# np.clip(A, x, y): values below x become x, values above y become y, the rest are unchanged
print(np.clip(A, 5, 9))
'''
[[9 9 9]
 [9 9 9]
 [8 7 6]
 [5 5 5]]
'''
print(np.mean(A, axis=0))  # [9.5 8.5 7.5] mean of each column
print(np.mean(A, axis=1))  # [13. 10.  7.  4.] mean of each row

pandas 的选择数据

import pandas as pd
import numpy as np
# Six consecutive daily dates starting at 2013-01-01, used as the row index
dates = pd.date_range('20130101', periods=6)
# 6x4 table holding 0..23, with date row labels and column labels A-D
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
'''
             A   B   C   D
2013-01-01   0   1   2   3
2013-01-02   4   5   6   7
2013-01-03   8   9  10  11
2013-01-04  12  13  14  15
2013-01-05  16  17  18  19
2013-01-06  20  21  22  23
'''

# Select a single column by its label
print(df['A'])
'''
2013-01-01     0
2013-01-02     4
2013-01-03     8
2013-01-04    12
2013-01-05    16
2013-01-06    20
Freq: D, Name: A, dtype: int32
'''
# Select rows with a positional slice (the first three rows)
print(df[0:3])
'''
            A  B   C   D
2013-01-01  0  1   2   3
2013-01-02  4  5   6   7
2013-01-03  8  9  10  11
'''
# Another way to select rows: slice by index label (both end labels are included)
print(df['20130102':'20130104'])
'''
             A   B   C   D
2013-01-02   4   5   6   7
2013-01-03   8   9  10  11
2013-01-04  12  13  14  15
'''

# Use loc[] to select by label: here, one row by its date label
print(df.loc['20130101'])
'''
A    0
B    1
C    2
D    3
Name: 2013-01-01 00:00:00, dtype: int32
'''

# All rows, but only columns A and B
print(df.loc[:, ['A', 'B']])
'''
             A   B
2013-01-01   0   1
2013-01-02   4   5
2013-01-03   8   9
2013-01-04  12  13
2013-01-05  16  17
2013-01-06  20  21
'''

# One row by label, restricted to columns A and B
print(df.loc['20130101', ['A', 'B']])
'''
A    0
B    1
Name: 2013-01-01 00:00:00, dtype: int32
'''

# iloc[] selects by 0-based integer position: position 3 is the fourth row (2013-01-04)
print(df.iloc[3])
'''
A    12
B    13
C    14
D    15
Name: 2013-01-04 00:00:00, dtype: int32
'''

# A single cell: row position 3, column position 1
print(df.iloc[3, 1])
'''
13
'''

# A sub-table: row positions 3-4 and column positions 1-2 (slice ends are excluded)
print(df.iloc[3:5, 1:3])
'''
             B   C
2013-01-04  13  14
2013-01-05  17  18
# '''

pandas的导入导出

import pandas as pd
# Read a CSV file into a DataFrame with read_csv()
date = pd.read_csv('../数据/城市数据_加盐.csv')
print(date)
'''
     Unnamed: 0  year_id  month_id cty_cd      旅客量       客座率
0             0     2016        11    SSS  1537975  0.833608
1             1     2015         7    AAA  3603737  0.842056
2             2     2016         6    SSS  1405626  0.847005
3             3     2015         5    CCC   632758  0.853672
4             4     2017         8    HHH   771068  0.895236
..          ...      ...       ...    ...      ...       ...
715         715     2017         2    UUU   866150  0.830707
716         716     2015         4    AAA  3502777  0.835406
717         717     2015         3    RRR  1621762  0.831659
718         718     2015         2    WWW  2685700  0.801839
719         719     2017         2    MMM   749305  0.843860

[720 rows x 6 columns]
'''
# Save the DataFrame with to_pickle(): this writes a pickle file, not a CSV
# (to_csv() would be the call for writing CSV).
date.to_pickle('city.pickle')

pandas合并concat

@小半

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
Numpy & Pandas (数据处理教程)

numpy属性　import numpy as np　# 将列表转换成矩阵使用np.array()　array = np.array([[1, 2, 3], [2, 3, 4]])　# 打印矩阵　print(array)　# 打印该矩阵是几维的 ndim　print(array.ndim)　# 打印矩阵的形状 shape　print(array.shape)　# 打印矩阵的尺寸 size　print(array.size)　numpy的创建 array　import…
复制链接

扫一扫