pandas-1118-02

本文深入探讨了 Python 中广泛使用的数据分析库 Pandas,讲解了如何加载数据、数据清洗、数据转换、聚合操作以及数据可视化等核心功能,帮助读者提升数据处理能力。
摘要由CSDN通过智能技术生成
import pandas as pd
import numpy as np
s = pd.Series(np.random.randn(5), index=list("abcde"))
s
a    0.635448
b   -0.010749
c    0.283827
d   -1.205425
e    0.278554
dtype: float64
s.index
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
s.index.name = "aaaa"
s
aaaa
a    0.635448
b   -0.010749
c    0.283827
d   -1.205425
e    0.278554
dtype: float64
df = pd.DataFrame(np.random.randn(4,3), columns=["one","two","three"])
df
        one       two     three
0  1.230136 -0.014690  0.309240
1  0.814500  0.439564 -0.367011
2 -0.457673 -0.522634  1.284895
3  0.359858 -1.525145 -2.300325
df.columns
Index(['one', 'two', 'three'], dtype='object')
df.index.name = "row"
df.columns.name = "col"
df
col       one       two     three
row
0    1.230136 -0.014690  0.309240
1    0.814500  0.439564 -0.367011
2   -0.457673 -0.522634  1.284895
3    0.359858 -1.525145 -2.300325
s = pd.Series(np.arange(6), index=list("abcbda"))
s
a    0
b    1
c    2
b    3
d    4
a    5
dtype: int32
s["a"]
a    0
a    5
dtype: int32
s["c"]
2
s.index.is_unique
False
s.index.unique()
Index(['a', 'b', 'c', 'd'], dtype='object')
s.groupby(s.index).sum()
a    5
b    4
c    2
d    4
dtype: int32
s.groupby(s.index).first()
a    0
b    1
c    2
d    4
dtype: int32
s.groupby(s.index).mean()
a    2.5
b    2.0
c    2.0
d    4.0
dtype: float64
a = [["a","a","a","b","b","c","c"], [1,2,3,1,2,2,3]]
t = list(zip(*a))
t
[('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('c', 2), ('c', 3)]
index = pd.MultiIndex.from_tuples(t, names=["level1","level2"])
index
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 2, 2], [0, 1, 2, 0, 1, 1, 2]],
           names=['level1', 'level2'])
s = pd.Series(np.random.randn(7),index=index)
s
level1  level2
a       1        -2.474161
        2         1.143163
        3         1.250040
b       1         0.871456
        2         1.106340
c       2        -0.651479
        3         1.326582
dtype: float64
s["b"]
level2
1    0.871456
2    1.106340
dtype: float64
s["b":"c"]
level1  level2
b       1         0.871456
        2         1.106340
c       2        -0.651479
        3         1.326582
dtype: float64
s[["a","c"]]
level1  level2
a       1        -2.474161
        2         1.143163
        3         1.250040
c       2        -0.651479
        3         1.326582
dtype: float64
s[:, 2]
level1
a    1.143163
b    1.106340
c   -0.651479
dtype: float64
df = pd.DataFrame(np.random.randint(1,10,(4,3)),
                 index=[["a","a","b","b"],[1,2,1,2]],
                 columns=[["one","one","two"],["blue","red","blue"]])
df.index.names=["row-1","row-2"]
df.columns.names=["col-1","col-2"]
df
col-1        one      two
col-2       blue red blue
row-1 row-2
a     1        2   2    9
      2        4   8    6
b     1        7   4    6
      2        1   6    3
df.loc["a"]
col-1  one      two
col-2 blue red blue
row-2
1        2   2    9
2        4   8    6
df.loc["a",1]
col-1  col-2
one    blue     2
       red      2
two    blue     9
Name: (a, 1), dtype: int32
df2 = df.swaplevel("row-1","row-2")
df2
col-1        one      two
col-2       blue red blue
row-2 row-1
1     a        2   2    9
2     a        4   8    6
1     b        7   4    6
2     b        1   6    3
df2.sortlevel(0)
col-1        one      two
col-2       blue red blue
row-2 row-1
1     a        2   2    9
      b        7   4    6
2     a        4   8    6
      b        1   6    3
df.sum(level=0)
col-1  one      two
col-2 blue red blue
row-1
a        6  10   15
b        8  10    9
df.sum(level=1)
col-1  one      two
col-2 blue red blue
row-2
1        9   6   15
2        5  14    9
df = pd.DataFrame({
    "a":range(7),
    "b":range(7,0,-1),
    "c":["one","one","one","two","two","two","two"],
    "d":[0,1,2,0,1,2,3]
})
df
   a  b    c  d
0  0  7  one  0
1  1  6  one  1
2  2  5  one  2
3  3  4  two  0
4  4  3  two  1
5  5  2  two  2
6  6  1  two  3
df.set_index("c")
     a  b  d
c
one  0  7  0
one  1  6  1
one  2  5  2
two  3  4  0
two  4  3  1
two  5  2  2
two  6  1  3
df2 = df.set_index(["c","d"])
df2
       a  b
c   d
one 0  0  7
    1  1  6
    2  2  5
two 0  3  4
    1  4  3
    2  5  2
    3  6  1
df2.reset_index()
     c  d  a  b
0  one  0  0  7
1  one  1  1  6
2  one  2  2  5
3  two  0  3  4
4  two  1  4  3
5  two  2  5  2
6  two  3  6  1
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值