import pandas as pd
import numpy as np
# Series of five standard-normal samples labelled 'a' through 'e'.
s = pd.Series(
    data=np.random.randn(5),
    index=["a", "b", "c", "d", "e"],
)
s
a 0.635448
b -0.010749
c 0.283827
d -1.205425
e 0.278554
dtype: float64
# Show the Series' index object, i.e. the row labels supplied at construction.
s.index
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
# Give the index a name; when the Series is displayed the name appears as a
# header line above the labels.
s.index.name = "aaaa"
s
aaaa
a 0.635448
b -0.010749
c 0.283827
d -1.205425
e 0.278554
dtype: float64
# 4x3 frame of standard-normal samples with named columns and the default
# integer row index.
df = pd.DataFrame(
    data=np.random.randn(4, 3),
    columns=["one", "two", "three"],
)
df
| one | two | three |
---|
0 | 1.230136 | -0.014690 | 0.309240 |
---|
1 | 0.814500 | 0.439564 | -0.367011 |
---|
2 | -0.457673 | -0.522634 | 1.284895 |
---|
3 | 0.359858 | -1.525145 | -2.300325 |
---|
# Show the column labels, which are themselves stored as an Index object.
df.columns
Index(['one', 'two', 'three'], dtype='object')
# Name both axes; the names then show up in the header of the displayed frame.
df.index.name = "row"
df.columns.name = "col"
df
col | one | two | three |
---|
row | | | |
---|
0 | 1.230136 | -0.014690 | 0.309240 |
---|
1 | 0.814500 | 0.439564 | -0.367011 |
---|
2 | -0.457673 | -0.522634 | 1.284895 |
---|
3 | 0.359858 | -1.525145 | -2.300325 |
---|
# Series whose index deliberately repeats the labels "a" and "b".
s = pd.Series(
    data=np.arange(6),
    index=["a", "b", "c", "b", "d", "a"],
)
s
a 0
b 1
c 2
b 3
d 4
a 5
dtype: int32
# "a" occurs twice in the index, so the lookup returns a Series with both matches.
s["a"]
a 0
a 5
dtype: int32
# "c" occurs only once in the index, so the lookup returns a scalar.
s["c"]
2
# False for this Series because its index contains repeated labels.
s.index.is_unique
False
# Index labels with the duplicates removed.
s.index.unique()
Index(['a', 'b', 'c', 'd'], dtype='object')
# Collapse duplicate labels by summing the values that share each label.
s.groupby(s.index).sum()
a 5
b 4
c 2
d 4
dtype: int32
# Collapse duplicate labels by keeping the first value found for each label.
s.groupby(s.index).first()
a 0
b 1
c 2
d 4
dtype: int32
# Collapse duplicate labels by averaging per label; the result is float64.
s.groupby(s.index).mean()
a 2.5
b 2.0
c 2.0
d 4.0
dtype: float64
# Two parallel lists: the first-level labels and the second-level labels.
a = [
    ["a", "a", "a", "b", "b", "c", "c"],
    [1, 2, 3, 1, 2, 2, 3],
]
# Pair the two lists element-wise into (first-level, second-level) tuples.
t = list(zip(a[0], a[1]))
t
[('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('c', 2), ('c', 3)]
# Build a two-level index from the (first, second) label tuples in `t` and
# name the levels "level1" and "level2".
index = pd.MultiIndex.from_tuples(t, names=["level1","level2"])
index
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
labels=[[0, 0, 0, 1, 1, 2, 2], [0, 1, 2, 0, 1, 1, 2]],
names=['level1', 'level2'])
# Seven standard-normal samples, one per entry of the two-level `index`.
s = pd.Series(np.random.randn(7),index=index)
s
level1 level2
a 1 -2.474161
2 1.143163
3 1.250040
b 1 0.871456
2 1.106340
c 2 -0.651479
3 1.326582
dtype: float64
# Select on the outer level only; the result is indexed by the remaining
# level ("level2").
s["b"]
level2
1 0.871456
2 1.106340
dtype: float64
# Label slice on the outer level; both endpoints ("b" and "c") are included.
s["b":"c"]
level1 level2
b 1 0.871456
2 1.106340
c 2 -0.651479
3 1.326582
dtype: float64
# Select several outer-level labels with a list; both index levels are kept.
s[["a","c"]]
level1 level2
a 1 -2.474161
2 1.143163
3 1.250040
c 2 -0.651479
3 1.326582
dtype: float64
# Select the entries whose second-level label is 2 under every first-level
# label; the result is indexed by "level1".
s[:, 2]
level1
a 1.143163
b 1.106340
c -0.651479
dtype: float64
# 4x3 frame of random integers in [1, 10) with a two-level row index and a
# two-level column index, each level carrying a name.
row_index = pd.MultiIndex.from_arrays(
    [["a", "a", "b", "b"], [1, 2, 1, 2]],
    names=["row-1", "row-2"],
)
col_index = pd.MultiIndex.from_arrays(
    [["one", "one", "two"], ["blue", "red", "blue"]],
    names=["col-1", "col-2"],
)
df = pd.DataFrame(
    np.random.randint(1, 10, (4, 3)),
    index=row_index,
    columns=col_index,
)
df
| col-1 | one | two |
---|
| col-2 | blue | red | blue |
---|
row-1 | row-2 | | | |
---|
a | 1 | 2 | 2 | 9 |
---|
2 | 4 | 8 | 6 |
---|
b | 1 | 7 | 4 | 6 |
---|
2 | 1 | 6 | 3 |
---|
# Select every row under outer row label "a"; the result's row index is the
# remaining level ("row-2").
df.loc["a"]
col-1 | one | two |
---|
col-2 | blue | red | blue |
---|
row-2 | | | |
---|
1 | 2 | 2 | 9 |
---|
2 | 4 | 8 | 6 |
---|
# Select the single row with labels ("a", 1) as a Series indexed by the column
# MultiIndex. The row key is written as an explicit tuple and the column axis as
# `:` so the indexer cannot be misread as (row "a", column 1).
df.loc[("a", 1), :]
col-1 col-2
one blue 2
red 2
two blue 9
Name: (a, 1), dtype: int32
# Exchange the order of the two row-index levels; as the displayed result
# shows, the rows themselves are not re-sorted.
df2 = df.swaplevel("row-1","row-2")
df2
| col-1 | one | two |
---|
| col-2 | blue | red | blue |
---|
row-2 | row-1 | | | |
---|
1 | a | 2 | 2 | 9 |
---|
2 | a | 4 | 8 | 6 |
---|
1 | b | 7 | 4 | 6 |
---|
2 | b | 1 | 6 | 3 |
---|
# Sort the rows by the outermost index level (level 0).
# DataFrame.sortlevel() has been removed from pandas; sort_index(level=...) is
# its replacement and yields the same ordering.
df2.sort_index(level=0)
| col-1 | one | two |
---|
| col-2 | blue | red | blue |
---|
row-2 | row-1 | | | |
---|
1 | a | 2 | 2 | 9 |
---|
b | 7 | 4 | 6 |
---|
2 | a | 4 | 8 | 6 |
---|
b | 1 | 6 | 3 |
---|
# Sum the rows that share the same outer row label (index level 0).
# The `level` keyword of DataFrame.sum() was removed in pandas 2.0; grouping by
# the index level and summing is the equivalent supported spelling.
df.groupby(level=0).sum()
col-1 | one | two |
---|
col-2 | blue | red | blue |
---|
row-1 | | | |
---|
a | 6 | 10 | 15 |
---|
b | 8 | 10 | 9 |
---|
# Sum the rows that share the same inner row label (index level 1).
# The `level` keyword of DataFrame.sum() was removed in pandas 2.0; grouping by
# the index level and summing is the equivalent supported spelling.
df.groupby(level=1).sum()
col-1 | one | two |
---|
col-2 | blue | red | blue |
---|
row-2 | | | |
---|
1 | 9 | 6 | 15 |
---|
2 | 5 | 14 | 9 |
---|
# Seven-row frame: an ascending counter, a descending counter, a group label,
# and a counter that restarts within each group.
df = pd.DataFrame(
    {
        "a": range(7),
        "b": range(7, 0, -1),
        "c": ["one"] * 3 + ["two"] * 4,
        "d": [*range(3), *range(4)],
    }
)
df
| a | b | c | d |
---|
0 | 0 | 7 | one | 0 |
---|
1 | 1 | 6 | one | 1 |
---|
2 | 2 | 5 | one | 2 |
---|
3 | 3 | 4 | two | 0 |
---|
4 | 4 | 3 | two | 1 |
---|
5 | 5 | 2 | two | 2 |
---|
6 | 6 | 1 | two | 3 |
---|
# Use column "c" as the row index; the result is only displayed here, not
# assigned back to df.
df.set_index("c")
| a | b | d |
---|
c | | | |
---|
one | 0 | 7 | 0 |
---|
one | 1 | 6 | 1 |
---|
one | 2 | 5 | 2 |
---|
two | 3 | 4 | 0 |
---|
two | 4 | 3 | 1 |
---|
two | 5 | 2 | 2 |
---|
two | 6 | 1 | 3 |
---|
# Build a two-level row index from columns "c" and "d"; "a" and "b" remain as
# the data columns.
df2 = df.set_index(["c","d"])
df2
| | a | b |
---|
c | d | | |
---|
one | 0 | 0 | 7 |
---|
1 | 1 | 6 |
---|
2 | 2 | 5 |
---|
two | 0 | 3 | 4 |
---|
1 | 4 | 3 |
---|
2 | 5 | 2 |
---|
3 | 6 | 1 |
---|
# Move the index levels back into ordinary columns and restore the default
# integer row index.
df2.reset_index()
| c | d | a | b |
---|
0 | one | 0 | 0 | 7 |
---|
1 | one | 1 | 1 | 6 |
---|
2 | one | 2 | 2 | 5 |
---|
3 | two | 0 | 3 | 4 |
---|
4 | two | 1 | 4 | 3 |
---|
5 | two | 2 | 5 | 2 |
---|
6 | two | 3 | 6 | 1 |
---|