import numpy as np
import pandas as pd
# Python's None compares equal to itself ...
None == None
True
# ... whereas NaN does not compare equal even to itself, which is why an
# equality test against np.nan cannot be used to locate missing values.
np.nan == np.nan
False
# Start from a 5x3 frame of random normals on a sparse label index, then
# reindex onto the full a..h label set: labels that were absent from the
# original index ("b", "d", "g") come back as all-NaN rows.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list("acefh"),
    columns=["one", "two", "three"],
)
df2 = df.reindex(list("abcdefgh"))
df2
|   | one | two | three |
|---|---|---|---|
| a | 0.523109 | 1.171167 | 0.083875 |
| b | NaN | NaN | NaN |
| c | 1.144898 | 0.023587 | -1.429626 |
| d | NaN | NaN | NaN |
| e | 0.850141 | -1.096858 | -0.877410 |
| f | -2.938262 | -1.470641 | -0.760278 |
| g | NaN | NaN | NaN |
| h | -0.358836 | -1.072762 | 0.015806 |
# Replace every missing entry with the scalar 0; a new frame is returned
# and df2 itself is left untouched.
df2.fillna(value=0)
|   | one | two | three |
|---|---|---|---|
| a | 0.523109 | 1.171167 | 0.083875 |
| b | 0.000000 | 0.000000 | 0.000000 |
| c | 1.144898 | 0.023587 | -1.429626 |
| d | 0.000000 | 0.000000 | 0.000000 |
| e | 0.850141 | -1.096858 | -0.877410 |
| f | -2.938262 | -1.470641 | -0.760278 |
| g | 0.000000 | 0.000000 | 0.000000 |
| h | -0.358836 | -1.072762 | 0.015806 |
# Fill one column's gaps with a string placeholder; mixing strings into a
# float column yields an object-dtype Series (see the dtype in the output).
df2["one"].fillna(value="missing")
a 0.523109
b missing
c 1.1449
d missing
e 0.850141
f -2.93826
g missing
h -0.358836
Name: one, dtype: object
# Forward-fill: carry the last valid observation down each column.
# `DataFrame.ffill()` is the supported spelling; the `method=` keyword of
# `fillna` is deprecated (pandas 2.1) and removed in later releases.
df2.ffill()
|   | one | two | three |
|---|---|---|---|
| a | 0.523109 | 1.171167 | 0.083875 |
| b | 0.523109 | 1.171167 | 0.083875 |
| c | 1.144898 | 0.023587 | -1.429626 |
| d | 1.144898 | 0.023587 | -1.429626 |
| e | 0.850141 | -1.096858 | -0.877410 |
| f | -2.938262 | -1.470641 | -0.760278 |
| g | -2.938262 | -1.470641 | -0.760278 |
| h | -0.358836 | -1.072762 | 0.015806 |
# Forward-fill, but fill at most one consecutive NaN per gap. Every gap in
# df2 is a single row, so the result matches the unlimited forward-fill.
# `DataFrame.ffill(limit=...)` replaces the deprecated
# `fillna(method="ffill", limit=...)` spelling.
df2.ffill(limit=1)
|   | one | two | three |
|---|---|---|---|
| a | 0.523109 | 1.171167 | 0.083875 |
| b | 0.523109 | 1.171167 | 0.083875 |
| c | 1.144898 | 0.023587 | -1.429626 |
| d | 1.144898 | 0.023587 | -1.429626 |
| e | 0.850141 | -1.096858 | -0.877410 |
| f | -2.938262 | -1.470641 | -0.760278 |
| g | -2.938262 | -1.470641 | -0.760278 |
| h | -0.358836 | -1.072762 | 0.015806 |
# Backward-fill: pull the next valid observation up into each gap.
# `DataFrame.bfill()` replaces the deprecated `fillna(method="backfill")`
# spelling (the `method=` keyword is deprecated since pandas 2.1).
df2.bfill()
|   | one | two | three |
|---|---|---|---|
| a | 0.523109 | 1.171167 | 0.083875 |
| b | 1.144898 | 0.023587 | -1.429626 |
| c | 1.144898 | 0.023587 | -1.429626 |
| d | 0.850141 | -1.096858 | -0.877410 |
| e | 0.850141 | -1.096858 | -0.877410 |
| f | -2.938262 | -1.470641 | -0.760278 |
| g | -0.358836 | -1.072762 | 0.015806 |
| h | -0.358836 | -1.072762 | 0.015806 |
# Ten rows of random normals in columns A, B, C, with a staggered run of
# missing values punched into each column. With the default integer index,
# label slices via .loc are end-inclusive, so 3:4 covers rows 3 and 4.
dff = pd.DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
dff.loc[3:4, "A"] = np.nan
dff.loc[4:5, "B"] = np.nan
dff.loc[5:7, "C"] = np.nan
dff
|   | A | B | C |
|---|---|---|---|
| 0 | 0.214955 | -0.857945 | -1.136325 |
| 1 | -0.213562 | -0.719626 | 0.431266 |
| 2 | -1.053291 | 1.812349 | 0.491484 |
| 3 | NaN | 0.016753 | -0.218812 |
| 4 | NaN | NaN | 2.033018 |
| 5 | -0.642261 | NaN | NaN |
| 6 | -0.881779 | -0.995053 | NaN |
| 7 | 1.454017 | 0.591962 | NaN |
| 8 | 2.000574 | -0.420521 | -0.245732 |
| 9 | -0.893512 | -0.472257 | 0.181229 |
# Fill each column's gaps with that column's own mean: the Series returned
# by dff.mean() is matched to the frame's columns by label.
dff.fillna(value=dff.mean())
|   | A | B | C |
|---|---|---|---|
| 0 | 0.214955 | -0.857945 | -1.136325 |
| 1 | -0.213562 | -0.719626 | 0.431266 |
| 2 | -1.053291 | 1.812349 | 0.491484 |
| 3 | -0.001857 | 0.016753 | -0.218812 |
| 4 | -0.001857 | -0.130542 | 2.033018 |
| 5 | -0.642261 | -0.130542 | 0.219447 |
| 6 | -0.881779 | -0.995053 | 0.219447 |
| 7 | 1.454017 | 0.591962 | 0.219447 |
| 8 | 2.000574 | -0.420521 | -0.245732 |
| 9 | -0.893512 | -0.472257 | 0.181229 |
# Supply means for columns B and C only (label slice, end-inclusive);
# column A has no fill value, so its NaNs are left in place.
dff.fillna(dff.mean().loc["B":"C"])
|   | A | B | C |
|---|---|---|---|
| 0 | 0.214955 | -0.857945 | -1.136325 |
| 1 | -0.213562 | -0.719626 | 0.431266 |
| 2 | -1.053291 | 1.812349 | 0.491484 |
| 3 | NaN | 0.016753 | -0.218812 |
| 4 | NaN | -0.130542 | 2.033018 |
| 5 | -0.642261 | -0.130542 | 0.219447 |
| 6 | -0.881779 | -0.995053 | 0.219447 |
| 7 | 1.454017 | 0.591962 | 0.219447 |
| 8 | 2.000574 | -0.420521 | -0.245732 |
| 9 | -0.893512 | -0.472257 | 0.181229 |
# Drop every row that contains at least one NaN (rows 3 through 7 here).
dff.dropna(axis="index")
|   | A | B | C |
|---|---|---|---|
| 0 | 0.214955 | -0.857945 | -1.136325 |
| 1 | -0.213562 | -0.719626 | 0.431266 |
| 2 | -1.053291 | 1.812349 | 0.491484 |
| 8 | 2.000574 | -0.420521 | -0.245732 |
| 9 | -0.893512 | -0.472257 | 0.181229 |
# Drop columns containing any NaN. A, B and C each hold missing values,
# so no columns survive this call.
dff.dropna(axis="columns")
# On a single column, dropna just omits the missing entries
# (labels 3 and 4 are absent from the result).
dff.loc[:, "A"].dropna()
0 0.214955
1 -0.213562
2 -1.053291
5 -0.642261
6 -0.881779
7 1.454017
8 2.000574
9 -0.893512
Name: A, dtype: float64
# A small float Series 0.0 .. 4.0 used for the replace() examples.
ser = pd.Series([float(i) for i in range(5)])
# Scalar-for-scalar replacement: every 0 becomes 5.
ser.replace(to_replace=0, value=5)
0 5.0
1 1.0
2 2.0
3 3.0
4 4.0
dtype: float64
# Several replacements at once, written as an old -> new mapping; each
# value is looked up in the original data, so this reverses 0..4.
ser.replace({0: 4, 1: 3, 2: 2, 3: 1, 4: 0})
0 4.0
1 3.0
2 2.0
3 1.0
4 0.0
dtype: float64
# Two integer columns: a = 0..4 and b = 5..9.
df = pd.DataFrame({"a": list(range(5)), "b": list(range(5, 10))})
df
# Per-column targets: replace 0 in column "a" and 5 in column "b",
# both with the single value 100.
df.replace(to_replace={"a": 0, "b": 5}, value=100)
# Mixed frame in which "." is used as a placeholder for missing data in
# column "b", next to a genuine NaN in column "c".
d = {
    "a": [0, 1, 2, 3],
    "b": ["a", "b", ".", "."],
    "c": ["a", "b", np.nan, "d"],
}
df = pd.DataFrame(data=d)
df
# Turn the "." placeholders into real NaN values.
df.replace(to_replace=".", value=np.nan)
|   | a | b | c |
|---|---|---|---|
| 0 | 0 | a | a |
| 1 | 1 | b | b |
| 2 | 2 | NaN | NaN |
| 3 | 3 | NaN | d |
# Same substitution driven by a regular expression: a literal dot with
# optional surrounding whitespace is replaced by NaN.
df.replace(to_replace=r"\s*\.\s*", value=np.nan, regex=True)
|   | a | b | c |
|---|---|---|---|
| 0 | 0 | a | a |
| 1 | 1 | b | b |
| 2 | 2 | NaN | NaN |
| 3 | 3 | NaN | d |
# Parallel lists pair each target with its replacement:
# "a" becomes "b", and "." becomes NaN.
df.replace(to_replace=["a", "."], value=["b", np.nan])
|   | a | b | c |
|---|---|---|---|
| 0 | 0 | b | b |
| 1 | 1 | b | b |
| 2 | 2 | NaN | NaN |
| 3 | 3 | NaN | d |