pandas思维导图扩充:
作业:
import pandas as pd
import numpy as np
df = pd.read_csv('data/pokemon (2).csv')
# Fraction of rows whose six base stats do not add up to the 'Total' column.
base_stats = df[['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]
total_mismatch = base_stats.sum(axis=1) != df['Total']
print(total_mismatch.mean())
# 0.0
- 对于 # 重复的妖怪只保留第一条记录,解决以下问题:
# Keep only the first row for each value of the '#' column.
dp_dup = df.drop_duplicates(subset=['#'], keep='first')
(1)求第一属性的种类数量
# Number of distinct values in the 'Type 1' column.
first_type = dp_dup['Type 1']
df_unique = first_type.nunique()
print(df_unique)
# 18
(2)前三多数量对应的种类
# Labels of the three most frequent 'Type 1' values; value_counts sorts
# by descending count, so the first three entries are the top three.
type_counts = dp_dup['Type 1'].value_counts()
df_top3 = type_counts.head(3).index
print(df_top3)
# Index(['Water', 'Normal', 'Grass'], dtype='object')
(3)求第一属性和第二属性的组合种类
# One row per distinct ('Type 1', 'Type 2') pair; the row count is the
# number of combinations that actually occur.
attr_dup = dp_dup.drop_duplicates(subset=['Type 1', 'Type 2'])
print(len(attr_dup))
# 143
(4)求尚未出现过的属性组合
# Enumerate every ordered pairing of the observed first types (a type
# paired with itself stands for the single-type case), then subtract the
# combinations that actually occur in the data.
attr_dup = dp_dup.drop_duplicates(['Type 1', 'Type 2'])
# Evaluate the unique type names once instead of once per outer iteration.
type_names = dp_dup['Type 1'].unique()
L_full = [' '.join([i, j]) if i != j else i for j in type_names for i in type_names]
# A missing 'Type 2' is loaded by read_csv as NaN; pd.notna tests for that
# directly instead of relying on the value's concrete type being float.
L_part = [' '.join([i, j]) if pd.notna(j) else i for i, j in zip(attr_dup['Type 1'], attr_dup['Type 2'])]
res = set(L_full).difference(set(L_part))
print(len(res))
# 181
- 按照下述要求,构造 Series :
(a) 取出物攻,超过 120 的替换为 high ,不足 50 的替换为 low ,否则设为 mid
se_att = df['Attack']
print(se_att.head(10))
# Label each attack value: below 50 -> 'low', above 120 -> 'high',
# 50 to 120 inclusive -> 'mid'. All masks are computed from the original
# numeric column, so the order of the replacements does not matter.
is_low = se_att < 50
is_high = se_att > 120
is_mid = (se_att >= 50) & (se_att <= 120)
se_attr = se_att.mask(is_low, 'low')
se_attr = se_attr.mask(is_high, 'high')
se_attr = se_attr.mask(is_mid, 'mid')
print(se_attr.head(10))
#
Name: Attack, dtype: int64
0 low
1 mid
2 mid
3 mid
4 mid
5 mid
6 mid
7 high
8 mid
9 low
Name: Attack, dtype: object
(b) 取出第一属性,分别用 replace 和 apply 替换所有字母为大写
# Upper-case every 'Type 1' value twice: once through replace with a
# lookup table, once through apply.
# The lookup table only needs one entry per distinct type name, so build
# it from the unique values rather than from every row.
type_to_upper = {name: str.upper(name) for name in df['Type 1'].unique()}
upper_name = df['Type 1'].replace(type_to_upper)
print(upper_name.head(10))
# str.upper can be passed directly; wrapping it in a lambda adds nothing.
UPPER_NAME = df['Type 1'].apply(str.upper)
print(UPPER_NAME.head(10))
#
0 GRASS
1 GRASS
2 GRASS
3 GRASS
4 FIRE
5 FIRE
6 FIRE
7 FIRE
8 FIRE
9 WATER
Name: Type 1, dtype: object
(c) 求每个妖怪六项能力的离差,即所有能力中偏离中位数最大的值,添加到 df 并从大到小排序
# Deviation = the largest absolute distance of any of the six stats from
# that row's MEDIAN, as the task statement requires. The previous version
# measured the distance from the mean instead.
# NOTE(review): the sample output recorded after this snippet was produced
# by the mean-based version (e.g. Chansey 175.0); re-run to refresh it.
stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
df['Deviation'] = df[stat_cols].apply(lambda x: np.max((x - x.median()).abs()), axis=1)
# Largest deviation first.
df_sort = df.sort_values('Deviation', ascending=False)
print(df_sort.head(10))
#
# Name Type 1 ... Sp. Def Speed Deviation
121 113 Chansey Normal ... 105 50 175.000000
261 242 Blissey Normal ... 135 55 165.000000
230 213 Shuckle Bug ... 230 5 145.833333
224 208 SteelixMega Steelix Steel ... 95 30 128.333333
333 306 AggronMega Aggron Steel ... 80 50 125.000000
217 202 Wobbuffet Psychic ... 58 33 122.500000
223 208 Steelix Steel ... 65 30 115.000000
415 378 Regice Ice ... 200 50 103.333333
414 377 Regirock Rock ... 100 50 103.333333
789 713 Avalugg Ice ... 46 28 98.333333
[10 rows x 12 columns]
指数加权窗口
# Reproducible random walk: 30 steps drawn from {-1, 0, 1}, accumulated.
np.random.seed(0)
steps = np.random.randint(-1, 2, 30)
s = pd.Series(steps.cumsum())
# Built-in exponentially weighted mean, used as the reference result.
smoothed = s.ewm(alpha=0.2).mean()
print(smoothed.head(10))
#
0 -1.000000
1 -1.000000
2 -1.409836
3 -1.609756
4 -1.725845
5 -1.529101
6 -1.648273
7 -1.492481
8 -1.609720
9 -1.921223
def ewm_func(x, alpha=0.2):
    """Return the exponentially weighted mean of the values in *x*.

    The last element gets weight 1 and each earlier element is discounted
    by a further factor of ``1 - alpha``; the weighted sum is divided by
    the sum of the weights.

    Args:
        x: One-dimensional window of numbers (Series, array or sequence).
        alpha: Smoothing factor; larger values favour recent elements.

    Returns:
        The weighted mean as a float.
    """
    values = np.asarray(x, dtype=float)
    # Exponents run n-1, ..., 1, 0 so the most recent value has weight 1.
    win = (1 - alpha) ** np.arange(values.shape[0])[::-1]
    res = (win * values).sum() / win.sum()
    return res
# Apply ewm_func over an expanding window (all values up to each position).
expanding_ewm = s.expanding().apply(ewm_func)
print(expanding_ewm.head(10))
#
0 -1.000000
1 -1.000000
2 -1.409836
3 -1.609756
4 -1.725845
5 -1.529101
6 -1.648273
7 -1.492481
8 -1.609720
9 -1.921223
dtype: float64
# Same weighting restricted to the 4 most recent values; positions without
# a full window of 4 come out as NaN.
rolling_ewm = s.rolling(window=4).apply(ewm_func)
print(rolling_ewm.head(10))
#
0 NaN
1 NaN
2 NaN
3 -1.609756
4 -1.826558
5 -1.661247
6 -1.728997
7 -1.444444
8 -1.555556
9 -2.121951
dtype: float64
Process finished with exit code 0