pandas常用数据分析语法(三):查询
df_2018['Area ID'].value_counts().head(1).values[0]
筛选某些值
ChargeGroups_selected = ['Vehicle Theft', 'Robbery', 'Burglary', 'Receive Stolen Property']
df_6 = df_6.loc[df_6['Address'].str.contains('PICO')]
df_2018[df_2018['Charge Group Description'].isin(ChargeGroups_selected)]
df_4 = df_4[~df_4['Charge Group Description'].isin(excluded)]
df_6 = df_6.loc[(df_6['Lon']<right1)&(df_6['Lon']>left1)]
分位数
Age.quantile(0.95)
排序
z_scores.abs().sort_values(ascending = False)
分组
df_4.groupby('Charge Group Description')['Age'].mean()
最大值与最小值(idxmax 返回最大值所在的索引标签,max 返回最大值本身)
op.idxmax()
op.max()
loca1 = df_6[df_6['Lat']==df_6['Lat'].max()].Location.head(1).values[0]
分列(按分隔符拆分字符串)
mp = loca.split(',')
lon = float(mp[0][1:])
lat = float(mp[1][:-1])
df_6['Lon'] = df_6['Location'].str.split(',').str[0].str[1:]
apply
df_5.loc[:,'Location'] = df_5.loc[:,'Location'].apply(within_2km,args=(center,))
数据类型转换(astype)
df_6['Lon'] = df_6['Lon'].astype('float')
df_8['Arrest Date'].astype('str').str.split('-').str[0]
loc(按布尔条件选取行)
df_8.loc[df_8.loc[:,'Arrest Type Code']=='F']
对布尔值用 sum(True 计为 1、False 计为 0,求和即得到满足条件的行数)