3、loc索引器:基于元素的索引器
二、多级索引
五、练习
```python
import numpy as np
import pandas as pd
# Exercise 1: company employee data set (index-selection practice).
df = pd.read_csv('company.csv')
df.head(3)

# (1) Men aged 40 or under working in the Dairy or Bakery department,
#     selected once with .loc and once with .query.
target_departments = ['Dairy', 'Bakery']
df_demo = df.loc[
    (df.age <= 40)
    & df.department.isin(target_departments)
    & (df.gender == 'M')
]
# Fixed: the original expression contained "< =" (with a space), which is not
# a valid comparison operator and makes query() fail with a SyntaxError.
# Inside query(), `column == [list]` is a membership test.
df_demo = df.query(
    '(age <= 40) & (department == ["Dairy", "Bakery"]) & (gender == "M")'
)
df_demo.shape  # the original notes report 401 matching rows
# Keyword-style variant (`and` / `in`). Note that it uses a strict age bound
# and no gender filter, so it is NOT the same selection as df_demo above.
df.query('(age<40)and(department in["Dairy","Bakery"])')

# (2) Rows whose EmployeeID is odd, then the 1st, 3rd and second-to-last
#     columns of those rows.
df_ID_odd = df.loc[df.EmployeeID % 2 == 1]
df_ID_odd.head(5)
df_ID_odd.tail(5)
# Fixed: the positions belong on the column axis; the original passed them as
# row positions (`iloc[[0, 2, -2], :]`) and therefore returned three rows.
df_ID_odd.iloc[:, [0, 2, -2]]

# (3) Multi-level index round trip.
# Use the last three columns as a three-level row index.
# Alternative from the original notes, spelled out by name:
#   df_3 = df.set_index(['department', 'job_title', 'gender'])
df_3 = df.set_index(df.columns[-3:].tolist())
df_3.head(100)
# Swap the outermost and innermost levels.
# Alternative from the original notes: df.reorder_levels([2, 0, 1], axis=0)
df_3 = df_3.swaplevel(0, 2, axis=0)
# Move the middle level back into an ordinary column.
# Alternative from the original notes: df_3.reset_index('job_title')
df_3 = df_3.reset_index(level=1)
# Rename the outer level from 'gender' to 'Gender'.
df_3 = df_3.rename_axis(index={'gender': 'Gender'})
# Collapse the two remaining levels into a single "outer-inner" string label.
new_idx = df_3.index.map(lambda x: x[0] + '-' + x[1])
df_3.index = new_idx
df_3.index.values
# Split the labels back into tuples; assigning tuples restores a two-level
# index. Assumes no original label itself contains '-' -- TODO confirm.
new_idx1 = df_3.index.map(lambda x: tuple(x.split('-')))
df_3.index = new_idx1
df_3.index.values
# Restore the level names.
df_3.index.names = ['gender', 'department']
df_3.head()
# Turn both levels back into columns, leaving a default integer index.
df_3 = df_3.reset_index(['gender', 'department'])
df_3.head()
# Put rows and columns back in the same order as the original frame.
df_3 = df_3.reindex_like(df)
df_3.head()
```

2、巧克力数据集

```python
# Reference answer for the chocolate data set exercise.
df = pd.read_csv('chocolate.csv')

# (1) Replace the line break inside the column names with a space.
# Fixed: the original only replaced the literal '\r\n', so headers holding a
# bare '\n' (which is what the task statement describes) were left unchanged.
# The pattern below handles both forms.
df.columns = df.columns.str.replace(r'\r?\n', ' ', regex=True)

# (2) Rating runs from 1 to 5 in steps of 0.25. Select the samples rated 2.75
#     or lower whose Cocoa Percent is above the median.
# Approach: convert Cocoa Percent to a number first, then combine both
# conditions in query() with `&`.
# Drops the last character before converting; presumably the raw values look
# like '70%' -- TODO confirm against the CSV.
df['Cocoa Percent'] = df['Cocoa Percent'].str[:-1].astype(float) / 100
cocoa_median = df['Cocoa Percent'].median()
# Backticks quote the column name containing a space; `@` reads the local.
df.query('(Rating <= 2.75) & (`Cocoa Percent` > @cocoa_median)')

# (3) With Review Date and Company Location as the index, select the samples
#     reviewed after 2012 whose Company Location is not France, Canada,
#     Amsterdam or Belgium.
# Approach: set_index, sort so the outer level can be sliced, then combine a
# label slice on level 0 with a boolean mask built from level 1.
idx = pd.IndexSlice
excluded_locations = ["France", "Canada", "Amsterdam", "Belgium"]
temp = df.set_index(['Review Date', 'Company Location']).sort_index(level=0)
outside_excluded = ~temp.index.get_level_values(1).isin(excluded_locations)
# NOTE(review): the label slice `2012:` includes 2012 itself; confirm whether
# "after 2012" is meant to be inclusive.
temp.loc[idx[2012:, outside_excluded], :]