# Build the demo data set: 100 rows of randomly generated attributes.
# np.random.randint excludes its upper bound, e.g. height lies in 150..189.
boolean = [True, False]
gender = ["男", "女"]
color = ["white", "black", "yellow"]
data = pd.DataFrame(
    dict(
        height=np.random.randint(150, 190, 100),
        weight=np.random.randint(40, 90, 100),
        # Label columns: draw random list positions, then look up the label.
        smoker=[boolean[pos] for pos in np.random.randint(0, 2, 100)],
        gender=[gender[pos] for pos in np.random.randint(0, 2, 100)],
        age=np.random.randint(15, 90, 100),
        color=[color[pos] for pos in np.random.randint(0, len(color), 100)],
    )
)
# Method 1 of 3: Series.map.
# Map with a dictionary: each value is replaced by the entry stored under it.
data["gender"] = data["gender"].map({"男":1, "女":0})
# Method 1 of 3 (continued): Series.map with a function instead of a dict.
def gender_map(x):
    """Encode a single gender label as an integer flag.

    Args:
        x: One cell value, either the label "男" or an already-encoded 1;
            anything else is treated as the other category.

    Returns:
        1 for "男" or an already-encoded 1, otherwise 0.
    """
    # Accept 1 as well as "男" so that applying the mapping to a column that
    # has already been encoded leaves it unchanged instead of zeroing it.
    return 1 if x in (1, "男") else 0
# Series.map also accepts a callable and applies it to every element.
# NOTE(review): the dict-based map earlier in this script has already replaced
# the labels with 1/0, so gender_map receives those codes here, not "男"/"女".
data["gender"] = data["gender"].map(gender_map)
def apply_age(x, bias):
    """Shift a single value by a fixed offset.

    Args:
        x: The value to adjust.
        bias: The offset added to x; pass a negative number to subtract.

    Returns:
        x + bias.
    """
    return x + bias
# Method 2 of 3: Series.apply.
# Extra positional arguments are forwarded through args, so apply_age is
# called for every age with bias=-3.
data["age"] = data["age"].apply(apply_age,args=(-3,))
########################
# Row-wise processing
def BMI(df):
    """Compute body-mass index: weight / (height / 100) ** 2.

    Only the "weight" and "height" entries are read, so this works on a single
    row (as passed by DataFrame.apply with axis=1) as well as on a whole
    DataFrame, where the arithmetic is carried out element-wise per column.

    Args:
        df: Object indexable by "weight" and "height". Height is divided by
            100 before squaring (presumably centimetres, with weight in
            kilograms).

    Returns:
        The weight divided by the squared, rescaled height.
    """
    weight = df["weight"]
    height_m = df["height"] / 100
    return weight / height_m ** 2
# axis=1 calls BMI once per row, passing that row as a Series.
data["BMI"]=data.apply(BMI,axis=1)
# Column-wise processing: with axis=0 the function receives one column at a time.
numeric_columns = ["height", "weight", "age"]
# Column totals; reset_index() turns the resulting Series into a DataFrame.
a = data[numeric_columns].apply(np.sum, axis=0).reset_index()
# Natural logarithm of every value, column by column (result is not stored).
data[numeric_columns].apply(np.log, axis=0)
# Method 3 of 3: element-wise mapping over a whole DataFrame (applymap).
# The given function is called once for every cell. It is less general than
# apply, but handy in some situations, such as formatting every number below.
df = pd.DataFrame({name: np.random.randn(5) for name in ["A", "B", "C", "D", "E"]})
df
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated, so prefer map and fall back only on older releases.
_elementwise = df.map if hasattr(df, "map") else df.applymap
formatted_cells = _elementwise(lambda x: "%.2f" % x)
# DataFrame.apply passes a whole column (a Series) to the function, and
# "%.2f" % <Series> raises TypeError, so format each element inside the column.
formatted_columns = df.apply(lambda col: col.map(lambda x: "%.2f" % x))