Intro
把 pandas 中的类别型变量映射成数值或其他值,常规做法是写一个 if/else 函数,然后用 apply 逐行调用。
看几个例子:
apply
import datetime
def timer(start_time=None):
    """Start a stopwatch, or report the time elapsed since it was started.

    Args:
        start_time: A ``datetime.datetime`` previously returned by this
            function, or a falsy value (the default ``None``) to start timing.

    Returns:
        The current ``datetime.datetime`` when ``start_time`` is falsy;
        otherwise ``None``, after printing the elapsed time.
    """
    if not start_time:
        # No reference point supplied: hand back "now" for the caller to keep.
        return datetime.datetime.now()

    elapsed = (datetime.datetime.now() - start_time).total_seconds()
    # Split total seconds into whole hours, whole minutes and the remainder.
    thour, temp_sec = divmod(elapsed, 3600)
    tmin, tsec = divmod(temp_sec, 60)
    print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))
import pandas as pd
import numpy as np
# Lookup table from category label to numeric code; replace() and map() below
# are driven by this dictionary.
di = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8}
# One million random labels drawn from the keys of ``di``, so that apply(),
# replace() and map() all perform the same label -> number conversion.
df = pd.DataFrame({'col1': np.random.choice(list(di), 1000000)})
def transform_f(x):
    """Convert a category label "A".."H" to its numeric code 1..8.

    Kept as a hand-written if/elif chain because this function is the
    baseline that gets timed with ``Series.apply``.

    Args:
        x: The value to convert.

    Returns:
        The integer code for a recognised label, or ``None`` for any other
        value.
    """
    if x == "A":
        return 1
    elif x == "B":
        return 2
    elif x == "C":
        return 3
    elif x == "D":
        return 4
    elif x == "E":
        return 5
    elif x == "F":
        return 6
    elif x == "G":
        return 7
    elif x == "H":
        return 8
    # Unrecognised input: make the fall-through result explicit.
    return None
# Baseline: call transform_f once per element through Series.apply.
# The function is passed directly; wrapping it in a lambda would only add an
# extra Python-level call per row. The resulting Series is discarded because
# only the elapsed time is of interest here.
start_time = timer()
df.col1.apply(transform_f)
timer(start_time)
Time taken: 0 hours 0 minutes and 0.45 seconds.
replace
# Time DataFrame.replace with a nested dict: values in column "col1" are
# looked up in ``di``. The result is not assigned; only the timing is printed.
start_time = timer()
df.replace({"col1": di})
timer(start_time)
Time taken: 0 hours 0 minutes and 0.12 seconds.
map
# Time Series.map with the dictionary ``di`` applied to column col1.
# The result is not assigned; only the timing is printed.
start_time = timer()
df.col1.map(di)
timer(start_time)
Time taken: 0 hours 0 minutes and 0.03 seconds.
2022-01-14 于南京市江宁区九龙湖