pandas使用小技巧(一)
使用map对某些列做特征工程
import pandas as pd
# Demo frame: one categorical column ('color') and one numeric column ('num').
data = dict(
    color=['red', 'blue', 'black', 'white'],
    num=[11, 23, 21, 32],
)
df = pd.DataFrame(data)
df
# Encode the 'color' column as integers via a lookup table; Series.map
# looks each value up in the dict and returns the matching code.
f = dict(red=1, blue=2, black=3, white=4)
color_codes = df['color'].map(f)
df['color2'] = color_codes
df
使用replace与正则清洗数据
# Demo frame whose 'sales' column mixes plain numbers with strings that
# carry currency markers ('RMB', '¥', '$').
data = {
    'category': ['one', 'two', 'three', 'four'],
    'sales': [999, '888.8RMB', '¥500', '$1000.88'],
}
df = pd.DataFrame(data)
df
# Strip the currency markers, then convert the column to float.
# The pattern removes the literal token 'RMB' or any single '$', '¥' or ','
# (thousands separator). The previous pattern '[$,RMB,¥]' was a character
# class, so it deleted the individual letters R, M and B wherever they
# appeared rather than the token 'RMB'.
# Series.replace is used (rather than .str.replace) so the non-string
# entry 999 passes through unchanged instead of becoming NaN.
df['sales'] = df['sales'].replace(r'RMB|[$¥,]', '', regex=True).astype('float')
df