# Data processing (数据处理)
import numpy as np
import pandas as pd
from pandas import DataFrame
# Load the raw records, then normalise the price and location columns.
# Result: `data` has a float64 `price` column plus categorical `province`
# and `city` columns; the original `location` column is removed.
data = pd.read_csv('data.csv', encoding='utf-8', engine='python')
# data.drop('_id', axis=1, inplace=True)  # drop the id column

# Strip the '¥' currency sign from price. The result is assigned back to the
# frame rather than calling replace(..., inplace=True) on data['price']:
# an in-place call on a column selection may operate on a temporary copy
# (pandas Copy-on-Write), leaving `data` unchanged.
data['price'] = data['price'].replace('¥', '', regex=True)

# Split location on whitespace: the first token is the province, the second
# (when present) is the city. Deciding by token count instead of string
# length avoids an IndexError on long single-token locations; when there is
# no second token the province doubles as the city. Missing or empty
# locations yield NaN instead of raising.
location_parts = data['location'].str.split()
data['province'] = location_parts.str[0]
data['city'] = location_parts.str[1].fillna(data['province'])
data = data.drop(columns='location')

# Data type conversion.
data['price'] = data['price'].astype('float64')
for column in ('province', 'city'):
    data[column] = data[column].astype('category')