#导入库 任务1——加载数据及进行预处理
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Read the insurance dataset from the local data directory into a DataFrame.
df = pd.read_csv('./data/insurance.csv')

# Quick inspection of the loaded table: leading rows, (rows, columns) shape,
# and the per-column summary that info() prints.
# NOTE(review): head() and shape are bare expressions, so their values are
# only shown when run interactively (e.g. as a notebook cell result).
df.head()
df.shape
df.info()
# 2. Data cleaning and transformation
# Encode the binary categorical columns as integers:
#   sex:    'female' -> 0, 'male' -> 1
#   smoker: 'no' -> 0, 'yes' -> 1
# The result is assigned back to the column instead of calling
# replace(inplace=True) on df['col']: that form operates on an intermediate
# Series, triggers a chained-assignment warning on recent pandas 2.x
# releases, and leaves df unchanged once Copy-on-Write is active.
df['sex'].unique()
df['sex'] = df['sex'].replace({'female': 0, 'male': 1})
df.head()

df['smoker'].unique()
df['smoker'] = df['smoker'].replace({'yes': 1, 'no': 0})
df.head()
# Data cleaning and transformation for df['region']: 'southwest', 'southeast',
# 'northwest' and 'northeast' are encoded as 1, 2, 3 and 4 respectively.
# Inspect the distinct region labels present in the column before encoding.
df['region'].unique()

# Integer code assigned to each region label.
dict_region = {
    'southwest': 1,
    'southeast': 2,
    'northwest': 3,
    'northeast': 4,
}
df['region']=df['region'].map(dict_re