数据预处理
!wget -nc "https://labfile.oss.aliyuncs.com/courses/1363/googleplaystore.csv"
import pandas as pd
# Load the downloaded Google Play Store CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='./googleplaystore.csv')
# Preview the first five rows.
df.head(n=5)
处理Installs列
# Inspect the distinct values found in the Installs column.
df['Installs'].unique()
# Besides install counts, the column also contains the stray value 'Free';
# drop the rows that carry it. Take an explicit copy so that later column
# assignments modify an independent frame rather than a slice of the
# original (this avoids pandas' SettingWithCopyWarning).
df = df[df['Installs'] != 'Free'].copy()
df['Installs'].unique()
# Strip the '+' and ',' characters from every value in one vectorised pass,
# then convert the column to integers. astype(str) keeps any non-string
# value usable through the .str accessor instead of turning it into NaN.
df['Installs'] = (
    df['Installs']
    .astype(str)
    .str.replace(r'[+,]', '', regex=True)
    .astype('int64')
)
# Summary statistics of the now-numeric column, then a preview of the frame.
df['Installs'].describe()
df.head()