# 根据 train.csv 的文件属性找到其路径,然后把数据读取出来
import numpy as np
import pandas as pd
# Read the training CSV from its fixed location on the local desktop into a
# DataFrame that the rest of the script works on.
data = pd.read_csv(filepath_or_buffer=r'C:\Users\Administrator\Desktop\train.csv')
# Bare expression: has no effect in a plain script; presumably kept so the
# frame is displayed when this runs as a notebook cell.
data
# 查看是否存在空值,并统计每列空值的个数
# Number of missing values in each column (a True in the mask counts as 1).
data.isna().sum()
# 用 isnull 判断 Age 中的空值,True 表示该值为空
# 用众数填充 Age 中的空值
# Frequency of each age; the most frequent value is the mode used for filling.
data['Age'].value_counts()
# Boolean mask over the Age column: True marks a missing age.
data['Age'].isnull()
# Fill missing ages with the column's mode, computed from the data rather than
# hard-coded. The result is assigned back to the column on purpose: calling
# fillna(..., inplace=True) on `data.Age` is chained assignment, which pandas
# warns about and which leaves `data` unchanged under copy-on-write.
age_mode = data['Age'].mode()
if not age_mode.empty:  # mode() is empty when every Age value is missing
    # With several equally frequent ages, mode() is sorted; take the smallest.
    data['Age'] = data['Age'].fillna(age_mode.iloc[0])
# 统计男女比例
# Share of each sex: per-value counts divided by the total number of rows.
data['Sex'].value_counts().div(len(data['Sex']))
# 统计存活与死亡的比例
# Share of each Survived value: per-value counts divided by the total number
# of rows.
data['Survived'].value_counts().div(len(data['Survived']))
# 找出已知年龄的最大值和最小值
# Largest age in the Age column.
data['Age'].max()
# Smallest age in the Age column.
data['Age'].min()
# 画出男女存活与死亡人数的柱状图
import matplotlib.pyplot as plt
# Count of each Survived value among male rows and among female rows.
smale = data.loc[data['Sex'] == 'male', 'Survived'].value_counts()
sfemale = data.loc[data['Sex'] == 'female', 'Survived'].value_counts()
# One column per sex, indexed by the Survived value, so every Survived value
# is drawn as a pair of bars (male, female).
df = pd.DataFrame({'male': smale, 'female': sfemale})
df.plot.bar()
plt.xlabel('survived')
plt.ylabel('number')
plt.title("Bar Chart of Men and Women's Survival and Death ")
plt.show()
# 将数据保存为 CSV 格式文件
# Wrap the processed frame in a new DataFrame and write it to the desktop as
# CSV. to_csv runs with its defaults, so the row index is written as the first
# column of the file.
data1 = pd.DataFrame(data=data)
data1.to_csv(path_or_buf=r'C:\Users\Administrator\Desktop\data1.csv')
# Bare expression: has no effect in a plain script; presumably kept so the
# frame is displayed when this runs as a notebook cell.
data1