# Note: data.iloc[0:100, :] selects by position -- the part before ',' picks the rows, the part after picks the columns (kept as a comment: `data` is not defined in this script).
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 9 23:16:18 2018
@author: fengjuan
"""
import pandas as pd
import numpy as np
from numpy import nan as NaN
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
from pandas import Series,DataFrame
# Console display settings: plain-text reprs, at most 20 columns / 25 rows.
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)

# Load the training data (the path is machine-specific).
train = pd.read_csv('E:/python/train.csv')

# Fill missing values with each column's mode. Several values can tie for the
# mode, so mode() returns a Series (unlike mean(), which returns a scalar);
# [0] takes the first one.
# Alternative: fill with the median -> train['Age'].fillna(train['Age'].median())
train['Age'] = train['Age'].fillna(train['Age'].mode()[0])
mode = train["Age"].mode()
train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode()[0])
train['Cabin'] = train['Cabin'].fillna(train['Cabin'].mode()[0])

# Encode Sex as dummy (one-hot) columns.
# Alternative: train["Sex"].map({"male": 1, "female": 0})
sex = pd.get_dummies(train["Sex"])

# Ages labelled by PassengerId. The raw values are passed on purpose: handing
# the Series itself to Series(..., index=...) would *reindex* it by label
# (misaligning ages and producing NaN) instead of relabelling it.
bb = Series(train['Age'].values, index=train['PassengerId'], name='Age')

# axis=1 sorts the *columns* by their labels, in descending order.
train1 = train.sort_index(axis=1, ascending=False)

# Sort rows by column values. sort_index(by=...) was removed from pandas;
# sort_values(by=...) is the supported equivalent.
train0 = train.sort_values(by='Age')
train2 = train.sort_values(by=['Age', 'Sex'])

# Ranking example, kept for reference:
#   obj = Series([7, -5, 7, 4, 2, 0, 4]); print(obj); print(obj.rank())
# Other summaries that can be enabled: train.info(), train.describe()

# Position of the minimum Age (recent pandas returns the integer position;
# older releases returned the index label) ...
print(train['Age'].argmin())
# ... and the index label of the minimum Age.
print(train['Age'].idxmin())
# Sample skewness and kurtosis of Age.
print(train['Age'].skew(), train['Age'].kurt())