import pandas as pd
import numpy as np
# Load the nutrition table and show its sodium column in both sort orders.
food_info = pd.read_csv("food_info.csv")

# Ascending order (the sort_values default). Rebinding the sorted frame
# yields the same rows as sorting the existing frame with inplace=True.
food_info = food_info.sort_values(by="Sodium_(mg)")
print(food_info["Sodium_(mg)"])

# Descending order.
food_info = food_info.sort_values(by="Sodium_(mg)", ascending=False)
print(food_info["Sodium_(mg)"])
# Load the Titanic training data and study the missing values in "Age".
titanic_survival = pd.read_csv("titanic_train.csv")
titanic_survival.head()  # return value is discarded when run as a script
age = titanic_survival["Age"]

# Boolean mask: True wherever the age is missing.
age_is_null = pd.isnull(age)
age_null_true = age[age_is_null]  # keep only the missing entries
age_null_count = age_null_true.shape[0]
print(age_null_count)  # how many ages are missing

# Naive mean over every row of the column, missing entries included.
mean_age = sum(age) / len(age)
print(mean_age)

# Mean computed only over the rows whose age is present.
good_ages = age[~age_is_null]
print(good_ages)
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
# Average ticket fare for each of the three passenger classes.
passenger_classes = [1, 2, 3]
fares_by_class = {}  # maps passenger class -> mean fare
for this_class in passenger_classes:
    # Select the rows of the current class, then average their fares.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
    # NOTE(review): the original loop body had lost its indentation; the
    # print is kept inside the loop so each class's mean is shown - confirm.
    print(fare_for_class)
# pivot_table: `index` names the column to group by, `values` the column(s)
# to aggregate, and `aggfunc` the aggregation applied to each group.
# Aggregations are given by name ("mean", "sum") instead of NumPy callables,
# which newer pandas releases warn about; the computed result is the same.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)

# When aggfunc is omitted, pivot_table falls back to the mean.
passenger_age = titanic_survival.pivot_table(index="Pclass", values="Age")
print(passenger_age)

# Sum of "Fare" and of "Survived" for each "Embarked" value.
passenger_stats = titanic_survival.pivot_table(
    index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
)
print(passenger_stats)
# Two ways of discarding missing data.
# Drop every column that contains at least one missing value.
drop_na_columns = titanic_survival.dropna(axis="columns")
# Drop every row whose "Age" or "Sex" is missing (rows are the default axis).
new_titanic_survival = titanic_survival.dropna(subset=["Age", "Sex"])
print(new_titanic_survival)
# Look up individual cells by row label and column name.
row_index_83_age = titanic_survival.loc[83, "Age"]
# NOTE(review): despite the variable's name, the row label read here is 766.
row_index_1000_pclass = titanic_survival.loc[766, "Pclass"]
# One value per line, in the same order as two separate print calls.
print(row_index_83_age, row_index_1000_pclass, sep="\n")
import numpy as np
# NOTE(review): this repeats the identical food_info section found earlier
# in this file; consider removing one of the two copies.
# Load the nutrition table and show its sodium column in both sort orders.
food_info = pd.read_csv("food_info.csv")

# Ascending order (the sort_values default). Rebinding the sorted frame
# yields the same rows as sorting the existing frame with inplace=True.
food_info = food_info.sort_values(by="Sodium_(mg)")
print(food_info["Sodium_(mg)"])

# Descending order.
food_info = food_info.sort_values(by="Sodium_(mg)", ascending=False)
print(food_info["Sodium_(mg)"])
# NOTE(review): this repeats the identical age analysis found earlier in
# this file; consider removing one of the two copies.
# Load the Titanic training data and study the missing values in "Age".
titanic_survival = pd.read_csv("titanic_train.csv")
titanic_survival.head()  # return value is discarded when run as a script
age = titanic_survival["Age"]

# Boolean mask: True wherever the age is missing.
age_is_null = pd.isnull(age)
age_null_true = age[age_is_null]  # keep only the missing entries
age_null_count = age_null_true.shape[0]
print(age_null_count)  # how many ages are missing

# Naive mean over every row of the column, missing entries included.
mean_age = sum(age) / len(age)
print(mean_age)

# Mean computed only over the rows whose age is present.
good_ages = age[~age_is_null]
print(good_ages)
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
# NOTE(review): this repeats the identical per-class fare section found
# earlier in this file; consider removing one of the two copies.
# Average ticket fare for each of the three passenger classes.
passenger_classes = [1, 2, 3]
fares_by_class = {}  # maps passenger class -> mean fare
for this_class in passenger_classes:
    # Select the rows of the current class, then average their fares.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
    # NOTE(review): the original loop body had lost its indentation; the
    # print is kept inside the loop so each class's mean is shown - confirm.
    print(fare_for_class)
# NOTE(review): this repeats the identical pivot-table section found earlier
# in this file; consider removing one of the two copies.
# pivot_table: `index` names the column to group by, `values` the column(s)
# to aggregate, and `aggfunc` the aggregation applied to each group.
# Aggregations are given by name ("mean", "sum") instead of NumPy callables,
# which newer pandas releases warn about; the computed result is the same.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)

# When aggfunc is omitted, pivot_table falls back to the mean.
passenger_age = titanic_survival.pivot_table(index="Pclass", values="Age")
print(passenger_age)

# Sum of "Fare" and of "Survived" for each "Embarked" value.
passenger_stats = titanic_survival.pivot_table(
    index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
)
print(passenger_stats)
# NOTE(review): this repeats the identical dropna section found earlier in
# this file; consider removing one of the two copies.
# Two ways of discarding missing data.
# Drop every column that contains at least one missing value.
drop_na_columns = titanic_survival.dropna(axis="columns")
# Drop every row whose "Age" or "Sex" is missing (rows are the default axis).
new_titanic_survival = titanic_survival.dropna(subset=["Age", "Sex"])
print(new_titanic_survival)
# NOTE(review): this repeats the identical cell-lookup section found earlier
# in this file; consider removing one of the two copies.
# Look up individual cells by row label and column name.
row_index_83_age = titanic_survival.loc[83, "Age"]
# NOTE(review): despite the variable's name, the row label read here is 766.
row_index_1000_pclass = titanic_survival.loc[766, "Pclass"]
# One value per line, in the same order as two separate print calls.
print(row_index_83_age, row_index_1000_pclass, sep="\n")