import numpy as np
import pandas as pd

# Load the Titanic passenger data; the CSV path is relative to the working
# directory the script is run from.
titanic_survival = pd.read_csv("titanic_train.csv")
# Preview of the first rows. The result is only displayed in an interactive
# session (notebook/REPL); as a script statement it is evaluated and discarded.
titanic_survival.head()
# pandas marks a missing value with NaN ("not a number").
# Series.isnull() yields a boolean Series that is True wherever a value is
# missing and False everywhere else.
age = titanic_survival["Age"]
age_is_null = age.isnull()
# Boolean-mask selection keeps only the entries whose age is missing.
age_null_true = age.loc[age_is_null]
# The number of selected entries is the number of missing ages.
age_null_count = age_null_true.shape[0]
print(age_null_count)
# Any arithmetic that touches a NaN also yields NaN, so summing a column that
# still contains missing ages gives nan. This first attempt is kept on purpose
# to demonstrate that pitfall.
mean_age = sum(titanic_survival["Age"]) / len(titanic_survival["Age"])
print(mean_age)

# Filter out the missing values first, then average what remains.
good_ages = titanic_survival["Age"][~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)

# Missing data is so common that many pandas methods filter it automatically:
# Series.mean() skips NaN by default, so no manual masking is needed.
correct_mean_age = titanic_survival["Age"].mean()
print(correct_mean_age)
# Mean fare for each passenger class, collected as {class: mean fare}.
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    # Select the rows of the current class, then average their fares.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
print(fares_by_class)
# pivot_table arguments:
#   index   - the column to group by
#   values  - the column the calculation is applied to
#   aggfunc - the calculation to perform on each group
# The string "mean" is used instead of np.mean because recent pandas versions
# deprecate passing the NumPy callable here.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)
import pandas as pdimport numpy as nptitanic_survival = pd.read_csv("titanic_train.csv")titanic_survival.head()#The Pandas library uses NaN, which stands for "not a number", to indicate a missing va