import pandas
import numpy
import time
import matplotlib.pyplot as plt
%matplotlib inline
def _min_max_scale(column):
    """Rescale a numeric pandas Series linearly onto the interval [0, 1].

    Missing values are skipped when locating the minimum and maximum and
    stay missing in the result. A column whose present values are all equal
    maps to 0.0 instead of dividing by zero.
    """
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        return (column - lowest).astype(float)
    return (column - lowest) / span


def prepareData(filename):
    """Read a passenger CSV and convert its feature columns to numbers.

    The file must contain the columns ``Sex``, ``Embarked``, ``Age`` and
    ``Fare``. The following transformations are applied:

    * ``Sex``: ``'female'`` -> 0, ``'male'`` -> 1.
    * ``Embarked``: ``'S'`` -> 1, ``'C'`` -> 2, ``'Q'`` -> 3; missing or
      unrecognised entries take the most frequent known code.
    * ``Age``: missing entries take the mean of the known ages, then the
      column is min-max scaled to [0, 1].
    * ``Fare``: min-max scaled to [0, 1]; missing fares are left missing.
    * A constant column ``ones`` is inserted at position 0.

    No other column is touched; in particular ``PassengerId``, ``Name``,
    ``Ticket`` and ``Cabin`` are not dropped here.

    Args:
        filename: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The processed ``pandas.DataFrame``.

    Raises:
        KeyError: If one of the required columns is absent.
        ValueError: If the file already has a column named ``ones``.
    """
    data = pandas.read_csv(filename)

    data['Sex'] = data['Sex'].map({'female': 0, 'male': 1})

    # Only real ports take part in the vote for the most frequent value, so
    # the "missing" marker itself can never win.
    embarked = data['Embarked'].map({'S': 1, 'C': 2, 'Q': 3})
    known_modes = embarked.mode()
    # With no known port at all there is nothing to vote on; fall back to 0.
    fill_port = known_modes.iloc[0] if not known_modes.empty else 0
    data['Embarked'] = embarked.fillna(fill_port)

    # Series.mean() skips missing entries, so the imputed age is the average
    # of the ages that are actually known rather than one diluted by zeros.
    age = data['Age']
    data['Age'] = _min_max_scale(age.fillna(age.mean()))

    # NOTE: min-max scaling is the active normalisation for Age and Fare; a
    # logistic squashing variant is intentionally not applied.
    data['Fare'] = _min_max_scale(data['Fare'])

    # Column of ones in front, presumably the intercept term of a linear
    # model - confirm against the caller.
    data.insert(0, 'ones', 1)
    return data
def run(X, Y, theta, alpha, steps, log_path='titanic/model.txt'):
    """Iterate gradient-descent updates on ``theta`` and log each step.

    Every iteration applies ``theta = theta - alpha * getGradient(X, Y, theta)``
    and records ``getCost(X, Y, theta)``. The log file is CSV-like: a header
    ``theta_0,theta_1,...,cost`` followed by one row per iteration holding
    the elements of ``theta`` and the cost after that update.

    The caller's ``theta`` object is not modified; the updated parameters are
    only available through the return value.

    Args:
        X: Feature data, passed unchanged to ``getCost`` and ``getGradient``.
        Y: Target data, passed unchanged to ``getCost`` and ``getGradient``.
        theta: Initial parameters; must support ``len()`` and iteration.
        alpha: Step size multiplying the gradient.
        steps: Number of updates to perform.
        log_path: File that receives the per-step log. It is overwritten,
            and its parent directory must already exist.

    Returns:
        A tuple ``(costs, theta, time_spent)``: the list of costs (initial
        cost first, then one per step), the final parameters, and the elapsed
        wall-clock time in seconds.
    """
    started = time.perf_counter()
    costs = [getCost(X, Y, theta)]

    with open(log_path, 'w') as log:
        header = ['theta_' + str(i) for i in range(len(theta))]
        log.write(','.join(header + ['cost']) + '\n')

        # A counter loop (rather than range) keeps accepting a non-integer
        # step count such as 1e4.
        count = 0
        while count < steps:
            # Rebind instead of "-=" so the array passed in by the caller is
            # left untouched and integer-typed parameters do not fail.
            theta = theta - alpha * getGradient(X, Y, theta)
            cost = getCost(X, Y, theta)
            costs.append(cost)
            row = [str(item) for item in theta]
            log.write(','.join(row + [str(cost)]) + '\n')
            count += 1

    time_spent = time.perf_counter() - started
    return costs, theta, time_spent