#coding=utf-8
import numpy as np
import random
from numpy import genfromtxt
import sys
def getData(dataSet):
    """Split a dataset into a feature matrix (with a bias column) and labels.

    Parameters
    ----------
    dataSet : ndarray of shape (m, n)
        Rows are samples; the last column holds the target value.

    Returns
    -------
    trainData : ndarray of shape (m, n)
        The first n-1 columns of dataSet, plus a final column of ones
        acting as the intercept/bias term.
    trainLabel : ndarray of shape (m,)
        The last column of dataSet (the targets).
    """
    m, n = np.shape(dataSet)
    # Start from all-ones so the last column stays 1.0 (bias term).
    trainData = np.ones((m, n))
    trainData[:, :-1] = dataSet[:, :-1]
    trainLabel = dataSet[:, -1]
    return trainData, trainLabel
def batchGradientDescent(x, y, theta, alpha, m, maxIterations):
    """Fit linear-regression parameters by batch gradient descent.

    Performs `maxIterations` full-batch updates minimizing the mean
    squared error of the model h(x) = x . theta.

    Parameters
    ----------
    x : ndarray of shape (m, n)
        Design matrix (bias column already included, as built by getData).
    y : ndarray of shape (m,)
        Target values.
    theta : ndarray of shape (n,)
        Initial parameter vector.
    alpha : float
        Learning rate.
    m : int
        Number of training samples (rows of x).
    maxIterations : int
        Number of update steps to perform.

    Returns
    -------
    ndarray of shape (n,)
        The learned parameter vector.
    """
    # Transpose is loop-invariant; compute it once.
    xTrans = x.transpose()
    for _ in range(maxIterations):
        hypothesis = np.dot(x, theta)        # current predictions
        loss = hypothesis - y                # residuals
        gradient = np.dot(xTrans, loss) / m  # mean gradient of squared error
        theta = theta - alpha * gradient
    return theta
def predict(x, theta):
    """Predict targets for raw feature rows using learned parameters.

    Appends a bias column of ones to x (matching the layout produced by
    getData) before applying the linear model.

    Parameters
    ----------
    x : ndarray of shape (m, n)
        Raw feature rows (no bias column).
    theta : ndarray of shape (n + 1,)
        Learned parameter vector, bias weight last.

    Returns
    -------
    ndarray of shape (m,)
        Predicted values x_with_bias . theta.
    """
    m, n = np.shape(x)
    xTest = np.ones((m, n + 1))  # last column stays 1.0 (bias)
    xTest[:, :-1] = x
    return np.dot(xTest, theta)
if __name__ == "__main__":
dataPath = "house.csv"
dataSet = genfromtxt(dataPath, delimiter=',')
print dataSet
trainData, trainLabel = getData(dataSet)
print 'trainData:',trainData
print 'trainLabel:',trainLabel
m, n = np.shape(trainData)
print 'np.shape(trainData):',np.shape(trainData)
theta = np.ones(n)
print 'theta:',theta
alpha = 0.1
maxIteration = 5
theta = batchGradientDescent(trainData, trainLabel, theta, alpha, m, maxIteration)
print '========theta',theta
x = np.array([[3.1, 5.5], [3.3, 5.9], [3.5, 6.3], [3.7, 6.7], [3.9, 7.1]])
print predict(x, theta)
'''cat house.csv
1.1,1.5,1.0
2.1,2.3,0.0'''