# Note: numpy's and Octave's standard-deviation routines use different default
# normalizations; pass ddof=1, as in numpy.std(axis=0, ddof=1), to make numpy
# match Octave.
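# A minimal sketch of the difference (toy numbers, not from the exercise):
# numpy.std defaults to the population formula (divide by N), while Octave's
# std defaults to the sample formula (divide by N - 1), i.e. ddof=1 in numpy.
#   import numpy as np
#   a = np.array([1.0, 2.0, 3.0, 4.0])
#   print(np.std(a))          # 1.1180..., population std (numpy default)
#   print(np.std(a, ddof=1))  # 1.2909..., sample std (Octave default)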
# Machine Learning Online Class
# Exercise 1: Linear regression with multiple variables
#
# For this part of the exercise, you will need to change some
# parts of the code below for various experiments (e.g., changing
# learning rates).

# Initialization
import numpy as np
import matplotlib.pyplot as plt
# ================ Part 1: Feature Normalization ================
def loadData(filename):
    """Read comma-separated rows of (size, bedrooms, price) into x and y."""
    with open(filename) as fr:
        arrayLines = fr.readlines()
    numberOfLines = len(arrayLines)
    x = np.zeros((numberOfLines, 2))  # features: size, number of bedrooms
    y = np.zeros((numberOfLines, 1))  # target: price
    for index, line in enumerate(arrayLines):
        listFromLine = line.strip().split(',')
        x[index, :] = listFromLine[:2]
        y[index] = listFromLine[-1]
    return x, y, numberOfLines
# ================ Part 1.1: Scale features and set them to zero mean ================
def featureNormalize(X, lenOfData):
    """Scale each feature to zero mean and unit sample standard deviation."""
    X_norm = X.copy()
    # Column-wise mean of X.
    mu = np.mean(X_norm, axis=0).reshape((1, -1))
    # Column-wise standard deviation of X. numpy needs ddof=1 here to match
    # Octave's std (or use the statistics module); see
    # https://stackoverflow.com/questions/15389768/standard-deviation-of-a-list
    sigma = X_norm.std(axis=0, ddof=1).reshape((1, -1))
    # Tiling replicates mu and sigma across all rows (broadcasting would too).
    X_norm = (X_norm - np.tile(mu, (lenOfData, 1))) / np.tile(sigma, (lenOfData, 1))
    return X_norm, mu, sigma
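# A quick sanity check of featureNormalize on toy data (not from the
# exercise): each column of the result should have mean ~0 and sample std ~1.
#   Xn, mu0, sigma0 = featureNormalize(np.array([[1., 10.], [2., 20.], [3., 30.]]), 3)
#   print(Xn.mean(axis=0))         # -> [0. 0.]
#   print(Xn.std(axis=0, ddof=1))  # -> [1. 1.]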
# ================ Part 2: Gradient Descent ================
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    """Vectorized gradient descent: theta := theta - (alpha / m) * X.T * (X * theta - y)."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    thetaMatrix = np.mat(theta)
    m = XMatrix.shape[0]
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        # Update all parameters simultaneously, then record the cost.
        thetaMatrix = thetaMatrix - XMatrix.T * (XMatrix * thetaMatrix - yMatrix) * alpha / m
        J_history[i, :] = computeCostMulti(X, y, thetaMatrix, m)
    return thetaMatrix, J_history
def computeCostMulti(X, y, thetaMatrix, m):
    """Compute the cost J(theta) = sum((X * theta - y) ** 2) / (2 * m)."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    J = np.sum(np.array(XMatrix * thetaMatrix - yMatrix) ** 2) / (2 * m)
    return J
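# Toy check of computeCostMulti (hypothetical numbers): with theta = 0 the
# cost reduces to sum(y ** 2) / (2 * m), e.g. (1 + 4) / 4 = 1.25 below.
#   print(computeCostMulti(np.array([[1., 0.], [1., 1.]]),
#                          np.array([[1.], [2.]]), np.mat(np.zeros((2, 1))), 2))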
def Plotting(num_iters, J_history):
    """Plot the convergence of the cost function over the iterations."""
    plt.figure(2)
    plt.plot(range(num_iters), J_history, color='r', label='J_history')
    plt.xlabel("Number of iterations")
    plt.ylabel("Cost J")
    plt.legend(loc='upper right')
    plt.show()
# ================ Part 3: Normal Equations ================
def normalEqn(X, y):
    """Closed-form solution: theta = pinv(X.T * X) * X.T * y."""
    XMatrix = np.mat(X)
    yMatrix = np.mat(y)
    # pinv keeps the solve well-defined even when X.T * X is singular.
    thetaNormEqn = np.linalg.pinv(XMatrix.T * XMatrix) * XMatrix.T * yMatrix
    return thetaNormEqn
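# Unlike gradient descent, the normal equation needs no learning rate,
# iteration count, or feature scaling. A toy check (hypothetical numbers,
# identity design matrix) where it should recover y exactly:
#   print(normalEqn(np.eye(2), np.array([[3.], [4.]])))  # -> [[3.], [4.]]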
if __name__ == '__main__':
    print("Loading data ...\n")
    x, y, numberOfLines = loadData('ex1data2.txt')
    # Print out some data points.
    print('First 10 examples from the dataset:')
    for i in range(10):
        print(' x = [%.0f %.0f], y = %.0f' % (x[i, 0], x[i, 1], y[i, 0]))
    print('Program paused. Press enter to continue.\n')
    # Scale features and set them to zero mean.
    print('Normalizing Features ...\n')
    x, mu, sigma = featureNormalize(x, numberOfLines)
    # Add a column of ones to x (the intercept term).
    columnOne = np.ones((numberOfLines, 1))
    X = np.column_stack((columnOne, x))
    # ================ Part 2: Gradient Descent ================
    # Instructions: We have provided you with the following starter
    #               code that runs gradient descent with a particular
    #               learning rate (alpha).
    #
    #               Your task is to first make sure that your functions -
    #               computeCost and gradientDescent - already work with
    #               this starter code and support multiple variables.
    #
    #               After that, try running gradient descent with
    #               different values of alpha and see which one gives
    #               you the best result (see the commented sketch after
    #               this block).
    #
    #               Finally, you should complete the code at the end
    #               to predict the price of a 1650 sq-ft, 3 br house.
    #
    # Hint: Calling plt.plot repeatedly before plt.show plots multiple
    #       graphs on the same figure (matplotlib's equivalent of
    #       Octave's 'hold on').
    #
    # Hint: At prediction, make sure you do the same feature normalization.
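    # A minimal sketch (not part of the original exercise code) of comparing
    # learning rates; the alpha values are just illustrative guesses:
    #   for alphaTry in (0.3, 0.1, 0.03, 0.01):
    #       _, J = gradientDescentMulti(X, y, np.zeros((3, 1)), alphaTry, 50)
    #       plt.plot(range(50), J, label='alpha = %g' % alphaTry)
    #   plt.xlabel('Number of iterations')
    #   plt.ylabel('Cost J')
    #   plt.legend()
    #   plt.show()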
    print('Running gradient descent ...\n')
    # Choose some alpha value.
    alpha = 0.01
    num_iters = 8500
    # Init theta and run gradient descent.
    theta = np.zeros((3, 1))
    theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)
    # Visualize the cost function (uncomment to show the convergence plot).
    # Plotting(num_iters, J_history)
    # Display gradient descent's result.
    print('Theta computed from gradient descent: ', theta)
    # Predict the price of a 1650 sq-ft, 3-bedroom house, applying the same
    # feature normalization that was used on the training data.
    predictHouse = np.array([1650, 3]).reshape(1, 2)
    priceGD = np.column_stack((1, (predictHouse - mu) / sigma)) * theta
    print('Predicted price of a 1650 sq-ft, 3 br house (gradient descent):', priceGD)
    # ================ Part 3: Normal Equations ================
    print('Solving with normal equations...')
    xNE, yNE, numberOfLines = loadData('ex1data2.txt')
    # The normal equation needs no feature scaling; just add the intercept column.
    columnOne = np.ones((numberOfLines, 1))
    XNE = np.column_stack((columnOne, xNE))
    thetaNormEqn = normalEqn(XNE, yNE)
    print('Theta computed from the normal equations: ', thetaNormEqn)
    price = np.mat([1, 1650, 3]) * thetaNormEqn
    print('Predicted price of a 1650 sq-ft, 3 br house (normal equations):', price)
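    # Sanity check (an assumption, not from the source): with enough gradient
    # descent iterations, the two predicted prices above should roughly agree.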