# ===== Part 1: Univariate linear regression (单变量) =====
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def costFunction(X, Y, theta):
    """Mean-squared-error cost J(theta) = sum((X@theta - Y)^2) / (2m).

    X: (m, n) design matrix (first column of ones for the bias term).
    Y: (m, 1) target column vector.
    theta: (n, 1) parameter column vector.
    """
    residual = X @ theta - Y  # X @ theta is equivalent to X.dot(theta)
    return (residual ** 2).sum() / (2 * len(X))
def gradientDescent(X, Y, theta, alpha, iters):
    """Batch gradient descent for linear regression.

    Returns (theta, costs) where costs holds the cost value recorded
    after each of the `iters` parameter updates.
    """
    costs = []
    m = len(X)
    for _ in range(iters):
        # Gradient of the MSE cost: X^T (X theta - Y) / m
        theta = theta - alpha * X.T @ (X @ theta - Y) / m
        costs.append(costFunction(X, Y, theta))  # track convergence
    return theta, costs
# --- 1. Load and prepare the data ---
# Raw string so the backslashes in the Windows path are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt",
                   names=["popalution", "profit"])
# data.head() shows the first five rows
data.insert(0, 'ones', 1)  # bias column of ones for the intercept term

X = data.iloc[:, 0:-1]  # every column except the last = features
Y = data.iloc[:, -1]    # last column = target
X = np.array(X)  # convert DataFrame to ndarray
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 97)

# --- 2. Fit by gradient descent ---
alpha = 0.01
iters = 1000
theta = np.zeros((2, 1))
cost_init = costFunction(X, Y, theta)
g, cost = gradientDescent(X, Y, theta, alpha, iters)
# g ≈ [[-3.24140214], [1.1272942]] — compare against the normal-equation theta

# --- 3. Cost curve ---
fig, ax = plt.subplots()
ax.plot(np.arange(iters), cost)
ax.set(xlabel='iters', ylabel='cost', title='cost vs iters')
plt.show()

# --- 4. Fitted line over the training data ---
# BUGFIX: the line must span the population (feature) range X[:, 1],
# not the profit (target) range Y.min()..Y.max().
x = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
y = g[0, 0] + g[1, 0] * x
fig, ax = plt.subplots()
ax.scatter(X[:, 1], Y, label='training data')
ax.plot(x, y, 'r', label='predict')
ax.legend()
ax.set(xlabel='population', ylabel='profit')
plt.show()
# ===== Part 2: Multivariate linear regression (多变量) =====
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# 2. Feature normalization (z-score)
def normalize_feature(data):
    """Z-score each column: (value - column mean) / column std (pandas ddof=1)."""
    mu = data.mean()
    sigma = data.std()
    return data.sub(mu).div(sigma)
def costFunction(X, Y, theta):
    """Average squared-error cost, J = sum((X@theta - Y)^2) / (2m)."""
    m = len(X)
    errors = X @ theta - Y  # X @ theta is equivalent to X.dot(theta)
    return np.sum(errors * errors) / (2 * m)
def gradientDescent(X, Y, theta, alpha, iters):
    """Batch gradient descent.

    NOTE: unlike the Part-1 version, this one returns (costs, theta).
    """
    m = len(X)
    costs = []
    for _ in range(iters):
        # Step against the gradient X^T (X theta - Y) / m
        theta = theta - alpha * X.T @ (X @ theta - Y) / m
        costs.append(costFunction(X, Y, theta))
    return costs, theta
# 1. Load the data
# Raw string so the Windows-path backslashes are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data2.txt",
                   names=["size", "bedrooms", "price"])
# 2. Normalize features (and target) to zero mean / unit std
data = normalize_feature(data)
data.insert(0, "one", 1)  # bias column

# 4. Build the data set by slicing
X = data.iloc[:, 0:-1]
Y = data.iloc[:, -1]
X = np.array(X)
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 47)

# Compare convergence for several learning rates.
alphas = [0.0003, 0.003, 0.03]
iters = 1000
theta = np.zeros((3, 1))  # gradientDescent does not mutate theta, so each run starts from zeros
cost_init = costFunction(X, Y, theta)
fig, ax = plt.subplots()
for alpha in alphas:
    cost, g = gradientDescent(X, Y, theta, alpha, iters)
    ax.plot(np.arange(iters), cost, label=alpha)
ax.legend()
ax.set(xlabel="iters", ylabel="cost", title="cost vs iters")
plt.show()
"""
# 3. Data visualization
data.plot.scatter("size","price",label="size")
plt.show()
data.plot.scatter("bedrooms","price",label="bedrooms")
plt.show()
"""
# ===== Part 3: Normal equation (正规方程法) =====
import numpy as np
#import matplotlib.pyplot as plt
import pandas as pd
def normal_Equation(X, Y):
    """Closed-form least squares: theta = (X^T X)^+ X^T Y.

    Uses the Moore–Penrose pseudo-inverse instead of np.linalg.inv, so the
    computation also succeeds when X^T X is singular (e.g. linearly
    dependent features); for the full-rank case the result is identical.

    X: (m, n) design matrix, Y: (m, 1) targets. Returns (n, 1) theta.
    """
    theta = np.linalg.pinv(X.T @ X) @ X.T @ Y
    return theta
# Load the same univariate data set and solve in closed form.
# Raw string so the Windows-path backslashes are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt",
                   names=["popalution", "profit"])
# data.head() shows the first five rows
data.insert(0, 'ones', 1)  # bias column of ones

X = data.iloc[:, 0:-1]
Y = data.iloc[:, -1]
X = np.array(X)  # convert DataFrame to ndarray
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 97)
theta = normal_Equation(X, Y)
print(theta)