#本文中所使用的数据集参考吴恩达机器学习课程的编程作业(ex1),数据集下载地址:百度网盘(提取码:mwcl)
1 加载数据集并可视化
使用numpy科学计算库加载数据集中数据,并通过matplotlib库下plot函数进行绘制散点图。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Load the course dataset (two comma-separated columns; col 0 -> x, col 1 -> y).
# Use a raw string for the Windows path so backslashes can never be read as
# escape sequences (the original happened to work only because '\Z', '\m',
# '\S', '\e' are not recognized escapes).
data = np.loadtxt(r'D:\ZacLing\mat\Study\ex1data1.txt', delimiter=',')
x = data[:, 0]
y = data[:, 1]
plt.figure()
plt.plot(x, y, 'k.')  # black-dot scatter of the raw training data
plt.show()
2 sklearn版本
线性式(假设函数):$h_\theta(x) = \theta_0 + \theta_1 x$
损失函数:$J(\theta_0, \theta_1) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$
2.1 初始化参数
Len = np.size(X)
# BUG FIX: np.hstack takes a single sequence of arrays; the original passed
# two positional arguments, which raises TypeError. Wrap them in a list.
X = np.hstack([np.ones([Len, 1]), X.reshape(Len, 1)])  # prepend intercept (ones) column
2.2 初始化模型
from sklearn import linear_model
# Ordinary least-squares linear regression estimator.
# NOTE(review): LinearRegression fits its own intercept by default, so the
# manually added ones column elsewhere is likely redundant — verify.
model = linear_model.LinearRegression()
2.3 拟合数据并画图
# Fit the estimator on the design matrix (with intercept column) and targets.
model.fit(X, y)
# Evaluate the fitted line at a few hand-picked x values; each row is
# [1, x] so the intercept column matches the training design matrix.
xp = np.array([[1, p] for p in (0, 3, 7, 12, 15, 25)])
yp = model.predict(xp)
print(yp)
plt.plot(xp[:, 1], yp, 'g-')  # green line: fitted regression
plt.show()
2.4 程序总览
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn import linear_model

# Load the dataset (two comma-separated columns; presumably population vs.
# profit as in Ng's ex1 — confirm against the course handout).
data = np.loadtxt('ex1data1.txt', delimiter=',')
X = data[:, 0]
y = data[:, 1]
plt.figure()
plt.plot(X, y, 'r.')  # red-dot scatter of the training data
Len = np.size(X)
# Prepend a column of ones as the intercept term. The original also mutated
# X.shape to (Len, 1) first, which the reshape below makes redundant.
X = np.hstack([np.ones([Len, 1]), X.reshape(Len, 1)])
model = linear_model.LinearRegression()
model.fit(X, y)
# Predict at hand-picked x values; rows are [1, x] to match the design matrix.
xp = np.array([[1, 0], [1, 3], [1, 7], [1, 12], [1, 15], [1, 25]])
yp = model.predict(xp)
print(yp)
plt.plot(xp[:, 1], yp, 'g-')  # fitted line over the scatter
plt.show()
3 手撸版本
3.1 损失函数图像绘制
import numpy as np
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
def CostFunction(X, y, theta0, theta1):  # squared-error cost over a parameter grid
    """Evaluate the linear-regression cost J on a grid of (theta0, theta1).

    X: (m, 2) design matrix (ones column first); y: (m, 1) targets.
    theta0, theta1: 1-D arrays of candidate intercepts / slopes (same length n).
    Returns an (n, n) array with J_list[i, j] = J(theta0[i], theta1[j]).
    """
    n = np.size(theta0)
    J_list = np.zeros([n, n])
    m = np.size(y)
    for i in range(n):
        for j in range(n):
            theta = np.array([[theta0[i]], [theta1[j]]])
            # np.sum yields a scalar; the original builtin sum() produced a
            # 1-element array, whose assignment to J_list[i, j] is deprecated
            # (and rejected) in NumPy >= 1.25.
            J_list[i, j] = np.sum(np.square(np.dot(X, theta) - y)) / (2 * m)
    return J_list
# Raw string keeps the Windows-path backslashes literal.
data = np.loadtxt(r'D:\ZacLing\mat\Study\ex1data1.txt', delimiter=',')
X = data[:, 0]
y = data[:, 1]
plt.figure()
ax = plt.axes(projection='3d')
Len = np.size(X)
y.shape = (Len, 1)  # column vector so (X @ theta - y) stays (m, 1)
# Prepend intercept column; the original's extra X.shape mutation was redundant.
X = np.hstack([np.ones([Len, 1]), X.reshape(Len, 1)])
theta0 = np.linspace(-6, 0, 200)
theta1 = np.linspace(1, 1.5, 200)
# CostFunction returns J[i, j] for (theta0[i], theta1[j]), i.e. rows indexed
# by the x-axis; matplotlib expects Z[row, col] = Z[y, x], so transpose.
ax.contour3D(theta0, theta1, CostFunction(X, y, theta0, theta1).T, 50, cmap='cool')
plt.show()
3.2 程序总览
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
def CostFunction(X, y, theta):  # squared-error cost for one parameter vector
    """Return J(theta) = sum((X @ theta - y)^2) / (2m) as a scalar.

    X: (m, n) design matrix; y: (m, 1) targets; theta: (n, 1) parameters.
    np.sum collapses the (m, 1) residual array to a Python scalar; the
    original builtin sum() returned a 1-element array, and its dead
    'J = 0' initializer is removed.
    """
    m = np.size(y)
    return np.sum(np.square(np.dot(X, theta) - y)) / (2 * m)
def GradientDescent(X, y, theta, alpha, steps):  # batch gradient descent
    """Run batch gradient descent for linear regression and return theta.

    X: (m, n) design matrix (first column of ones for the intercept).
    y: (m, 1) targets; theta: (n, 1) initial parameters.
    alpha: learning rate; steps: number of full-batch iterations.
    Returns the fitted (n, 1) theta.

    The original accumulated the per-sample gradient in a Python loop; that
    is exactly X^T (X theta - y), computed here vectorized. It also recorded
    CostFunction into a local J_list that was never returned or used — that
    dead per-step work is removed.
    """
    m = np.size(y)
    for _ in range(steps):
        grad = np.dot(X.T, np.dot(X, theta) - y)  # (n, 1) batch gradient
        theta = theta - alpha * grad / m
    return theta
# Load the dataset (two comma-separated columns; col 0 -> feature, col 1 -> target).
data = np.loadtxt('ex1data1.txt', delimiter=',')
X = data[:, 0]
y = data[:, 1]
plt.figure()
plt.plot(X, y, 'r.')  # red-dot scatter of the training data
Len = np.size(X)
y.shape = (Len, 1)  # column vector so X @ theta - y broadcasts to (m, 1)
# Prepend intercept column; the original's extra X.shape mutation before
# the reshape was redundant and is removed.
X = np.hstack([np.ones([Len, 1]), X.reshape(Len, 1)])
theta = np.zeros([2, 1])  # initial parameters [intercept, slope]
steps = 1500              # number of gradient-descent iterations
alpha = 0.01              # learning rate
theta = GradientDescent(X, y, theta, alpha, steps)
# Predict at hand-picked x values; rows are [1, x] to match the design matrix.
xp = np.array([[1, 0], [1, 3], [1, 7], [1, 12], [1, 15], [1, 25]])
yp = np.dot(xp, theta)
plt.plot(xp[:, 1], yp, 'g-')  # fitted line over the scatter
print(theta)
plt.show()
4 多元线性回归
数据集下载地址:百度网盘(提取码:29fv)
4.1 数据归一化
def Normalization(X):  # per-feature min-max scaling into [0, 1]
    """Min-max scale each feature (column) of X independently.

    The original used the global min/max of the whole array, which squashes
    small-range columns when features have very different scales (as in the
    two-column ex1data2 input). Using axis=0 normalizes each column on its
    own; for a 1-D input the result is unchanged.
    """
    col_min = np.min(X, axis=0)
    range_ = np.max(X, axis=0) - col_min
    return (X - col_min) / range_
4.2 程序总览
from unicodedata import normalize  # NOTE(review): unused — likely an accidental editor auto-import; safe to remove
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn import linear_model
def Normalization(X):  # per-feature min-max scaling into [0, 1]
    """Min-max scale each feature (column) of X independently.

    The original used the global min/max of the whole array, which squashes
    small-range columns when features have very different scales (as in the
    two-column ex1data2 input). Using axis=0 normalizes each column on its
    own; for a 1-D input the result is unchanged.
    """
    col_min = np.min(X, axis=0)
    range_ = np.max(X, axis=0) - col_min
    return (X - col_min) / range_
# Load the multivariate dataset: two feature columns plus one target column.
data = np.loadtxt('ex1data2.txt', delimiter=',')
X, y = data[:, 0:2], data[:, 2]
# Scale the features before fitting.
X = Normalization(X)
# Fit ordinary least squares and report the learned parameters.
model = linear_model.LinearRegression()
model.fit(X, y)
print(model.intercept_)
print(model.coef_)