Python 机器学习之（影厅观影人数的预测）：多参数线性回归的梯度下降算法

最新推荐文章于 2023-10-21 19:00:00 发布

瑟瑟发抖小菜鸟

最新推荐文章于 2023-10-21 19:00:00 发布

阅读量2.6k

点赞数 5

本文链接：https://blog.csdn.net/qq_41638632/article/details/98326669

版权

影响影厅观影人数的因素可能有很多种，本文选取其中三种（影厅的大小、电影口碑、影院排片）作为特征，用多参数线性回归的梯度下降算法来预测影厅的观影人数。

import pandas as pd #导入pandas库
import numpy as np #导入numpy库
import matplotlib.pyplot as plt #导入matplotlib库
# Read the data set from disk into a DataFrame.
df = pd.read_csv(filepath_or_buffer='d:/3_film.csv')

# Insert a constant column of ones at position 1. The linear model has an
# intercept term theta_0, and giving it its own all-ones feature lets the
# intercept be handled by the same matrix product as the other parameters.
df.insert(loc=1, column='one', value=1)

# Preview the first five rows (the result is only displayed when this runs
# in an interactive session such as a notebook).
df.head()

from sklearn.model_selection import train_test_split #引入sklearn的数据拆分的库
# Total number of columns in the table.
cols = len(df.columns)

# Features: every column after the first one, i.e. the constant bias column
# plus the three factors (hall size, film reputation, screening schedule).
X = df.iloc[:, 1:cols]
# Target: the first column only (the audience count), kept two-dimensional.
Y = df.iloc[:, 0:1]

# Copy the underlying values into NumPy arrays (plain ndarrays, not np.matrix).
x, y = (np.array(frame.values) for frame in (X, Y))

# Shuffle the samples and hold out 25% of them as the test split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

def computeCost(X, y, theta):
    """Return the linear-regression cost J(theta) (half mean squared error).

    J(theta) = 1 / (2 * m) * sum((X @ theta.T - y) ** 2)

    where m is the number of samples (rows of ``X``).

    Args:
        X: Design matrix of shape (m, n); ``np.ndarray`` or ``np.matrix``.
        y: Targets, either shape (m, 1) or (m,).
        theta: Parameters, either shape (1, n) or (n,); ``np.ndarray`` or
            ``np.matrix``.

    Returns:
        The scalar cost as a NumPy float.
    """
    # Normalise every input to a plain float ndarray with an explicit shape.
    # The original expression used `*`, which is a matrix product only when
    # one operand is an np.matrix; with ndarrays it broadcasts element-wise
    # and silently produces a wrong value.
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    weights = np.asarray(theta, dtype=float).reshape(1, -1)

    # Residual of the hypothesis h(x) = x . theta for every sample, shape (m, 1).
    residual = features @ weights.T - targets

    # Sum of squared residuals divided by 2m.
    return np.sum(np.square(residual)) / (2 * len(features))
def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration applies the simultaneous update

        theta_j := theta_j - alpha * (1 / m) * sum_i (h(x_i) - y_i) * x_ij

    for all parameters j, where h(x) = x . theta and m is the number of
    samples. A cost that keeps shrinking over the iterations indicates that
    the fit is improving.

    Args:
        X: Design matrix of shape (m, n); ``np.ndarray`` or ``np.matrix``.
        y: Targets, either shape (m, 1) or (m,).
        theta: Initial parameters, either shape (1, n) or (n,). It is not
            modified.
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: ``theta`` is the fitted parameter row as a
        (1, n) ``np.matrix``; ``cost`` is an ndarray of length ``iters``
        holding the cost after each iteration.

    Raises:
        ValueError: If ``X`` contains no samples.
    """
    # Normalise the inputs to float ndarrays with explicit 2-D shapes. The
    # original per-parameter loop multiplied an (m, 1) error by X[:, j]; when
    # X is a plain ndarray that slice is 1-D, the product broadcasts to
    # (m, m) and the summed "gradient" is wrong.
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    # np.array (not asarray) takes a copy so the caller's theta is untouched.
    weights = np.array(theta, dtype=float).reshape(1, -1)

    samples = len(features)
    if samples == 0:
        raise ValueError("X must contain at least one sample")
    step = alpha / samples  # loop-invariant factor alpha * (1 / m)

    cost = np.zeros(iters)
    for i in range(iters):
        # Prediction error for every sample, shape (m, 1).
        error = features @ weights.T - targets
        # error.T @ features is the (1, n) row of sum_i error_i * x_ij, so all
        # parameters are updated at once from the same error vector.
        weights = weights - step * (error.T @ features)
        # Record the cost reached with the updated parameters. The parameters
        # are passed as an np.matrix so computeCost's product is a matrix
        # product regardless of how it is written.
        cost[i] = computeCost(features, targets, np.matrix(weights))

    return np.matrix(weights), cost

alpha = 0.000001  # learning rate
iters = 100  # number of gradient-descent iterations
# One initial parameter (zero) per feature column, bias column included.
# Stored as an np.matrix so that `*` between it and the data is a matrix
# product rather than an element-wise one.
theta = np.matrix(np.zeros((1, x_train.shape[1])))
# Fit on the training split only; fitting on the full data set would let the
# model see the test samples it is evaluated on afterwards.
g,cost = gradientDescent(x_train,y_train,theta,alpha,iters)

# Plot the measured test targets against the model's predictions.
# Predict the held-out samples with the fitted parameters: y_pre = x_test . g^T.
# (y_pre was previously used without ever being defined.) np.asarray keeps
# this working whether g is an np.matrix or a plain ndarray.
y_pre = np.asarray(x_test @ np.asarray(g).T)
plt.figure(figsize = (10,6))  # figure size
t = np.arange(len(x_test))  # sample index of the test split, used as the x axis
# Red curve: measured values from the test split.
plt.plot(t,y_test,'r',linewidth = 2,label='test')
# Green curve: values predicted by the fitted model.
plt.plot(t,y_pre,'g',linewidth = 2,label='predict')
plt.legend()
plt.show()