梯度下降--多元线性回归
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
# Not referenced by name; kept from the original import list
# (presumably for the '3d' projection used when plotting -- confirm).
from mpl_toolkits.mplot3d import Axes3D

# Load the data set: a comma-separated file parsed into a 2-D float array.
data = genfromtxt(r'data.csv',delimiter = ',')
print(data)
数据的格式:
[[100. 4. 9.3] [ 50. 3. 4.8] [100. 4. 8.9] [100. 2. 6.5] [ 50. 2. 4.2] [ 80. 2. 6.2] [ 75. 3. 7.4] [ 65. 4. 6. ] [ 90. 3. 7.6] [ 90. 2. 6.1]]
# Split the data: every column except the last is a feature,
# the last column is the target.
x_data = data[:,:-1]
y_data = data[:,-1]
print(x_data,y_data)
# Learning rate
lr = 0.0001
# Initial parameters: theta0 is the intercept, theta1 and theta2 weight
# the first and second feature columns respectively.
theta0 = 0
theta1 = 1
theta2 = 2
# Maximum number of iterations
epochas = 1000


def _residuals(theta0, theta1, theta2, x_data, y_data):
    """Return ``y_data - prediction`` for every sample as one array."""
    return y_data - (theta1 * x_data[:, 0] + theta2 * x_data[:, 1] + theta0)


def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of the two-feature linear model.

    The model is ``theta0 + theta1 * x_data[:, 0] + theta2 * x_data[:, 1]``.

    Args:
        theta0: Intercept.
        theta1: Coefficient of the first feature column.
        theta2: Coefficient of the second feature column.
        x_data: 2-D NumPy array; columns 0 and 1 are the features.
        y_data: Target values, one per row of ``x_data``.

    Returns:
        Sum of squared residuals divided by the number of samples.

    Raises:
        ValueError: If ``x_data`` contains no samples (the mean is undefined).
    """
    m = len(x_data)
    if m == 0:
        raise ValueError("x_data must contain at least one sample")
    residuals = _residuals(theta0, theta1, theta2, x_data, y_data)
    return (residuals ** 2).sum() / float(m)


def graient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochas):
    """Fit the two-feature linear model with batch gradient descent.

    Args:
        x_data: 2-D NumPy array; columns 0 and 1 are the features.
        y_data: Target values, one per row of ``x_data``.
        theta0: Initial intercept.
        theta1: Initial coefficient of the first feature column.
        theta2: Initial coefficient of the second feature column.
        lr: Learning rate applied to every parameter update.
        epochas: Number of full passes over the data.

    Returns:
        The tuple ``(theta0, theta1, theta2)`` after ``epochas`` updates.
        With no samples or ``epochas == 0`` the initial values are returned
        unchanged.
    """
    # Total number of samples
    m = float(len(x_data))
    if m == 0:
        # No samples means every gradient is zero, so nothing would change.
        return theta0, theta1, theta2

    x0 = x_data[:, 0]
    x1 = x_data[:, 1]
    for _ in range(epochas):
        # The residuals are shared by all three partial derivatives, so
        # compute them once per epoch.
        residuals = _residuals(theta0, theta1, theta2, x_data, y_data)
        # Average gradient over all samples
        theta0_grad = -residuals.sum() / m
        theta1_grad = -(x0 * residuals).sum() / m
        theta2_grad = -(x1 * residuals).sum() / m
        # Update all parameters together, from gradients taken at the
        # same (previous) parameter values.
        theta0 = theta0 - (lr * theta0_grad)
        theta1 = theta1 - (lr * theta1_grad)
        theta2 = theta2 - (lr * theta2_grad)
    return theta0, theta1, theta2
# Report the starting parameters and their error before training.
print("Starting theta0 = {0} ,theta1 = {1} ,theta2 = {2} , error = {3}".
      format(theta0, theta1, theta2,
             compute_error(theta0, theta1, theta2, x_data, y_data)))
# Parameters are initialised first, then updated by gradient descent.
print(".......")
theta0, theta1, theta2 = graient_descent_runner(
    x_data, y_data, theta0, theta1, theta2, lr, epochas)
# Report the fitted parameters and the error they achieve.
print("After {0} iterations theta0 = {1},theta1 = {2} ,theta2 = {3} , error = {4}".
      format(epochas, theta0, theta1, theta2,
             compute_error(theta0, theta1, theta2, x_data, y_data)))
# 3-D scatter plot of the samples: the two features on x/y, the target on z.
ax = plt.figure().add_subplot(111,projection = '3d')
ax.scatter(x_data[:,0],x_data[:,1],y_data,c = 'r',marker = 'o',s = 100)
x0 = x_data[:,0]
x1 = x_data[:,1]
# Build the coordinate grid on which the fitted plane is evaluated.
x0, x1 = np.meshgrid(x0, x1)
z = theta0 + x0 * theta1 + x1 * theta2
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')
plt.show()
sklearn多元线性回归(注意:sklearn 里封装的算法不是梯度下降法,而是标准方程法,所以结果和梯度下降法得到的结果不完全一样)
import numpy as np
from numpy import genfromtxt
from sklearn import linear_model
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load the data
data = genfromtxt('data.csv',delimiter = ',')
print(data)
#[[100.    4.    9.3]
# [ 50.    3.    4.8]
# [100.    4.    8.9]
# [100.    2.    6.5]
# [ 50.    2.    4.2]
# [ 80.    2.    6.2]
# [ 75.    3.    7.4]
# [ 65.    4.    6. ]
# [ 90.    3.    7.6]
# [ 90.    2.    6.1]]

# Split the data: all columns but the last are features, the last is the target.
x_data = data[:,:-1]
y_data = data[:,-1]
print(x_data)
print(y_data)

# Create and fit the model. The number of feature columns passed in
# determines how many coefficients are reported below.
model = linear_model.LinearRegression()
model.fit(x_data,y_data)

# Coefficients
print('coefficients:',model.coef_)
# Intercept
print('intercept:',model.intercept_)

# Predict for one new sample
x_test = [[102,4]]
predict = model.predict(x_test)
print('predict:',predict)
sklearn多项式回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures  # generates polynomial terms
from sklearn.linear_model import LinearRegression

# Load the data; row 0 is skipped and columns 1 and 2 are used as x and y.
data = np.genfromtxt('job.csv',delimiter = ',')
x_data = data[1:,1]
y_data = data[1:,2]
plt.scatter(x_data,y_data)
plt.show()

# Reshape to 2-D column arrays; np.newaxis adds the extra dimension.
x_data = data[1:,1,np.newaxis]
y_data = data[1:,2,np.newaxis]
print(x_data)

# Create and fit a plain linear model as a baseline.
model = LinearRegression()
model.fit(x_data,y_data)

# Plot the samples and the straight-line fit.
plt.plot(x_data,y_data,'b.')
plt.plot(x_data,model.predict(x_data),'r')
plt.show()

# Define the polynomial expansion; `degree` controls which powers are generated.
# degree=1 gives x_poly rows like [[ 1., 1.],[ 1., 2.]]
# degree=2 gives x_poly rows like [[ 1., 1., 1.],[ 1., 2., 4.]]
# and so on.
ploy_reg = PolynomialFeatures(degree=3)
# Expand the features
x_poly = ploy_reg.fit_transform(x_data)
# Define the regression model
lin_reg = LinearRegression()
# Train it on the expanded features
lin_reg.fit(x_poly,y_data)

# Plot the samples and the polynomial fit. The transformer is already
# fitted, so only transform() is needed for prediction inputs.
plt.plot(x_data,y_data,'b.')
plt.plot(x_data,lin_reg.predict(ploy_reg.transform(x_data)),c='r')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()