多项式回归模型做运费价格预测
数据准备
数据源取系统应付结算单数据,数据需要经过清洗,具体为:
-- Cleaning query: keep only base_settle rows whose CODE starts with 'AP' and
-- whose TRAVEL_MILES, WEIGHT, VOLUME and PS_NUM are all strictly positive.
-- NOTE(review): the Python code later in this file reads an HJ_FEE column that
-- this query does not select; confirm how that column reaches the exported data.
SELECT b.CODE,
       b.TRAVEL_MILES,
       b.WEIGHT,
       b.VOLUME,
       b.PS_NUM
FROM base_settle b
WHERE b.CODE LIKE 'AP%'
  AND b.TRAVEL_MILES > 0
  AND b.WEIGHT > 0
  AND b.VOLUME > 0
  AND b.PS_NUM > 0;
导包并读取数据源
import csv
import numpy as np
import pandas as pd
import os
# Load the CSV data source into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
source = pd.read_csv(
    'C:/Users/liuhb41/Desktop/回归/settledata1.csv'
)
source  # bare expression: shows the frame when this is the last line of a notebook cell
划分训练集和测试集
# Split the rows into a training set and a test set: 20% of the rows are held
# out for testing, and the fixed random_state keeps the split reproducible.
from sklearn.model_selection import train_test_split
data_train, data_test = train_test_split(
    source,
    test_size=0.2,
    random_state=1234,
)
设置训练集X和Y
# Training features: every training row, positional columns 2, 3 and 4
# (the end of the slice is exclusive).
# NOTE(review): which named columns these are depends on the CSV layout; confirm.
x_train = data_train.iloc[:, 2:5]
x_train  # bare expression for notebook-style inspection
将x转换为ndarray
# Rebind x_train from the DataFrame to its underlying NumPy array.
x_train = x_train.values
x_train  # bare expression for notebook-style inspection
设置训练集y
# Training target: the 'HJ_FEE' column of the training rows.
y_train = data_train['HJ_FEE']
y_train  # bare expression for notebook-style inspection
进行数据多项式处理
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
d = 3  # polynomial degree
rnd = np.random.RandomState(40)  # seeded random generator; not referenced by the visible code below
# Expand the training features into polynomial terms up to degree d
# (interaction_only=False keeps the pure powers as well as the cross terms).
x_train_p = PolynomialFeatures(
    degree=d,
    interaction_only=False,
).fit_transform(x_train)
模型创建及训练
# Create the linear regression model; fitted on the polynomial features it
# acts as a polynomial regression of the target.
LinearR_ = LinearRegression()
# Train (fit) the model on the expanded training features and the training target.
LinearR_.fit(x_train_p, y_train)
模型测试
# Build the test features and target with the same column selection that was
# used for the training set.
data_test  # bare expression kept from the original (notebook-style inspection)
x_test = data_test.iloc[:, 2:5]
y_test = data_test['HJ_FEE']
# Predict on the held-out rows. x_test is expanded with a PolynomialFeatures of
# the same degree d so its columns line up with what the model was fitted on.
y_test_predict = LinearR_.predict(
    PolynomialFeatures(degree=d).fit_transform(x_test)
)
可用score函数查看评分,不断调优
# R^2 on the training rows. The model was fitted on exactly these rows, so this
# figure is optimistic and should not be the only number used for tuning.
score = LinearR_.score(x_train_p, y_train)
# R^2 on the held-out rows, which took no part in fitting. Compare it with the
# training score while tuning the degree: a large gap points to overfitting.
score_test = LinearR_.score(
    PolynomialFeatures(degree=d).fit_transform(x_test),
    y_test,
)
score, score_test  # (training R^2, held-out R^2)