A brief introduction to linear regression:
Here ŷ = h(x) denotes the predicted value (a bit awkward to type here, but you get the idea). Simply put, linear regression finds a suitable weight vector so that the prediction ŷ is as close as possible to the true value y (you can look up the full derivation elsewhere; this is my first post and I am not yet comfortable typesetting formulas, so pointers are very welcome!).
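For readers who prefer a formula, here is a compact statement of what the code below actually optimizes (the notation is mine; the regularization weight 0.15 and the averaged L2 term match the code):

\hat{y} = h_{w,b}(x) = w^{\top} x + b

\mathcal{L}(w, b) = \frac{1}{m} \sum_{i=1}^{m} \left( \hat{y}^{(i)} - y^{(i)} \right)^2 + 0.15 \cdot \frac{1}{d} \sum_{j=1}^{d} w_j^{2}

where m is the batch size and d is the input dimension; training searches for the w and b that minimize this loss.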
Next is the code, together with a comparison against sklearn's LinearRegression.
linear_regression_model.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


class linearRegressionModel:

    def __init__(self, x_dimen):
        self.x_dimen = x_dimen
        self._index_in_epoch = 0
        self.constructModel()
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    # weight initialization
    def weight_variable(self, shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    # bias initialization
    def bias_variable(self, shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    # return the next mini-batch; once the data is exhausted, reshuffle and start over
    def next_batch(self, batch_size):
        start = self._index_in_epoch          # start index of the slice
        self._index_in_epoch += batch_size    # end index of the slice
        if self._index_in_epoch > self._num_datas:   # the end index has run past the dataset
            perm = np.arange(self._num_datas)
            np.random.shuffle(perm)            # shuffle the permutation of indices
            self._datas = self._datas[perm]    # reorder the data with the shuffled indices
            self._labels = self._labels[perm]
            start = 0                          # restart the slice at the beginning
            self._index_in_epoch = batch_size  # new end index
            assert batch_size <= self._num_datas  # raises AssertionError if the batch is larger than the dataset
        end = self._index_in_epoch
        return self._datas[start:end], self._labels[start:end]

    # build the single-layer network
    def constructModel(self):
        self.x = tf.placeholder(tf.float32, [None, self.x_dimen])  # placeholders are filled later via feed_dict
        self.y = tf.placeholder(tf.float32, [None, 1])
        self.w = self.weight_variable([self.x_dimen, 1])
        self.b = self.bias_variable([1])
        self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b)
        mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))   # mean squared error between prediction and target
        l2 = tf.reduce_mean(tf.square(self.w))                             # L2 regularization term
        self.loss = mse + 0.15 * l2                                        # total loss
        self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss)  # minimize the loss with the Adam optimizer

    # train the model
    def train(self, x_train, y_train, x_test, y_test):
        self._datas = x_train
        self._labels = y_train
        self._num_datas = x_train.shape[0]
        for i in range(5000):
            batch = self.next_batch(100)
            self.sess.run(self.train_step, feed_dict={self.x: batch[0], self.y: batch[1]})  # one training step; the batch is fed via feed_dict
            if i % 500 == 0:
                test_loss = self.sess.run(self.loss, feed_dict={self.x: x_test, self.y: y_test})
                print('step %d,test_loss %f' % (i, test_loss))  # report the loss on the test set every 500 steps

    def predict_batch(self, arr, batch_size):
        for i in range(0, len(arr), batch_size):
            yield arr[i:i + batch_size]  # yield makes this a generator that emits one batch at a time

    # predict on new data
    def predict(self, x_predict):
        pred_list = []
        for x_test_batch in self.predict_batch(x_predict, 100):
            pred = self.sess.run(self.y_prec, {self.x: x_test_batch})
            pred_list.append(pred)  # collect the predictions for each batch
        return np.vstack(pred_list)
run.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from linear_regression_model import linearRegressionModel as lrm

if __name__ == '__main__':
    x, y = make_regression(7000)  # synthetic dataset: x.shape = (7000, 100), y.shape = (7000,)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)
    y_lrm_train = y_train.reshape(-1, 1)  # reshape the targets into (n, 1) column vectors for the TF model
    y_lrm_test = y_test.reshape(-1, 1)

    linear = lrm(x.shape[1])
    linear.train(x_train, y_lrm_train, x_test, y_lrm_test)
    y_predict = linear.predict(x_test)
    print("Tensorflow R2: ", r2_score(y_lrm_test.ravel(), y_predict.ravel()))

    lr = LinearRegression()
    y_predict = lr.fit(x_train, y_train).predict(x_test)
    print("Sklearn R2: ", r2_score(y_test, y_predict))  # r2_score expects (y_true, y_pred)
Output:
step 0,test_loss 37999.144531
step 500,test_loss 8343.382812
step 1000,test_loss 828.596680
step 1500,test_loss 122.320221
step 2000,test_loss 54.585281
step 2500,test_loss 53.017075
step 3000,test_loss 52.988567
step 3500,test_loss 53.011978
step 4000,test_loss 52.996586
step 4500,test_loss 53.004738
Tensorflow R2: 0.999997521256188
Sklearn R2: 1.0
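Both models reach an R² of essentially 1. For reference, r2_score computes the coefficient of determination

R^2 = 1 - \frac{\sum_i \left( y_i - \hat{y}_i \right)^2}{\sum_i \left( y_i - \bar{y} \right)^2}

where \bar{y} is the mean of the true targets; a value of 1.0 means the predictions explain all of the variance in the test set.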
Analysis:
As the scores show, the single-layer network built with TensorFlow falls slightly short of sklearn's LinearRegression here, but the network formulation is extensible: by adding deeper layers combined with activation functions, it can be made to fit increasingly complex data; a rough sketch of such an extension follows.
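As an illustration of that extensibility (this is a hypothetical variant, not part of the original Tencent code; the hidden size 64 and the ReLU activation are arbitrary choices), constructModel could be replaced by a version with one hidden layer:

    # hypothetical deeper variant of constructModel: one hidden layer with a ReLU activation
    def constructDeeperModel(self, hidden_dim=64):
        self.x = tf.placeholder(tf.float32, [None, self.x_dimen])
        self.y = tf.placeholder(tf.float32, [None, 1])
        # hidden layer: relu(x @ w1 + b1)
        w1 = self.weight_variable([self.x_dimen, hidden_dim])
        b1 = self.bias_variable([hidden_dim])
        hidden = tf.nn.relu(tf.nn.bias_add(tf.matmul(self.x, w1), b1))
        # linear output layer: hidden @ w2 + b2
        w2 = self.weight_variable([hidden_dim, 1])
        b2 = self.bias_variable([1])
        self.y_prec = tf.nn.bias_add(tf.matmul(hidden, w2), b2)
        # same loss as before: MSE plus an averaged L2 penalty on the weights
        mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))
        l2 = tf.reduce_mean(tf.square(w1)) + tf.reduce_mean(tf.square(w2))
        self.loss = mse + 0.15 * l2
        self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss)

For a dataset generated by make_regression, which is linear by construction, this deeper model will not beat the closed-form solution that sklearn's LinearRegression finds; the point is only that the computation graph is easy to extend once the data is no longer linear.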
Note: the code in this example comes from Tencent's lab; I added the comments and made small modifications on top of it so that it is easier for beginners to read.