#coding:utf8
import tensorflow as tf
from sklearn import linear_model
from sklearn import preprocessing
import numpy as np
def read_data(file_queue):
    """Decode one CSV record from ``file_queue`` into a feature vector and label.

    The CSV is parsed with TensorFlow's own reader/decoder ops. The first line
    of each file is skipped as a header. Two text columns are turned into
    numeric codes so they can be stacked with the numeric columns.

    :param file_queue: queue of CSV file names to read from
    :return: (features, label) where features stacks, in this order,
        latitude, longitude, price, encoded buildingTypeName,
        encoded tradeTypeName and expectedDealPrice; label is daysOnMarket
    """
    def _encode(column, names):
        # Each known name maps to its position in ``names`` (as a float);
        # any other value falls through to the default branch, -1.
        branches = {
            tf.equal(column, tf.constant(name)): (lambda code=float(idx): tf.constant(code))
            for idx, name in enumerate(names)
        }
        return tf.case(branches, lambda: tf.constant(-1.00), exclusive=True)

    # The header row must be skipped when reading.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, line = line_reader.read(file_queue)

    # Per-column defaults used for empty fields; they also fix each column's dtype.
    column_defaults = [
        [''], [''], [''], [''],       # province, city, address, postCode
        [0.], [0.], [0.], [0.],       # longitude, latitude, price, buildingTypeId
        [''], [0], [''],              # buildingTypeName, tradeTypeId, tradeTypeName
        [0.], [''], [''], [0],        # expectedDealPrice, listingDate, delislingDate, daysOnMarket
    ]
    # Every evaluation reads one line; decode_csv turns it into a list of tensors.
    (_, _, _, _,
     longitude, latitude, price, _,
     building_type_name, _, trade_type_name,
     expected_deal_price, _, _, days_on_market) = tf.decode_csv(line, column_defaults)

    # Encode the non-numeric buildingTypeName column.
    building_type_code = _encode(building_type_name, [
        'Residential',
        'Condo',
        'Mobile Home',
        'No Building',
        'Row / Townhouse',
        'Duplex',
        'Manufactured Home',
        'Commercial',
        'Other',
    ])
    # Encode the non-numeric tradeTypeName column (Sale / Lease).
    trade_type_code = _encode(trade_type_name, ['Sale', 'Lease'])

    features = tf.stack([
        latitude,
        longitude,
        price,
        building_type_code,
        trade_type_code,
        expected_deal_price,
    ])
    return features, days_on_market
def create_pipeline(filename, batch_size, num_epochs=None):
    """Build the batched input tensors for one CSV file.

    Batches are produced with ``tf.train.batch``, i.e. records are taken in
    order rather than shuffled (``tf.train.shuffle_batch`` would be the
    shuffled alternative).

    :param filename: path of the CSV file to read
    :param batch_size: number of samples in each returned batch
    :param num_epochs: passed to ``tf.train.string_input_producer``
    :return: (example_batch, label_batch)
    """
    name_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
    # One sample's features and its label (daysOnMarket).
    features, target = read_data(name_queue)
    # Queue length: a fixed headroom of 1000 records plus one batch.
    queue_capacity = 1000 + batch_size
    # Take each batch in file order.
    feature_batch, target_batch = tf.train.batch(
        [features, target],
        batch_size=batch_size,
        capacity=queue_capacity,
    )
    return feature_batch, target_batch
def train(batch_size, feature_num, learn_rate, filename):
    """Fit a linear model ``y = x . w + b`` by gradient descent on CSV batches.

    Runs 100 steps. Each step pulls one batch from the input pipeline, prints
    it together with a scikit-learn least-squares fit of that batch (for
    comparison), standardizes the batch features and applies one
    gradient-descent update.

    NOTE(review): features are standardized per batch before each update, so
    the returned weights are fitted to standardized inputs. Callers that apply
    them to raw feature values should confirm this is intended.

    :param batch_size: number of samples per batch
    :param feature_num: number of features per sample
    :param learn_rate: gradient-descent learning rate
    :param filename: path of the CSV file with the training data
    :return: (w, b) as flattened numpy arrays
    """
    # Placeholders that are fed with the batches pulled from the pipeline.
    x_data = tf.placeholder(tf.float32, [batch_size, feature_num])
    y_data = tf.placeholder(tf.float32, [batch_size])
    # Model parameters w and b.
    w = tf.Variable(tf.random_uniform((feature_num, 1), -1.0, 1.0))
    b = tf.Variable(tf.random_uniform((1, 1), -1.0, 1.0))
    # Predicted y, shape [batch_size, 1].
    y = tf.add(tf.matmul(x_data, w), b)
    # The labels arrive as [batch_size]; make them a column so the subtraction
    # is element-wise. Subtracting a [batch_size] vector from a
    # [batch_size, 1] matrix would broadcast to [batch_size, batch_size] and
    # compare every prediction with every label.
    residual = y - tf.reshape(y_data, [-1, 1])
    # Loss: half the mean squared error.
    loss = tf.reduce_mean(tf.square(residual)) / 2
    # Plain gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train_op = optimizer.minimize(loss)
    # Batched samples and labels.
    example_batch, daysOnMarket_batch = create_pipeline(filename, batch_size)
    # Initialize global and local variables.
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    print('.........................>>>>开始会话')
    # The context manager closes the session automatically.
    with tf.Session() as sess:
        sess.run(init_op)
        # Start the threads that fill the input queues.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for step in range(100):
                # Materialize the actual samples and labels of this batch.
                example, label = sess.run([example_batch, daysOnMarket_batch])
                print('第%d批数据' % (step))
                print(example, label)
                print('.......这一批数据的直接参数')
                reg = linear_model.LinearRegression()
                reg.fit(example, label)
                print("Coefficients of sklearn: W=%s, b=%f" % (reg.coef_, reg.intercept_))
                # Standardize the features of this batch.
                scaler = preprocessing.StandardScaler().fit(example)
                print(scaler.mean_, scaler.scale_)
                x_data_standard = scaler.transform(example)
                sess.run(train_op, feed_dict={x_data: x_data_standard, y_data: label})
                # Report w and b every ten steps; reading variables needs no feed.
                if step % 10 == 0:
                    print('当前w值和b值')
                    current_w, current_b = sess.run([w, b])
                    print(current_w, current_b)
            print('。。。。。。。》》》训练后得到w和b')
            theta = sess.run(w).flatten()
            intercept = sess.run(b).flatten()
            print('W:%s' % theta)
            # intercept is a one-element array; format its scalar explicitly.
            print('b:%f' % intercept[0])
        finally:
            # Always stop and join the queue threads, even if a step failed.
            coord.request_stop()
            coord.join(threads)
    return theta, intercept
def predict(data, theta, intercept, feature_num):
    """Predict the label (daysOnMarket) for a single sample.

    Evaluates ``data . theta + intercept`` in a TensorFlow session, prints the
    result and returns it.

    :param data: sample to predict, fed as a [1, feature_num] float tensor
    :param theta: trained weights, fed as a [feature_num, 1] float tensor
    :param intercept: trained intercept, fed as a [1, 1] float tensor
    :param feature_num: number of features (independent variables)
    :return: the evaluated prediction
    """
    weights_ph = tf.placeholder(tf.float32, [feature_num, 1])
    bias_ph = tf.placeholder(tf.float32, [1, 1])
    sample_ph = tf.placeholder(tf.float32, [1, feature_num])
    prediction = tf.add(tf.matmul(sample_ph, weights_ph), bias_ph)
    init = tf.global_variables_initializer()
    feeds = {
        sample_ph: data,
        weights_ph: theta,
        bias_ph: intercept,
    }
    with tf.Session() as sess:
        sess.run(init)
        result = sess.run(prediction, feed_dict=feeds)
    print(result)
    return result
def data_type_conversion(data, theta, intercept, feature_num):
    """Convert the inputs to float32 and reshape them for matrix multiplication.

    The conversion and the reshape are chained so the returned arrays are
    really float32; converting into a throw-away variable and then reshaping
    the untouched original would leave the dtype unchanged.

    :param data: one sample with ``feature_num`` values
    :param theta: ``feature_num`` weights
    :param intercept: a single intercept value
    :param feature_num: number of features
    :return: (data as [1, feature_num], theta as [feature_num, 1],
        intercept as [1, 1]), all float32
    """
    data_real = np.asarray(data, dtype=np.float32).reshape(1, feature_num)
    theta_real = np.asarray(theta, dtype=np.float32).reshape(feature_num, 1)
    intercept_real = np.asarray(intercept, dtype=np.float32).reshape(1, 1)
    return data_real, theta_real, intercept_real
if __name__ == '__main__':
    # Interactive entry point: read one listing from the user, train on
    # house_info.csv, then predict the listing's days on market.
    #
    # Coordinates and prices are real-valued, so they are parsed as floats;
    # whole-number input is still accepted.
    input_longitude = float(input('请输入经度'))
    input_latitude = float(input('请输入纬度'))
    input_price = float(input('请输入价格'))
    # The category codes shown in the prompt match the encoding applied to the
    # training data in read_data (0..8 for building type, 0/1 for trade type).
    input_buildingtype = float(input('请输入房源类型名称:只有9种类型:Residential:0 ,Condo:1 Mobile Home:2,No Building:3 , Row / Townhouse:4 ,Duplex:5 ,Manufactured Home:6 ,Commercial:7 ,Other:8'))
    input_tradetype = float(input('请输入交易形式:只有两种Sale:0,Lease:1'))
    input_expected_deal_price = float(input('请输入期望的交易价格'))
    # Same column order as the training features built in read_data:
    # latitude, longitude, price, building type, trade type, expected price.
    data = np.array(
        [
            input_latitude,
            input_longitude,
            input_price,
            input_buildingtype,
            input_tradetype,
            input_expected_deal_price,
        ],
        dtype=np.float32,
    )
    theta, intercept = train(10, 6, 0.3, 'house_info.csv')
    data_real, theta_real, intercept_real = data_type_conversion(data, theta, intercept, 6)
    daysOnmarket = predict(data_real, theta_real, intercept_real, 6)
    # predict returns an array; take its single element before formatting.
    print('预测的天数:%d' % int(np.asarray(daysOnmarket).ravel()[0]))
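# Source article title: "tensorflow基于csv数据集实现多元线性回归并预测"
# (Multiple linear regression and prediction with TensorFlow on a CSV dataset.)
# Web-page residue: "最新推荐文章于 2023-09-11 15:33:22 发布"
# (latest recommended article published 2023-09-11 15:33:22).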