背景
在地图上渲染路况时,会针对不同的拥堵情况选择不同的颜色。一般来讲,道路拥堵情况分为三个状态,畅通,缓行,拥堵,分别用绿色,黄色,红色来渲染。
我们面临的问题是,已知道路属性以及通行速度,需要对路况状态进行分类。解决方案是依据第三方路况提供的路况状态以及抓取的高德路况状态来训练一个三分类模型。
特征处理
应用的特征如下
feature | description |
---|---|
speed | 路况速度 |
maxspeed | 道路最大速度 |
highway_level | 道路等级,共有17种可能,使用one-hot-encoding |
lanes | 车道数 |
oneway | 是否是单向路,使用one-hot-encoding |
路况状态使用 0-1-2 分别表示缓行-拥堵-畅通
处理好的特征保存为以**\t**分隔的文本文件,每行的最后一列代表路况状态。
模型训练
模型使用TensorFlow 提供的DNN分类器。代码如下
#-*- coding: utf-8 -*-
"""
File Name: traffic_status_classifier.py
Author: ce39906
mail: ce39906@163.com
Created Time: 2018-09-03 19:11:57
"""
import sys
import time
import numpy as np
import tensorflow as tf
# Names of the model input features. list_2_tf_dataset() pairs the i-th name
# with the i-th column of every data row, so the order here must match the
# column order of the training file.
FEATURES = (
    ["speed", "maxspeed"]
    # One-hot flags for the road level: level_1 .. level_17.
    + ["level_%d" % level for level in range(1, 18)]
    + ["lanes", "oneway_0", "oneway_1"]
)
def usage():
    """Print the command-line usage of this script to stdout."""
    # The parenthesized single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("python %s ${train_data_file}" % (sys.argv[0]))
def read_data(train_data_file):
    """Load tab-separated samples and split them into train and test sets.

    Every non-blank line holds numeric fields separated by tabs. Each field
    is parsed with float() and then truncated with int(). The last field of
    a row is the label; the fields before it are the features. The first 80%
    of the rows (in file order) form the training set, the rest the test set.

    Args:
        train_data_file: path of the tab-separated data file.

    Returns:
        A tuple (train_x, train_y, test_x, test_y) of plain Python lists,
        where the *_x entries are lists of feature rows and the *_y entries
        are lists of labels.

    Raises:
        ValueError: if a field cannot be parsed as a number.
    """
    xy_list = []
    with open(train_data_file, 'r') as f:
        for line in f:
            line = line.strip('\n')
            # Skip blank lines (e.g. an empty last line) instead of failing
            # on float('').
            if not line.strip():
                continue
            xy_list.append([int(float(x)) for x in line.split('\t')])

    # 80% as train data, 20% as test data
    split_at = int(len(xy_list) * 0.8)
    train_xy = xy_list[:split_at]
    test_xy = xy_list[split_at:]

    train_x = [row[:-1] for row in train_xy]
    train_y = [row[-1] for row in train_xy]
    test_x = [row[:-1] for row in test_xy]
    test_y = [row[-1] for row in test_xy]
    return train_x, train_y, test_x, test_y
def _columns_by_feature(rows):
    """Map every name in FEATURES to the matching column of ``rows``."""
    matrix = np.array(rows)
    # Column i of the row matrix belongs to FEATURES[i]; columns beyond
    # len(FEATURES) are ignored.
    return {name: matrix[:, i] for i, name in enumerate(FEATURES)}


def list_2_tf_dataset(train_x, train_y, test_x, test_y):
    """Convert row-oriented Python lists into the estimator input format.

    Args:
        train_x: list of training feature rows, one value per FEATURES entry.
        train_y: list of training labels.
        test_x: list of test feature rows, one value per FEATURES entry.
        test_y: list of test labels.

    Returns:
        A tuple (train_x_dataset, train_y_dataset, test_x_dataset,
        test_y_dataset). The *_x_dataset entries are dicts mapping each
        feature name to a numpy array holding that feature's column; the
        *_y_dataset entries are numpy arrays of labels.

    Raises:
        IndexError: if the rows are empty or have fewer columns than
            FEATURES has names.
    """
    return (_columns_by_feature(train_x), np.array(train_y),
            _columns_by_feature(test_x), np.array(test_y))
def train_input_fn(features, labels, batch_size, shuffle_buffer_size=1000):
    """Build the input pipeline used for training.

    Args:
        features: dict mapping feature names to arrays of values.
        labels: array of labels aligned with the feature arrays.
        batch_size: number of examples per batch.
        shuffle_buffer_size: size of the buffer passed to Dataset.shuffle().
            Defaults to 1000, the value that used to be hard-coded. A buffer
            smaller than the dataset only shuffles within a sliding window,
            so pass the dataset size to get a full shuffle.

    Returns:
        A tf.data.Dataset of (features, labels) batches that is shuffled and
        repeated.
    """
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    # Shuffle, repeat, and batch the examples.
    return dataset.shuffle(shuffle_buffer_size).repeat().batch(batch_size)
def eval_input_fn(features, labels, batch_size):
    """Build the input pipeline used for evaluation or prediction.

    Args:
        features: dict mapping feature names to arrays of values.
        labels: array of labels, or None to slice the features alone.
        batch_size: number of examples per batch.

    Returns:
        A batched tf.data.Dataset; it is neither shuffled nor repeated.
    """
    # Without labels only the features are sliced; otherwise features and
    # labels are sliced together.
    tensors = features if labels is None else (features, labels)
    return tf.data.Dataset.from_tensor_slices(tensors).batch(batch_size)
def main():
    """Train, evaluate and export the traffic status DNN classifier.

    Expects exactly one command-line argument, the path of the training data
    file. The data is read and split by read_data(), a three-class
    DNNClassifier is trained on the training part and evaluated on the test
    part, and the model is then exported as a SavedModel under the current
    directory. The export directory is printed at the end.
    """
    if len(sys.argv) != 2:
        usage()
        # Exit with a non-zero status so callers can detect the usage error.
        sys.exit(1)

    batch_size = 100
    steps = 10000
    train_data_file = sys.argv[1]

    # adapt to tensorflow format
    train_x_list, train_y_list, test_x_list, test_y_list = read_data(train_data_file)
    train_x, train_y, test_x, test_y = list_2_tf_dataset(
        train_x_list, train_y_list, test_x_list, test_y_list)

    # One numeric column per feature; FEATURES holds exactly the keys of
    # train_x, in a fixed order.
    feature_columns = [tf.feature_column.numeric_column(key=key)
                       for key in FEATURES]

    start_time = time.time()
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        n_classes=3,
        model_dir='./saved_model')
    # train the model
    classifier.train(
        input_fn=lambda: train_input_fn(train_x, train_y, batch_size),
        steps=steps)
    end_time = time.time()
    print('Train DNN Classifier cost %fs.' % (end_time - start_time))

    # evaluate the model
    eval_result = classifier.evaluate(
        input_fn=lambda: eval_input_fn(test_x, test_y, batch_size))
    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    # Export the trained model. Every feature is parsed from the serialized
    # tf.Example as a scalar int64, matching the integer values produced by
    # read_data().
    feature_spec = {name: tf.FixedLenFeature([], tf.int64) for name in FEATURES}

    def serving_input_receiver_fn():
        """Parse serialized tf.Example strings fed under the 'inputs' key."""
        serialized_tf_example = tf.placeholder(
            dtype=tf.string,
            shape=None,
            name="input_example_tensor")
        receiver_tensors = {'inputs': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, feature_spec)
        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

    saved_model_dir = classifier.export_savedmodel(
        '.', serving_input_receiver_fn=serving_input_receiver_fn)
    print(saved_model_dir)
if __name__ == "__main__":
    # Run the training pipeline only when this file is executed as a script.
    main()
输出如下
Python应用保存的模型
代码如下
#-*- coding: utf-8 -*-
"""
File Name: apply_saved_model.py
Author: ce39906
mail: ce39906@163.com
Created Time: 2018-09-04 18:48:37
"""
import sys
import tensorflow as tf
import numpy as np
def main():
    """Load an exported SavedModel and classify one hand-written sample.

    Expects exactly one command-line argument, the SavedModel directory
    printed by the training script. Builds a single tf.Example, feeds it to
    the model and prints the class with the highest score.
    """
    if len(sys.argv) != 2:
        print("python %s ${saved_model_dir}" % sys.argv[0])
        sys.exit(1)
    saved_model_dir = sys.argv[1]

    # The predictor loads the SavedModel itself, so no separate
    # tf.saved_model.loader.load() call into another session is needed.
    predictor = tf.contrib.predictor.from_saved_model(saved_model_dir)

    # Sample input: speed 9, max speed 90, road level 3, 3 lanes, one-way.
    sample = {'speed': 9, 'maxspeed': 90, 'lanes': 3, 'oneway_0': 0, 'oneway_1': 1}
    for level in range(1, 18):
        # One-hot encoding of the road level: only level_3 is set.
        sample['level_%d' % level] = 1 if level == 3 else 0

    features = tf.train.Features(feature={
        name: tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
        for name, value in sample.items()})
    model_input = tf.train.Example(features=features).SerializeToString()

    # 'inputs' is the receiver tensor key declared when the model was exported.
    output_dict = predictor({"inputs": [model_input]})
    classes_list = output_dict['classes']
    scores_list = output_dict['scores']
    for scores, classes in zip(scores_list, classes_list):
        scores = scores.tolist()
        classes = classes.tolist()
        # Pick the class label whose score is the largest.
        max_idx = scores.index(max(scores))
        print("Predicted traffic status is %s" % classes[max_idx])
if __name__ == "__main__":
    # Run the prediction demo only when this file is executed as a script.
    main()
输出如下
C++应用保存的模型
TODO