1.概述
在使用matplotlib.pyplot画出图形窗口时,通常会有不同的变种,本文根据实际使用经验,特记录以下几种常见的情况。
2. plt.subplot 操作
from __future__ import division
import os
import sys
sys.path.append('../../')
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
def compare_analysis(trip_time, th, path):
trip_time_idF = list(np.where(trip_time < trip_time_thresh)[0])
trip_time_idF_M = list(np.where(trip_time >=trip_time_thresh)[0])
'''give plot'''
plt.figure(figsize=(23,16))
plt.subplot(211)
plt.scatter(trip_time[trip_time_idF], th[trip_time_idF],alpha=0.5)
#plt.scatter(tripTime[tripTime_idF_M], th[tripTime_idF_M],c='r',marker='*',alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.xlim(trip_time.min()-1000, trip_time.max())
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.ylim(th.min()-1000, th.max())
plt.xticks(np.arange(0, 25000, 2500))
#plt.yticks(np.arange(-5, 5, 0.5))
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.subplot(212)
plt.scatter(trip_time[trip_time_idF], th[trip_time_idF],alpha=0.5)
plt.scatter(trip_time[trip_time_idF_M], th[trip_time_idF_M],c='r',marker='*',alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.xticks(np.arange(0, 25000, 2500))
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.savefig('{0}/compare.png'.format(path))
""" actual and regression time scatter """
def scatterPlot(rates, th, tr, tp, ta, path):
rates_h = rates[:,0] # hist pass rate,1 or 0
rates_r = rates[:,1] # real pass rate,1 or 0
rates_a = rates[:,2] # actual pass rate,1 or 0
#use_colours = {"T":"green","F":"red"}
colors = ['r','g']
plt.figure(figsize=(22,7))
plt.subplot(131)
plt.scatter(ta, th, c=[colors[int(x)] for x in rates_h], alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.subplot(132)
plt.scatter(ta, tr, c=[colors[int(x)] for x in rates_r], alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Real time with hist speed (s)',fontsize=16)
plt.grid(True)
plt.title('Real2Trip ScatterPlot',fontsize=16)
plt.subplot(133)
plt.scatter(ta, tp, c=[colors[int(x)] for x in rates_a],alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Regre time with Regression coffe (s)',fontsize=16)
plt.grid(True)
plt.title('Regre2Trip ScatterPlot',fontsize=16)
plt.savefig('{0}/timeScatter.png'.format(path))
3.axes操作
def histPlot(th, tr, tp, ta, path):
dth = th - ta
dtr = tr - ta
dtp = tp - ta
fig,axes = plt.subplots(1,3, figsize=(23,8)) #get 1x1 subplots
axes[0].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[0].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[0].set_title(u"Hist2trip time diff distribution",fontsize=16)
axes[0].set_xlim(-3600, 3600)
axes[0].hist(dth,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #th-tripTime
axes[0].grid(True)
#get 1x1 subplots
axes[1].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[1].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[1].set_title(u"Real2trip time diff distribution",fontsize=16)
axes[1].set_xlim(-3600, 3600)
axes[1].hist(dtr,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') # tr - tripTime
axes[1].grid(True)
axes[2].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[2].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[2].set_title(u"Regre2trip time diff distribution",fontsize=16)
axes[2].set_xlim(-3600, 3600)
axes[2].hist(dtp,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #tp - tripTime
axes[2].grid(True)
plt.savefig('{0}/timeDiff.png'.format(path))
4.具体应用
4.1 应用1
# -*- coding: utf-8 -*-
from __future__ import division
import os
import sys
sys.path.append('../../')
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge,Lasso,ElasticNet
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import tensorflow as tf
#from sklearn.preprocessing import StandardScaler
""" peak:[06:30,07:30)-[07:30,08:30)-[08:30,09:30)-[16:30,17:30)-[17:30,18:30)-[18:30,19:30)-[19:30,20:30) """
""" from starting point:[0,10)-[10,20)-[20,40)-[40,60)-[60,90)-[90,inf) """
""" traffic state: smooth 3 ; slow 2 ; congestion 1; heavy congestion 0 """
link_scena0 = 6 # time from starting point
link_scena1 = 5 # link road class
link_scena2 = 8 # link time phase(peak or not)
link_scena3 = 2 # link state (congestion 0 or not 1)
node_scena0 = 1 # node type
node_scena1 = 4 # node turn type
node_scena2 = 8 # node time phase
p_am_begin_0 = 195 # Am rush hour begin -period
p_am_begin_1 = 225 # Am rush hour begin -period
p_am_begin_2 = 255 # Am rush hour begin -period
p_am_end = 285 # Am rusuh hour end -period
p_pm_begin_0 = 495 # Pm rush hour begin -period
p_pm_begin_1 = 525 # Pm rush hour begin -period
p_pm_begin_2 = 555 # Pm rush hour begin -period
p_pm_begin_3 = 585 # Pm rush hour begin -period
p_pm_end = 615 # Pm rush hour end -period
t_begin_lower = "00:00:00"
t_begin_upper = "23:59:59"
"""trip time threshold when necessary"""
trip_time_thresh = 9000
scenario_num_filter = 100
""" generate map dict for link scenario """
scena_link = []
lc_ls = [0,1,3] # highway roads, urban express roads, main roads
fs_ls = [0,1,2] # 0~10min, 10~20min, 20~40min
for fs in range(link_scena0): # time from starting point
for lc in range(link_scena1): # link road class
for lp in range(link_scena2): # link time phase
for ls in range(link_scena3): # link state
if fs in fs_ls and lc in lc_ls:
if (fs,lc,lp,ls) not in scena_link:
scena_link.append((fs,lc,lp,ls))
else:
if (fs,lc,lp,1) not in scena_link:
scena_link.append((fs,lc,lp,1))
num_link_scena = len(scena_link)
links_dict = dict(zip(scena_link,range(num_link_scena)))
"""generate map dict for node scenario """
scena_node = []
for ntt in range(node_scena1):
for ntp in range(node_scena2):
scena_node.append((node_scena0,ntt,ntp))
num_node_scena = len(scena_node)
nodes_dict = dict(zip(scena_node,range(num_node_scena)))
""" get a generator for data stream """
def data_stream_trip(data_stream):
for data in data_stream:
yield data
""" load *.trip.etalr file and get trip time file for learning"""
def load_data_eta(trips_file, is_filter = False): #get data
filter_thresh = trip_time_thresh # trip time threshold
# trips_file = ["310000.20190312.trip.etalr"]
filterNum = 0
tripData = []
tripData_ = []
for trip_file in trips_file:
print('{} read file {}'.format(time.strftime('%Y-%m-%d %X'), trip_file))
trip_fd = open(trip_file, 'r')
ik = 0
for line in trip_fd:
line = line.strip().split('|')
if len(line) < 20: #20
continue
tmc_index,time_hist,time_real,\
from_start,time_phase,time_period,link_class,link_state,\
node_phase,node_type,node_turntype,node_time,\
begin_ts,end_ts,period,eta_act,th_,tr_,length,trip_id = line
if is_filter:
if float(eta_act) > filter_thresh:
filterNum +=1
continue
if ik % 100000 == 0:
print('{} read lines {}'.format(time.strftime('%Y-%m-%d %X'), ik))
ik += 1
if begin_ts < t_begin_lower or begin_ts >= t_begin_upper:
continue
tripData.append((
[float(t)*1.0 for t in time_hist.split(',')],
[float(t)*1.0 for t in time_real.split(',')],
[int(t) for t in from_start.split(',')],
[int(t) for t in time_phase.split(',')],
#[int(t) for t in time_period.split(',')],
[int(t) for t in link_class.split(',')],
[int(t) for t in link_state.split(',')],
[int(t) for t in node_phase.split(',') if t != ''],
[int(t) for t in node_type.split(',') if t != ''],
[int(t) for t in node_turntype.split(',') if t != ''],
[float(t) for t in node_time.split(',') if t != ''],
begin_ts,end_ts,int(period),float(eta_act),float(length),trip_id))
trip_fd.close()
"""combine road class for lower level and combine link state"""
for c, trip in enumerate(tripData):
time_hist,time_real,\
from_start,time_phase,link_class,link_state,\
node_phase,node_type,node_turntype,node_time,\
begin_ts,end_ts,period,eta_act,length,trip_id = trip
for ilc,lc in enumerate(link_class):
if lc == 6: # urban express roads
link_class[ilc] = 1
elif lc in [1,2,3]: # national highway,provincial highways, county roads,
link_class[ilc] = 2
elif lc in [7]: # main roads
link_class[ilc] = 3
elif lc in [4,5,8,9,10]:
# township highways,county rural internal roads,secondary roads,ordinary roads,small roads
link_class[ilc] = 4
# link_class[ilc] = lc-5 if lc>=8 else lc
if link_class[ilc] in lc_ls and from_start[ilc] in fs_ls:
if link_state[ilc] in [0,1,2]:
link_state[ilc] = 0
else:
link_state[ilc] = 1
else:
link_state[ilc] = 1
tripData_.append((time_hist,time_real,\
from_start,time_phase,link_class,link_state,\
node_phase,node_type,node_turntype,node_time,\
begin_ts,end_ts,period,eta_act,length,trip_id))
fmt = '{} filtered trip number is:{},rate is:{:.2f} owing to trip time'
print(fmt.format(time.strftime('%Y-%m-%d %X'),filterNum,filterNum/(len(tripData)+filterNum)))
return tripData_
""" convert trip time file to training file"""
def data_trip_processing(data, reg_flag = 2, is_jointReg = True):
tps = len(data)
t_links_ht = np.zeros((tps,num_link_scena))
t_links_rt = np.zeros((tps,num_link_scena))
t_nodes = np.zeros((tps,num_node_scena))
tripInfo = np.zeros((tps,5))
tripId = []
skip_links = 0
skip_nodes = 0
for i, trip in enumerate(data):
link_ht, link_rt, from_start, link_phase,link_class,link_state,\
node_phase, node_type, node_turntype, node_time, \
begin_ts, end_ts, trip_period, trip_time, trip_len, trip_id = trip
if len(from_start) != len(link_class) or len(link_class) != len(link_phase) or len(link_phase)!=len(link_state):
skip_links +=1
continue
links_map = []
for fs,lc,lp,ls in zip(from_start,link_class,link_phase,link_state):
links_map.append(links_dict[(fs,lc,lp,ls)])
for j, link in enumerate(links_map):
t_links_ht[i,link] +=link_ht[j]
t_links_rt[i,link] +=link_rt[j]
if node_type:
if len(node_type)!= len(node_turntype) or len(node_turntype)!=len(node_phase):
skip_nodes +=1
continue
nodes_map = []
for nt,ntt,ntp in zip(node_type,node_turntype,node_phase):
nodes_map.append(nodes_dict[(nt,ntt,ntp)])
for k, node in enumerate(nodes_map):
t_nodes[i,node] +=node_time[k]
tripInfo[i][0] = trip_period # trip period
tripInfo[i][1] = trip_len # tripLength
tripInfo[i][2] = len(link_phase) # linksNum for a trip
tripInfo[i][3] = len(node_phase) # nodesNum for a trip
tripInfo[i][4] = trip_time # tripTime
tripId.append(trip_id)
ta = tripInfo[:,4] # trip actual time
if is_jointReg == True: # weighted th & tr respectively
t_links = np.concatenate((t_links_ht, t_links_rt), axis=1)
Xdata = np.concatenate((t_links,t_nodes), axis=1)
Ydata = ta
else:
if reg_flag == 1: # only th
t_links = t_links_ht
Xdata = np.concatenate((t_links,t_nodes), axis=1)
Ydata = ta
elif reg_flag == 2: # only tr
t_links = t_links_rt
Xdata = np.concatenate((t_links,t_nodes), axis=1)
Ydata = ta
else: # weighted th & tr (k,1-k)
t_links = t_links_ht - t_links_rt # t_h - t_r
Xdata = np.concatenate((t_links,t_nodes), axis=1)
t_links_rt_x = np.sum(t_links_rt,axis=1)
Ydata = ta - t_links_rt_x
t_ht = np.sum(t_links_ht, axis=1)
t_rt = np.sum(t_links_rt, axis=1)
t_nt = np.sum(t_nodes, axis=1)
th = t_ht + t_nt # trip time with hist
tr = t_rt + t_nt # trip time with real
df_trip = pd.DataFrame(tripInfo[:,:4])
df_trip[4] = tripId #trip id
print('{} filtered trip number is:{} owing to links'.format(time.strftime('%Y-%m-%d %X'),skip_links))
print('{} filtered trip number is:{} owing to nodes'.format(time.strftime('%Y-%m-%d %X'),skip_nodes))
return Xdata,Ydata,t_ht,t_rt,t_nt,th,tr,ta,df_trip
""" ordinary least square(ols) linear/ridge regression model """
""" score:coefficient of determination R^2, the value range [0,1] """
""" the stronger the interpretation ability of the model """
def test_regression_ols(data_iter, path, is_jointReg = True, is_ridge = True):
""" models for comparison"""
# RR = Ridge(alpha=0.5,fit_intercept = False)
LR = LinearRegression(fit_intercept = False)
# LA = Lasso(alpha=0.0001,precompute=True,max_iter=1000,
# positive=True, random_state=9999, selection='random',fit_intercept = False)
LA = ElasticNet(alpha=0.001, l1_ratio=0.9, fit_intercept=False, precompute=True,
max_iter=1000,tol=0.01,random_state=9999,positive=True)
"""prepare data"""
train_trip = [item for item in data_iter] # invert iterator to list
tripDataTrain = load_data_eta(train_trip)
outPut = data_trip_processing(data = tripDataTrain, is_jointReg = is_jointReg)
Xdata, Ydata = outPut[0], outPut[1]
X_train, X_test, y_train, y_test = train_test_split(Xdata, Ydata, test_size = 0.01, random_state = 0)
if is_ridge:
# RR.fit(X_train, y_train)
LA.fit(X_train, y_train)
print("%s Coefficients:%s, intercept:%.4f" % (time.strftime('%Y-%m-%d %X'),LA.coef_, LA.intercept_))
print("{} RMSE:{:.4f}".format(time.strftime('%Y-%m-%d %X'),np.sqrt(np.mean((LA.predict(X_test) - y_test) ** 2))))
print("{} score:{:.4f}".format(time.strftime('%Y-%m-%d %X'),LA.score(X_test, y_test)))
coeff = np.around(LA.coef_, decimals=4)
interc = np.around(LA.intercept_, decimals=4)
rmse = np.sqrt(np.mean((LA.predict(X_test) - y_test) ** 2))
else:
LR.fit(X_train, y_train)
print("%s Coefficients:%s, intercept:%.4f" % (time.strftime('%Y-%m-%d %X'),LR.coef_, LR.intercept_))
print("{} RMSE:{:.4f}".format(time.strftime('%Y-%m-%d %X'),np.sqrt(np.mean((LR.predict(X_test) - y_test) ** 2))))
print("{} score:{:.4f}".format(time.strftime('%Y-%m-%d %X'),LR.score(X_test, y_test)))
coeff = np.around(LR.coef_, decimals=4)
interc = np.around(LR.intercept_, decimals=4)
rmse = np.sqrt(np.mean((LR.predict(X_test) - y_test) ** 2))
link_states, node_states = num_scenario(tripDataTrain)
scenarioPlot(link_states, node_states, path)
save_params(link_states, node_states, coeff, interc, path, is_jointReg = is_jointReg)
np.save(path+"/coeff_interc.npy", np.concatenate((coeff,np.array(interc).reshape(-1,)), axis=0))
return coeff, interc, rmse
""" sgd linear/ridge regression model """
def test_regression_sgd(data_iter, path, partial_fit_num =600, display_step=100, is_jointReg=True):
sgdR = SGDRegressor(max_iter=5000, tol=0.01, epsilon=5, eta0=1e-6,fit_intercept=False,
loss="epsilon_insensitive", penalty="l2")
link_states, node_states = None, None
for ik, train_trip in enumerate(data_iter):
# load training and test data and split for training and validation
print("{} generate training data No.{}".format(time.strftime('%Y-%m-%d %X'),ik))
tripDataTrain = load_data_eta([train_trip])
outPut = data_trip_processing(data = tripDataTrain, is_jointReg = is_jointReg)
Xdata, Ydata = outPut[0], outPut[1]
link_states_, node_states_ = num_scenario(tripDataTrain)
if link_states is None or node_states is None:
link_states, node_states = link_states_, node_states_
else:
link_states[:,0] = link_states[:,0] + link_states_[:,0]
node_states[:,0] = node_states[:,0] + node_states_[:,0]
X_train, X_test, y_train, y_test = train_test_split(Xdata, Ydata, test_size = 0.01, random_state = 0)
print("{} start training and get result No.{}".format(time.strftime('%Y-%m-%d %X'),ik))
# iter some times for each data in data iterator
for n in range(partial_fit_num):
sgdR.partial_fit(X_train, y_train)
if n % display_step ==0:
print('{} training times {}'.format(time.strftime('%Y-%m-%d %X'), n))
print("%s Coefficients:%s, intercept:%.4f" % (time.strftime('%Y-%m-%d %X'),sgdR.coef_, sgdR.intercept_))
print("{} RMSE:{:.4f}".format(time.strftime('%Y-%m-%d %X'),np.sqrt(np.mean((sgdR.predict(X_test) - y_test) ** 2))))
print("{} score:{:.4f}".format(time.strftime('%Y-%m-%d %X'),sgdR.score(X_test, y_test)))
outPut, Xdata, Ydata, X_train, y_train = None, None, None, None, None
coeff = np.around(sgdR.coef_, decimals=4)
interc = np.around(sgdR.intercept_[0], decimals=4)
rmse = np.sqrt(np.mean((sgdR.predict(X_test) - y_test) ** 2))
scenarioPlot(link_states, node_states, path)
save_params(link_states, node_states, coeff, interc, path, is_jointReg = is_jointReg)
np.save(path+"/coeff_interc.npy", np.concatenate((coeff,np.array(interc).reshape(-1,)), axis=0))
return coeff, interc, rmse
""" testing different lambda on prediction performance"""
def test_ridge_lmbda(*data):
x_train, x_test, y_train, y_test = data
lmbdas = [0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
scores = []
for i, lmbda in enumerate(lmbdas):
RR = Ridge(alpha = lmbda)
RR.fit(x_train, y_train)
scores.append(RR.score(x_test, y_test))
return lmbdas, scores
""" get scenario number for different scenario"""
def num_scenario(data):
tripData = data
link_dict = {}
node_dict = {}
for i, line in enumerate(tripData):
link_ht, link_rt, from_start, link_phase, link_class, link_state,\
node_phase, node_type, node_turntype, node_time,\
begin_ts, end_ts, trip_period, trip_time, trip_len, trip_id= line
for lkey in zip(from_start, link_class, link_phase, link_state):
if not lkey in link_dict:
link_dict[lkey] = 1
else:
link_dict[lkey] += 1
for nkey in zip(node_type, node_turntype, node_phase):
if not nkey in node_dict:
node_dict[nkey] = 1
else:
node_dict[nkey] += 1
link_states = np.zeros((num_link_scena,5), dtype = np.int32)
node_states = np.zeros((num_node_scena,4), dtype = np.int32)
link_states[:,1:] = np.array(scena_link)
node_states[:,1:] = np.array(scena_node)
for lkey in link_dict:
lk = links_dict[lkey] #row number
link_states[lk,0] = link_dict[lkey]
for nkey in node_dict:
nk = nodes_dict[nkey] #row number
node_states[nk,0] = node_dict[nkey]
return link_states, node_states
""" trip time with regression prediction"""
def time_reg(Xdata, coeff, interc):
tp = np.around(np.dot(Xdata,coeff) + interc, decimals=2)
return tp
""" evaluate eta to get csv file"""
def eta_evaluate_csv(t_hist_link, t_real_link, t_node, t_hist, t_tmc, t_reg, t_act, df_trip, path):
rate_eta = np.zeros((t_hist_link.size,3))
time_ = np.concatenate((t_act.reshape(-1,1), t_hist.reshape(-1,1)), axis=1)
time_ = np.concatenate((time_, t_tmc.reshape(-1,1)), axis=1)
time_ = np.concatenate((time_, t_reg.reshape(-1,1)), axis=1)
for i, act in enumerate(t_act):
rate = get_pass_rate(tuple(time_[i]))
rate_eta[i,0] = rate[0]
rate_eta[i,1] = rate[1]
rate_eta[i,2] = rate[2]
df_time = df_trip # tripPeriod,triplength,linksNum,nodesNum,tripId
df_time[5] = t_hist_link # static_link_time
df_time[6] = t_real_link # tmc_link_time
df_time[7] = t_node # node_time
df_time[8] = t_hist # static_time
df_time[9] = t_tmc # tmc_time
df_time[10] = t_reg # reg_time
df_time[11] = t_act # actual_time
df_time[12] = np.abs(t_act - t_reg) #absoluteDiff
df_time[13] = np.around(np.abs(t_act - t_reg)/t_act *100, decimals=2) #relativeDiff%
df_time[14] = rate_eta[:,0] #static ok(0/1)
df_time[15] = rate_eta[:,1] #tmc ok(0/1)
df_time[16] = rate_eta[:,2] #reg ok(0/1)
static_rate = round(np.mean(rate_eta[:,0]),4)
tmc_rate = round(np.mean(rate_eta[:,1]),4)
reg_rate = round(np.mean(rate_eta[:,2]),4)
"""" Am ok rate"""
reg_rate_am_0 = df_time[(df_time.iloc[:,0] >=p_am_begin_0)&(df_time.iloc[:,0] <p_am_begin_1)].iloc[:,16].mean() # [06:30,07:30)
reg_rate_am_1 = df_time[(df_time.iloc[:,0] >=p_am_begin_1)&(df_time.iloc[:,0] <p_am_begin_2)].iloc[:,16].mean() # [07:30-08:30)
reg_rate_am_2 = df_time[(df_time.iloc[:,0] >=p_am_begin_2)&(df_time.iloc[:,0] <p_am_end)].iloc[:,16].mean() # [08:30-09:30)
reg_rate_am = df_time[(df_time.iloc[:,0] >=p_am_begin_0)&(df_time.iloc[:,0] <p_am_end)].iloc[:,16].mean() # 06:30-09:30
"""" Pm/off-peak ok rate"""
reg_rate_pm_0 = df_time[(df_time.iloc[:,0] >=p_pm_begin_0)&(df_time.iloc[:,0] <p_pm_begin_1)].iloc[:,16].mean() # [16:30,17:30)
reg_rate_pm_1 = df_time[(df_time.iloc[:,0] >=p_pm_begin_1)&(df_time.iloc[:,0] <p_pm_begin_2)].iloc[:,16].mean() # [17:30-18:30)
reg_rate_pm_2 = df_time[(df_time.iloc[:,0] >=p_pm_begin_2)&(df_time.iloc[:,0] <p_pm_begin_3)].iloc[:,16].mean() # [18:30-19:30)
reg_rate_pm_3 = df_time[(df_time.iloc[:,0] >=p_pm_begin_3)&(df_time.iloc[:,0] <p_pm_end)].iloc[:,16].mean() # [19:30-20:30)
reg_rate_pm = df_time[(df_time.iloc[:,0] >=p_pm_begin_0)&(df_time.iloc[:,0] <p_pm_end)].iloc[:,16].mean() # 16:30-20:30
reg_rate_no = df_time[(df_time.iloc[:,0] <p_am_begin_0)|(df_time.iloc[:,0] >=p_pm_end)|(
(df_time.iloc[:,0] >=p_am_end)&(df_time.iloc[:,0] <p_pm_begin_0))].iloc[:,16].mean()
"""replace nan"""
if np.isnan(reg_rate_am_0): reg_rate_am_0 = 0
if np.isnan(reg_rate_am_1): reg_rate_am_1 = 0
if np.isnan(reg_rate_am_2): reg_rate_am_2 = 0
if np.isnan(reg_rate_am): reg_rate_am = 0
if np.isnan(reg_rate_pm_0): reg_rate_pm_0 = 0
if np.isnan(reg_rate_pm_1): reg_rate_pm_1 = 0
if np.isnan(reg_rate_pm_2): reg_rate_pm_2 = 0
if np.isnan(reg_rate_pm_3): reg_rate_pm_3 = 0
if np.isnan(reg_rate_pm): reg_rate_pm = 0
if np.isnan(reg_rate_no): reg_rate_no = 0
print("{} static ok rate(in total) : {:.3%}".format(time.strftime('%Y-%m-%d %X'),static_rate))
print("{} tmc ok rate(in total) : {:.3%}".format(time.strftime('%Y-%m-%d %X'),tmc_rate))
print("{} reg ok rate(in total) : {:.3%}".format(time.strftime('%Y-%m-%d %X'),reg_rate))
print("{} reg ok rate(rush hour--am): {:.3%}({:.3%},{:.3%},{:.3%})".format(time.strftime('%Y-%m-%d %X'),
reg_rate_am,reg_rate_am_0,reg_rate_am_1,reg_rate_am_2))
print("{} reg ok rate(rush hour--pm): {:.3%}({:.3%},{:.3%},{:.3%},{:.3%})".format(time.strftime('%Y-%m-%d %X'),
reg_rate_pm,reg_rate_pm_0,reg_rate_pm_1,reg_rate_pm_2,reg_rate_pm_3))
print("{} reg ok rate(off-peak time): {:.3%}".format(time.strftime('%Y-%m-%d %X'),reg_rate_no))
cols = ['tripPeriod', 'tripLength','linksNum','nodesNum','tripId','static_link_time', 'tmc_link_time',
'node_time', 'static_time', 'tmc_time','reg_time','actual_time', 'absoluteDiff','relativeDiff%',
'static ok(0/1)', 'tmc ok(0/1)','reg ok(0/1)']
df_time.columns = cols
df_time.to_csv(path + '/eta_result.csv', header=True, index=True)
df_time_abs = df_time.sort_values(by = ['absoluteDiff'],axis = 0,ascending = False)
df_time_abs.to_csv(path + '/eta_absolute.csv', header=True, index=True)
df_time_re = df_time.sort_values(by = ['relativeDiff%'],axis = 0,ascending = False)
df_time_re.to_csv(path + '/eta_relative.csv', header=True, index=True)
return rate_eta
""" pass_rate calculate"""
def get_pass_rate(trip_time):
standard_time, static_time, tmc_time, regr_time = trip_time
static_diff = abs(standard_time - static_time)/standard_time
tmc_diff = abs(standard_time - tmc_time)/standard_time
regr_diff = abs(standard_time - regr_time)/standard_time
actual_time = standard_time
tmc_diff_time = abs(standard_time - tmc_time)
static_diff_time = abs(standard_time - static_time)
regr_diff_time = abs(standard_time - regr_time)
static_ok = 0
tmc_ok = 0
regr_ok = 0
if actual_time <= 10 * 60:
if static_diff <= 0.3:
static_ok = 1
elif actual_time <= 15 * 60:
if static_diff_time <= 3 * 60:
static_ok = 1
elif actual_time <= 20 * 60:
if static_diff <= 0.2:
static_ok = 1
elif actual_time <= 40 * 60:
if static_diff_time <= 4 * 60:
static_ok = 1
else:
if static_diff <= 0.1:
static_ok = 1
if actual_time <= 10 * 60:
if tmc_diff <= 0.3:
tmc_ok = 1
elif actual_time <= 15 * 60:
if tmc_diff_time <= 3 * 60:
tmc_ok = 1
elif actual_time <= 20 * 60:
if tmc_diff <= 0.2:
tmc_ok = 1
elif actual_time <= 40 * 60:
if tmc_diff_time <= 4 * 60:
tmc_ok = 1
else:
if tmc_diff <= 0.1:
tmc_ok = 1
if actual_time <= 10 * 60:
if regr_diff <= 0.3:
regr_ok = 1
elif actual_time <= 15 * 60:
if regr_diff_time <= 3 * 60:
regr_ok = 1
elif actual_time <= 20 * 60:
if regr_diff <= 0.2:
regr_ok = 1
elif actual_time <= 40 * 60:
if regr_diff_time <= 4 * 60:
regr_ok = 1
else:
if regr_diff <= 0.1:
regr_ok = 1
ret = (static_ok, tmc_ok, regr_ok)
return ret
""" output link and node coeff params for different scenario"""
def save_params(link_states, node_states, coeff, interc, path, is_jointReg = True):
df_coeffLink = pd.DataFrame(link_states) # trip link scena
df_coeffNode = pd.DataFrame(node_states) # trip node scena
if is_jointReg:
df_coeffLink[5] = coeff[:num_link_scena] #static coeff
df_coeffLink[6] = coeff[num_link_scena:num_link_scena*2] #tmc coeff
df_coeffLink[7] = interc #trip interc
df_coeffNode[4] = coeff[num_link_scena*2:]
df_coeffLink.columns = ['linkTotal','linkFromStp','linkClass','linkPhase','linkState','staticCoeff','tmcCoeff','interc']
df_coeffNode.columns = ['nodeTotal','nodeType','nodeTurnType','nodePhase', 'nodeCoeff']
else:
df_coeffLink[5] = coeff[:num_link_scena] #static or tmc coeff
df_coeffLink[6] = interc #trip interc
df_coeffNode[4] = coeff[num_link_scena:]
df_coeffLink.columns = ['linkTotal','linkFromStp','linkClass','linkPhase','linkState','Coeff','interc']
df_coeffNode.columns = ['nodeTotal','nodeType','nodeTurnType','nodePhase', 'nodeCoeff']
df_coeffLink_filter = df_coeffLink[(df_coeffLink.iloc[:,0] <scenario_num_filter)] # scenario filter
df_coeffLink.to_csv(path + '/link_params.csv', header=True, index=True)
df_coeffLink_filter.to_csv(path + '/link_params_filter.csv', header=True, index=True)
df_coeffNode.to_csv(path + '/node_params.csv', header=True, index=True)
"""compare trip time when necessary """
def compare_analysis(trip_time, th, path):
trip_time_idF = list(np.where(trip_time < trip_time_thresh)[0])
trip_time_idF_M = list(np.where(trip_time >=trip_time_thresh)[0])
'''give plot'''
plt.figure(figsize=(23,16))
plt.subplot(211)
plt.scatter(trip_time[trip_time_idF], th[trip_time_idF],alpha=0.5)
#plt.scatter(tripTime[tripTime_idF_M], th[tripTime_idF_M],c='r',marker='*',alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.xlim(trip_time.min()-1000, trip_time.max())
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.ylim(th.min()-1000, th.max())
plt.xticks(np.arange(0, 25000, 2500))
#plt.yticks(np.arange(-5, 5, 0.5))
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.subplot(212)
plt.scatter(trip_time[trip_time_idF], th[trip_time_idF],alpha=0.5)
plt.scatter(trip_time[trip_time_idF_M], th[trip_time_idF_M],c='r',marker='*',alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.xticks(np.arange(0, 25000, 2500))
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.savefig('{0}/compare.png'.format(path))
""" plot fig for scenario number """
def scenarioPlot(link_states, node_states, path):
df_link = pd.DataFrame(link_states)
df_node = pd.DataFrame(node_states)
df_link.columns = ['linkTotal','linkFromStp','linkClass','linkPhase','linkState']
df_node.columns = ['nodeTotal','nodeType','nodeTurnType','nodePhase']
df_link_max = df_link.sort_values(by = ['linkTotal'],axis = 0,ascending = False).iloc[:10,:]
df_link_min = df_link.sort_values(by = ['linkTotal'],axis = 0,ascending = False).iloc[-10:,:]
df_link_max = df_link_max.set_index(['linkFromStp','linkClass','linkPhase','linkState'])
df_link_min = df_link_min.set_index(['linkFromStp','linkClass','linkPhase','linkState'])
df_node_max = df_node.sort_values(by = ['nodeTotal'],axis = 0,ascending = False).iloc[:10,:]
df_node_min = df_node.sort_values(by = ['nodeTotal'],axis = 0,ascending = False).iloc[-10:,:]
df_node_max = df_node_max.set_index(['nodeType','nodeTurnType','nodePhase'])
df_node_min = df_node_min.set_index(['nodeType','nodeTurnType','nodePhase'])
fig,axes = plt.subplots(2,2, figsize=(24,18)) #get 1x1 subplots
df_link_max.plot(kind='barh',ax=axes[0,0],title=u" ",alpha=0.95,stacked=True)
df_link_max.linkTotal.plot(kind='barh',ax=axes[0,0],title=u" ",alpha=0.95,stacked=True)
axes[0,0].set_xlabel(r'Scenario number',fontsize=16) #set the coordinate axis-x label
axes[0,0].set_ylabel('linkFromStp,linkClass,linkPhase,linkState',fontsize=16)
axes[0,0].set_title(u"Link scenario(Max-10)",fontsize=16)
df_link_min.plot(kind='barh',ax=axes[0,1],title=u" ",alpha=0.95,stacked=True)
df_link_min.linkTotal.plot(kind='barh',ax=axes[0,1],title=u" ",alpha=0.95,stacked=True)
axes[0,1].set_xlabel(r'Scenario number',fontsize=16) #set the coordinate axis-x label
axes[0,1].set_ylabel('linkFromStp,linkClass,linkPhase,linkState',fontsize=16)
axes[0,1].set_title(u"Link scenario(Min-10)",fontsize=16)
df_node_max.plot(kind='barh',ax=axes[1,0],title=u" ",alpha=0.95,stacked=True)
df_node_max.nodeTotal.plot(kind='barh',ax=axes[1,0],title=u" ",alpha=0.95,stacked=True)
axes[1,0].set_xlabel(r'Scenario number',fontsize=16) #set the coordinate axis-x label
axes[1,0].set_ylabel('nodeType,nodeTurnType,nodePhase',fontsize=16)
axes[1,0].set_title(u"Node scenario(Max-10)",fontsize=16)
df_node_min.plot(kind='barh',ax=axes[1,1],title=u" ",alpha=0.95,stacked=True)
df_node_min.nodeTotal.plot(kind='barh',ax=axes[1,1],title=u" ",alpha=0.95,stacked=True)
axes[1,1].set_xlabel(r'Scenario number',fontsize=16) #set the coordinate axis-x label
axes[1,1].set_ylabel('nodeType,nodeTurnType,nodePhase',fontsize=16)
axes[1,1].set_title(u"Node scenario(Min-10)",fontsize=16)
plt.savefig('{0}/scenarioDistri.png'.format(path))
"""time diff distribution"""
def histPlot(th, tr, tp, ta, path):
dth = th - ta
dtr = tr - ta
dtp = tp - ta
fig,axes = plt.subplots(1,3, figsize=(23,8)) #get 1x1 subplots
axes[0].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[0].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[0].set_title(u"Hist2trip time diff distribution",fontsize=16)
axes[0].set_xlim(-3600, 3600)
axes[0].hist(dth,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #th-tripTime
axes[0].grid(True)
#get 1x1 subplots
axes[1].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[1].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[1].set_title(u"Real2trip time diff distribution",fontsize=16)
axes[1].set_xlim(-3600, 3600)
axes[1].hist(dtr,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') # tr - tripTime
axes[1].grid(True)
axes[2].set_xlabel(r'$\Delta$' + "t (s)",fontsize=16) #set the coordinate axis-x label
axes[2].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[2].set_title(u"Regre2trip time diff distribution",fontsize=16)
axes[2].set_xlim(-3600, 3600)
axes[2].hist(dtp,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #tp - tripTime
axes[2].grid(True)
plt.savefig('{0}/timeDiff.png'.format(path))
""" actual and regression time scatter """
def scatterPlot(rates, th, tr, tp, ta, path):
rates_h = rates[:,0] # hist pass rate,1 or 0
rates_r = rates[:,1] # real pass rate,1 or 0
rates_a = rates[:,2] # actual pass rate,1 or 0
#use_colours = {"T":"green","F":"red"}
colors = ['r','g']
plt.figure(figsize=(22,7))
plt.subplot(131)
plt.scatter(ta, th, c=[colors[int(x)] for x in rates_h], alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
plt.subplot(132)
plt.scatter(ta, tr, c=[colors[int(x)] for x in rates_r], alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Real time with hist speed (s)',fontsize=16)
plt.grid(True)
plt.title('Real2Trip ScatterPlot',fontsize=16)
plt.subplot(133)
plt.scatter(ta, tp, c=[colors[int(x)] for x in rates_a],alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Regre time with Regression coffe (s)',fontsize=16)
plt.grid(True)
plt.title('Regre2Trip ScatterPlot',fontsize=16)
plt.savefig('{0}/timeScatter.png'.format(path))
def linear_plot(X_train, y_train, coeff, interc):
tp_train = np.dot(X_train,coeff) + interc
ta_train = y_train
plt.figure(figsize=(12,12))
plt.scatter(tp_train, ta_train,alpha=0.5)
# plt.scatter(tt[tt_idF_M], th[tt_idF_M],c='r',marker='*',alpha=0.5)
plt.xlabel('Trip actual time (s)',fontsize=16)
plt.ylabel('Hist time with hist speed (s)',fontsize=16)
plt.grid(True)
plt.title('Hist2Trip ScatterPlot',fontsize=16)
def show_plot(alphas, scores):
figure = plt.figure()
ax = figure.add_subplot(1, 1, 1)
ax.plot(alphas, scores)
ax.set_xlabel(r"$\lambda$")
ax.set_ylabel(r"score")
ax.set_xscale("log")
ax.set_title("Ridge")
ax.set_gid
plt.show()
class linear_model_tf(object): # define a class for linear regression model
def __init__(self, file_num=1, data_iter=None, path=None, is_jointReg=True): # initialization
self.num = file_num
self.iter = data_iter
self.path = path
self.jointReg = is_jointReg
def get_minibatch(self):
X_train, X_test = np.asarray([],dtype=float),np.asarray([],dtype=float)
y_train, y_test = np.asarray([],dtype=float),np.asarray([],dtype=float)
for data_file in itertools.islice(self.iter, self.num):
tripDataTrain = load_data_eta([data_file])
outPut = data_trip_processing(data = tripDataTrain, is_jointReg = self.jointReg)
Xdata, Ydata = outPut[0], outPut[1]
X_train, X_test, y_train, y_test = train_test_split(Xdata, Ydata, test_size = 0.01, random_state = 0)
return X_train, y_train, X_test, y_test
def iter_minibatches(self):
"""Generator of minibatches."""
X_train, y_train, X_test, y_test = self.get_minibatch()
while len(X_train):
yield X_train, y_train,X_test, y_test
X_train, y_train,X_test, y_test =self.get_minibatch()
def tf_linear_regression(self,epochs=2000,display_step=500,lmbda=1e-3,lr = 0.001):
# create symbolic variables
if self.jointReg:
cols = num_link_scena*2 + num_node_scena
else:
cols = num_link_scena + num_node_scena
X = tf.placeholder(tf.float32,[None,cols],name='input-X')
Y = tf.placeholder(tf.float32,[None,1],name='input-Y')
# create shared variable
W = tf.Variable(tf.random_normal([cols,1],stddev=0.01),name='coefficients')
b = tf.Variable(tf.random_normal([1],stddev=0.01),name='intercept')
Y_pred = tf.add(tf.matmul(X,W),b)
loss = tf.reduce_mean(tf.square(Y_pred-Y))
loss = tf.add(loss, tf.contrib.layers.l2_regularizer(lmbda)(W))
minibatch_iterators = self.iter_minibatches()
# define Optimizer
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
#define session
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# training...
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# iter...
for i, (x_train, y_train,x_test,y_test) in enumerate(minibatch_iterators):
y_train,y_test = y_train.reshape(-1,1),y_test.reshape(-1,1)
for epoch in range(epochs):
sess.run(optimizer, feed_dict={X: x_train, Y: y_train})
# display logs per epoch step
if (epoch+1) % display_step == 0:
c = sess.run(loss, feed_dict={X: x_train, Y:y_train})
print("{} Epoch: {:04d}, cost={:.6f}".format(time.strftime('%Y-%m-%d %X'),epoch+1, c))
coeff,interc = sess.run(W),sess.run(b)
coeff = np.around(coeff, decimals=4)
interc = np.around(interc, decimals=4)
y_pred = np.dot(x_test, coeff) + interc
rmse = np.sqrt(np.mean((y_pred - y_test) ** 2))
print("%s Coefficients:%s, intercept:%.4f" % (time.strftime('%Y-%m-%d %X'),coeff.reshape(-1,), interc))
print("{} RMSE:{:.4f}".format(time.strftime('%Y-%m-%d %X'),rmse))
print("{} score: {:.4f}".format(time.strftime('%Y-%m-%d %X'),r2_score(y_pred, y_test)))
# scenarioPlot(link_states, node_states, self.path)
# save_params(link_states, node_states, coeff, interc, path, is_jointReg = is_jointReg)
params = np.concatenate((coeff.reshape(-1,),np.array(interc).reshape(-1,)),axis=0)
np.save(self.path+"/coeff_interc.npy", params)
return coeff.reshape(-1,), interc, rmse
if __name__ == '__main__':
is_tf = False # define regression framework,scikit-learn or tensorflow
is_ols = True # define regression model, ols or sgd.svm
is_jointReg = True # define the object of regression, if tr/th respectively or together
sys_len = len(sys.argv)
if sys_len < 5:
print('usage: {} <data_dir> <citycode> <training_data date> <test_data date>'.format(sys.argv[0]))
sys.exit(1)
data_dir = sys.argv[1] # data dir
citycode = sys.argv[2] # city code
data_train = [os.path.join(data_dir, '{}.{}.trip.etalr'.format(citycode, sys.argv[i].replace('-','')))
for i in range(3,sys_len-1)] # training data date
test_trip = [os.path.join(data_dir, '{}.{}.trip.etalr'.format(citycode, sys.argv[-1].replace('-','')))]
data_stream = data_stream_trip(data_train) #iter(data_train)
""" data file for training and test """
""" train_trips = ["310000.20190313.trip.etalr"] """
""" test_trips = ["310000.20190312.trip.etalr","310000.20190314.trip.etalr"]"""
# train_trip = data_stream_trip(data_train)
if is_tf:
lm_tf = linear_model_tf(file_num=1, data_iter=data_stream, is_jointReg=is_jointReg,path=data_dir)
coeff, interc, rmse = lm_tf.tf_linear_regression()
lm_tf = None
else:
if is_ols:
coeff, interc, rmse = test_regression_ols(data_stream, data_dir)
else:
coeff, interc, rmse = test_regression_sgd(data_stream, data_dir)
# coeff, interc, rmse = test_regression_ols(data_stream, data_dir)
print("{} generate test data".format(time.strftime('%Y-%m-%d %X')))
tripDataTest = load_data_eta(test_trip)
Xdata, Ydata1, t_ht, t_rt, t_nt, th, tr, ta, df_trip = data_trip_processing(data = tripDataTest, is_jointReg = is_jointReg)
print("{} using result to evaluate test data".format(time.strftime('%Y-%m-%d %X')))
tp = time_reg(Xdata, coeff, interc) #regression time
rates = eta_evaluate_csv(t_ht, t_rt, t_nt, th, tr, tp, ta, df_trip, data_dir)
histPlot(th, tr, tp, ta, data_dir)
scatterPlot(rates, th, tr, tp, ta, data_dir)
print('{} finished,anything is oK!'.format(time.strftime('%Y-%m-%d %X')))
4.2 应用2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
npg=np.genfromtxt
#import scipy
#fig=plt.figure()
#ax=fig.add_subplot(1,1,1)
"""b :blue,r:red,g: green,c:cyan,m: magenta,y:yellow,k:black, w:white"""
def data_getting():
mr_=np.load(path1+'\\'+'mr_.npy','r+')
mr_train=np.load(path1+'\\'+'mr_train.npy','r+')
mr_test=np.load(path1+'\\'+'mr_test.npy','r+')
params=npg(path1+'\\'+'params.txt',delimiter=':',dtype=None,encoding='utf-8')
params=dict(params) #params dict
rmse_interval=int(params['rmse_interval']) #get time slot end
T=np.load(path1+'\\'+'T.npy','r+')
R=np.load(path1+'\\'+'R.npy','r+')
G=np.load(path1+'\\'+'G.npy','r+')
F=np.load(path1+'\\'+'F.npy','r+')
mrPred=np.dot(T, R.T)
rmse_train=np.loadtxt(path1+'\\'+'rmse_train.csv')
rmse_test=np.loadtxt(path1+'\\'+'rmse_test.csv')
loss=np.loadtxt(path1+'\\'+'loss.csv')
return mr_,mr_train,mr_test,mrPred,T,R,G,F,rmse_train,rmse_test,loss,rmse_interval
def data_prcocessing():
mr_,mr_train,mr_test,mrPred,T,R,G,F,rmse_train,rmse_test,loss,invl=data_getting()
orig_train = mr_train[mr_train>0.1] # initial trainning value
orig_test = mr_test[mr_test>0.1] # initial test value
orig_data = np.hstack((orig_train,orig_test)) # initial value
pred_train = mrPred[mr_train>0.1] # predicted trainning value
pred_test = mrPred[mr_test>0.1] # predicted test value
pos_missing=np.where(mr_[:,:]<0.1) # all missing position
pred_missing=mrPred[pos_missing] # predicted value for missing position
zero_columns=list(np.where(~mr_.any(axis=0))[0])# the columns all zeros(fiber missing)
pred_fmissing=mrPred[:,zero_columns].flatten() #(fiber-like missing speed)
emissing_mr_= np.delete(mr_,zero_columns, axis=1)
emissing_pos=np.where(emissing_mr_[:,:]<0.1)
pred_emissing=mrPred[emissing_pos] #(element-like missing speed)
return orig_data,orig_train,orig_test,pred_missing,pred_emissing,pred_fmissing,\
pred_train,pred_test,rmse_train,rmse_test,loss,invl
def speed_plot(orig_data, pred_train, pred_test,
pred_missing, pred_emissing,pred_fmissing,rmse_train,rmse_test):
fig,axes = plt.subplots(2,3, figsize=(30,18)) #get 2x2 subplots
#get figure for speed distribution
axes[0, 0].set_xlabel(u'Speed(km/h)',fontsize=16) #set the coordinate axis-x label
axes[0, 0].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[0, 0].set_title(u"Original No-missing(num:{})".format(len(orig_data)),fontsize=16)
axes[0, 0].grid(True)
axes[0, 0].set_xlim(-10, 130)
axes[0, 0].hist(orig_data,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #initial speed
axes[0, 1].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[0, 1].set_ylabel(u'Probability',fontsize=16)
axes[0, 1].set_title(u"Predicted training(num:{}, RMSE:{})".format(len(pred_train),rmse_train[-1]),fontsize=16)
axes[0, 1].grid(True)
axes[0, 1].set_xlim(-10, 130)
axes[0, 1].hist(pred_train,bins=50,density=1,color='g',alpha=0.5,edgecolor='k') #predicted training speed
axes[0, 2].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[0, 2].set_ylabel(u'Probability',fontsize=16)
axes[0, 2].set_title(u"Predicted test(num:{}, RMSE:{})".format(len(pred_test),rmse_test[-1]),fontsize=16)
axes[0, 2].grid(True)
axes[0, 2].set_xlim(-10, 130)
axes[0, 2].hist(pred_test,bins=50,density=1,color='b',alpha=0.5,edgecolor='k') #predicted testing speed
axes[1, 0].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 0].set_ylabel(u'Probability',fontsize=16)
axes[1, 0].set_title(u"Original missing predicted(num:{})".format(len(pred_missing)),fontsize=16)
axes[1, 0].grid(True)
axes[1, 0].set_xlim(-10, 130)
axes[1, 0].hist(pred_missing,bins=60,density=1,color='c',alpha=0.5,edgecolor='k') #predicted missing speed
axes[1, 1].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 1].set_ylabel(u'Probability',fontsize=16)
axes[1, 1].set_title(u"Element-like missing(num:{})".format(len(pred_emissing)),fontsize=16)
axes[1, 1].grid(True)
axes[1, 1].set_xlim(-10, 130)
axes[1, 1].hist(pred_emissing,bins=4,density=1,color='m',alpha=0.5,edgecolor='k') #predicted Element-like missing speed
axes[1, 2].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 2].set_ylabel(u'Probability',fontsize=16)
axes[1, 2].set_title(u"Fiber-like missing(num:{})".format(len(pred_fmissing)),fontsize=16)
axes[1, 2].grid(True)
axes[1, 2].set_xlim(-10, 130)
axes[1, 2].hist(pred_fmissing,bins=60,density=1,color='y',alpha=0.5,edgecolor='k') #predicted Fiber-like missing speed
plt.savefig('{0}\\Speed_predicted.png'.format(path1))
def speed_dist_plot(orig_train, pred_train, orig_test, pred_test):
# get figure for error with ±5km/h
bias_train=pred_train-orig_train
bias_test=pred_test-orig_test
names = ["training bias", "test bias"]
bins=[-np.inf,-20,-15,-10,-5,0,5,10,15,20,np.inf]
cats_train=pd.value_counts(pd.cut(bias_train,bins))
cats_train=cats_train/sum(cats_train)*100
cats_test=pd.value_counts(pd.cut(bias_test,bins))
cats_test=cats_test/sum(cats_test)*100
cats=pd.concat([cats_train,cats_test],axis=1)
cats.sort_index(inplace=True)
cats.columns=pd.Index(names)
fig,axes = plt.subplots(1,1, figsize=(15,10))
axes.set_xlabel('Speed bias(km/h)',fontsize=16)
axes.set_ylabel('Percentage(%)',fontsize=16)
axes.set_title('Speed bias distribution', fontsize=16)
axes=cats.plot(kind='bar',ax=axes,color=['green','red'],\
alpha=0.5,rot=30)
for p in axes.patches:
axes.annotate(np.round(p.get_height(),decimals=2),\
(p.get_x()+p.get_width()/2, p.get_height()+ 0.3),\
ha='center', va='center',fontsize=9)
axes.legend(loc='best',fontsize=16)
plt.grid()
plt.savefig(f'{path1}/ speed_error.png')
def loss_rmse_plot(loss,rmse_train,rmse_test,invl):
#get figure for loss and rmse with steps
plt.figure(figsize=(21,7))
plt.subplot(121)
plt.plot(list(range(len(loss))), loss, 'bo')
plt.xlabel('Steps',fontsize=16)
plt.ylabel('Loss of objective function',fontsize=16)
plt.grid()
plt.title('Loss of steps',fontsize=16)
plt.subplot(122)
plt.plot(list(range(invl,(len(rmse_train)+1)*invl,invl)), rmse_train, 'bo',\
list(range(invl,(len(rmse_test)+1)*invl,invl)), rmse_test, 'ro')
plt.xlabel('Steps',fontsize=16)
plt.ylabel('RMSE',fontsize=16)
plt.grid()
plt.title('RMSE of steps',fontsize=16)
plt.legend(['RMSE of training','RMSE of test'],fontsize=16)
plt.savefig(f'{path1}\\loss and rmse.png')
def show_matplt():
#get_data()
orig_data,orig_train,orig_test,pred_missing,pred_emissing,pred_fmissing,\
pred_train,pred_test,rmse_train,rmse_test,loss,invl=data_prcocessing()
#plot graph
speed_plot(orig_data, pred_train, pred_test,
pred_missing, pred_emissing,pred_fmissing,rmse_train,rmse_test)
speed_dist_plot(orig_train, pred_train, orig_test, pred_test)
loss_rmse_plot(loss,rmse_train,rmse_test, invl)
def main():
show_matplt()
if __name__=='__main__':
main()
4.3 应用3
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
npg=np.genfromtxt
#import scipy
#fig=plt.figure()
#ax=fig.add_subplot(1,1,1)
"""b :blue,r:red,g: green,c:cyan,m: magenta,y:yellow,k:black, w:white"""
def data_getting():
mr_=np.load(path1+'\\'+'mr_.npy','r+')
mr_train=np.load(path1+'\\'+'mr_train.npy','r+')
mr_test=np.load(path1+'\\'+'mr_test.npy','r+')
params=npg(path1+'\\'+'params.txt',delimiter=':',dtype=None,encoding='utf-8')
params=dict(params) #params dict
rmse_interval=int(params['rmse_interval']) #get time slot end
T=np.load(path1+'\\'+'T.npy','r+')
R=np.load(path1+'\\'+'R.npy','r+')
G=np.load(path1+'\\'+'G.npy','r+')
F=np.load(path1+'\\'+'F.npy','r+')
mrPred=np.dot(T, R.T)
rmse_train=np.loadtxt(path1+'\\'+'rmse_train.csv')
rmse_test=np.loadtxt(path1+'\\'+'rmse_test.csv')
loss=np.loadtxt(path1+'\\'+'loss.csv')
return mr_,mr_train,mr_test,mrPred,T,R,G,F,rmse_train,rmse_test,loss,rmse_interval
def data_prcocessing():
mr_,mr_train,mr_test,mrPred,T,R,G,F,rmse_train,rmse_test,loss,invl=data_getting()
orig_train = mr_train[mr_train>0.1] # initial trainning value
orig_test = mr_test[mr_test>0.1] # initial test value
orig_data = np.hstack((orig_train,orig_test)) # initial value
pred_train = mrPred[mr_train>0.1] # predicted trainning value
pred_test = mrPred[mr_test>0.1] # predicted test value
pos_missing=np.where(mr_[:,:]<0.1) # all missing position
pred_missing=mrPred[pos_missing] # predicted value for missing position
zero_columns=list(np.where(~mr_.any(axis=0))[0])# the columns all zeros(fiber missing)
pred_fmissing=mrPred[:,zero_columns].flatten() #(fiber-like missing speed)
emissing_mr_= np.delete(mr_,zero_columns, axis=1)
emissing_pos=np.where(emissing_mr_[:,:]<0.1)
pred_emissing=mrPred[emissing_pos] #(element-like missing speed)
return orig_data,orig_train,orig_test,pred_missing,pred_emissing,pred_fmissing,\
pred_train,pred_test,rmse_train,rmse_test,loss,invl
def speed_plot(orig_data, pred_train, pred_test,
pred_missing, pred_emissing,pred_fmissing,rmse_train,rmse_test):
fig,axes = plt.subplots(2,3, figsize=(30,18)) #get 2x2 subplots
#get figure for speed distribution
axes[0, 0].set_xlabel(u'Speed(km/h)',fontsize=16) #set the coordinate axis-x label
axes[0, 0].set_ylabel(u'Probability',fontsize=16) #set the coordinate axis-y label
axes[0, 0].set_title(u"Original No-missing(num:{})".format(len(orig_data)),fontsize=16)
axes[0, 0].grid(True)
axes[0, 0].set_xlim(-10, 130)
axes[0, 0].hist(orig_data,bins=50,density=1,color='r',alpha=0.5,edgecolor='k') #initial speed
axes[0, 1].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[0, 1].set_ylabel(u'Probability',fontsize=16)
axes[0, 1].set_title(u"Predicted training(num:{}, RMSE:{})".format(len(pred_train),rmse_train[-1]),fontsize=16)
axes[0, 1].grid(True)
axes[0, 1].set_xlim(-10, 130)
axes[0, 1].hist(pred_train,bins=50,density=1,color='g',alpha=0.5,edgecolor='k') #predicted training speed
axes[0, 2].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[0, 2].set_ylabel(u'Probability',fontsize=16)
axes[0, 2].set_title(u"Predicted test(num:{}, RMSE:{})".format(len(pred_test),rmse_test[-1]),fontsize=16)
axes[0, 2].grid(True)
axes[0, 2].set_xlim(-10, 130)
axes[0, 2].hist(pred_test,bins=50,density=1,color='b',alpha=0.5,edgecolor='k') #predicted testing speed
axes[1, 0].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 0].set_ylabel(u'Probability',fontsize=16)
axes[1, 0].set_title(u"Original missing predicted(num:{})".format(len(pred_missing)),fontsize=16)
axes[1, 0].grid(True)
axes[1, 0].set_xlim(-10, 130)
axes[1, 0].hist(pred_missing,bins=60,density=1,color='c',alpha=0.5,edgecolor='k') #predicted missing speed
axes[1, 1].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 1].set_ylabel(u'Probability',fontsize=16)
axes[1, 1].set_title(u"Element-like missing(num:{})".format(len(pred_emissing)),fontsize=16)
axes[1, 1].grid(True)
axes[1, 1].set_xlim(-10, 130)
axes[1, 1].hist(pred_emissing,bins=4,density=1,color='m',alpha=0.5,edgecolor='k') #predicted Element-like missing speed
axes[1, 2].set_xlabel(u'Speed(km/h)',fontsize=16)
axes[1, 2].set_ylabel(u'Probability',fontsize=16)
axes[1, 2].set_title(u"Fiber-like missing(num:{})".format(len(pred_fmissing)),fontsize=16)
axes[1, 2].grid(True)
axes[1, 2].set_xlim(-10, 130)
axes[1, 2].hist(pred_fmissing,bins=60,density=1,color='y',alpha=0.5,edgecolor='k') #predicted Fiber-like missing speed
plt.savefig('{0}\\Speed_predicted.png'.format(path1))
def speed_dist_plot(orig_train, pred_train, orig_test, pred_test):
# get figure for error with ±5km/h
bias_train=pred_train-orig_train
bias_test=pred_test-orig_test
names = ["training bias", "test bias"]
bins=[-np.inf,-20,-15,-10,-5,0,5,10,15,20,np.inf]
cats_train=pd.value_counts(pd.cut(bias_train,bins))
cats_train=cats_train/sum(cats_train)*100
cats_test=pd.value_counts(pd.cut(bias_test,bins))
cats_test=cats_test/sum(cats_test)*100
cats=pd.concat([cats_train,cats_test],axis=1)
cats.sort_index(inplace=True)
cats.columns=pd.Index(names)
fig,axes = plt.subplots(1,1, figsize=(15,10))
axes.set_xlabel('Speed bias(km/h)',fontsize=16)
axes.set_ylabel('Percentage(%)',fontsize=16)
axes.set_title('Speed bias distribution', fontsize=16)
axes=cats.plot(kind='bar',ax=axes,color=['green','red'],\
alpha=0.5,rot=30)
for p in axes.patches:
axes.annotate(np.round(p.get_height(),decimals=2),\
(p.get_x()+p.get_width()/2, p.get_height()+ 0.3),\
ha='center', va='center',fontsize=9)
axes.legend(loc='best',fontsize=16)
plt.grid()
plt.savefig(f'{path1}/ speed_error.png')
def loss_rmse_plot(loss,rmse_train,rmse_test,invl):
#get figure for loss and rmse with steps
plt.figure(figsize=(21,7))
plt.subplot(121)
plt.plot(list(range(len(loss))), loss, 'bo')
plt.xlabel('Steps',fontsize=16)
plt.ylabel('Loss of objective function',fontsize=16)
plt.grid()
plt.title('Loss of steps',fontsize=16)
plt.subplot(122)
plt.plot(list(range(invl,(len(rmse_train)+1)*invl,invl)), rmse_train, 'bo',\
list(range(invl,(len(rmse_test)+1)*invl,invl)), rmse_test, 'ro')
plt.xlabel('Steps',fontsize=16)
plt.ylabel('RMSE',fontsize=16)
plt.grid()
plt.title('RMSE of steps',fontsize=16)
plt.legend(['RMSE of training','RMSE of test'],fontsize=16)
plt.savefig(f'{path1}\\loss and rmse.png')
def show_matplt():
#get_data()
orig_data,orig_train,orig_test,pred_missing,pred_emissing,pred_fmissing,\
pred_train,pred_test,rmse_train,rmse_test,loss,invl=data_prcocessing()
#plot graph
speed_plot(orig_data, pred_train, pred_test,
pred_missing, pred_emissing,pred_fmissing,rmse_train,rmse_test)
speed_dist_plot(orig_train, pred_train, orig_test, pred_test)
loss_rmse_plot(loss,rmse_train,rmse_test, invl)
def main():
show_matplt()
if __name__=='__main__':
main()