C:/Windows/System32/drivers/etc/hosts中追加映射(for kafka)
121.41.8.124 ZSKFK1
121.41.8.128 ZSKFK2
120.26.46.92 ZSKFK3
备注:这里的121.41.8.124等为zookeeper_hosts,hosts的主机地址,将其映射为ZSKFK1,易于解析(个人理解,不一定正确)
一 、kafka使用代码start
# -*- coding: utf-8 -*-
from pykafka import KafkaClient
import Queue
import logging
logging.basicConfig(
format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
level=logging.DEBUG
)
client = KafkaClient(zookeeper_hosts="121.41.8.124:2181,121.41.8.128:2181,120.26.46.92:2181/kafka", hosts="121.41.8.124:9082,121.41.8.128:9082,120.26.46.92:9082")
client.topics
topic = client.topics['receiver_kafka_004'] #choose a topic baoqiang_test
'''one way'''
# with topic.get_sync_producer() as producer:
# producer.produce(['test message' + str(i**2) for i in range(4)])
#You can still obtain delivery confirmation for messages, through a queue interface which can be enabled by setting delivery_reports=True
with topic.get_producer(delivery_reports=True) as producer:
count = 0
while True:
count += 1
producer.produce('test msg %d_tbq' %count, partition_key='{}'.format(count))
if count % 10**1 == 0: # adjust this or bring lots of RAM ;)
while True:
try:
msg, exc = producer.get_delivery_report(block=False)
if exc is not None:
print 'Failed to deliver msg {}: {}'.format(
msg.partition_key, repr(exc))
else:
print 'Successfully delivered msg {}'.format(
msg.partition_key)
except Queue.Empty:
break
break
balanced_consumer = topic.get_balanced_consumer(
consumer_group = 'testgroup',
auto_commit_enable = True, #if false ,then no need consumer_group
# connect several zk'
)
messages = [balanced_consumer.consume() for i in range(1)]
# print messages
for message in messages:
if message is not None:
print message.offset, message.value
kafka使用代码end
二、为了更好的用windows下的pip
1.安装setuptool,下载setuptools-1.1.6.tar.gz然后解压,在cmd里面运行python setup.py install
2.安装pip,下载pip-1.4.1.tar.gz,解压,在cmd里面运行python setup.py install
3.电脑-属性-高级-环境变量-path里面添加环境变量C:\Python27\Scripts
4.在cmd 里面运行pip install protobuf
pip install 经常会出错,最好下载个mingw-get-setup.exe安装
三、protobuf使用
下载proto.7z 解压出来将protoc.exe放在系统路径下 如: C:\Python27\
执行以下语句生成.py
protoc -I=$SRC_DIR --python_out=$DST_DIR $SRC_DIR/ruledb_pb2.proto
$SRC_DIR表示*.proto所在文件夹路径
$DST_DIR表示要把python代码生成在哪个文件夹
本例的*.proto文件采用proto.proto
生成proto_pb2.py
出现error:unrecognized syntax identifier "proto3". This parser only recognizes "proto2"
重新下载个protoc.exe protobuf3版本的得以解决
对于以下结构的proto文件,python读写消息:
syntax = "proto3";
package DataProtocol;
option java_package = "com.yg.travel.assistant.proto";
message RawGps
{
int32 version = 1;
int32 userId = 2;
string cityId = 3;
int64 timestamp = 4;
int64 recTimestamp = 5;
double lng = 6;
double lat = 7;
}
message OnBusAction
{
int32 version = 1;
string stopId = 2;
string stopName = 3;
double stopLng = 4;
double stopLat = 5;
int64 time = 6;
}
message TModelOut
{
int32 version = 1;
RawGps rawGps = 2;
int32 round = 3;
int32 label = 4;
double possibility = 5;
OnBusAction onBusAction = 6;
}
以下假设producer和balanced_consumer为已经创建好的句柄
写消息:
tmodelout = proto_pb2.TModelOut()
tmodelout.version = 1
tmodelout.round = 1
tmodelout.label = 1
tmodelout.possibility = 1
gps = tmodelout.rawGps
gps.version = 1
gps.userId = 3
gps.cityId = '004'
gps.timestamp = 15552
gps.recTimestamp = 15554
gps.lng = 120.336
gps.lat = 30.557
onbusaction = tmodelout.onBusAction
onbusaction.version = 1
onbusaction.stopId = ''
onbusaction.stopName = ''
onbusaction.stopLng = 120.648
onbusaction.stopLat = 30.878
onbusaction.time = 168742
tmodelout_str = tmodelout.SerializeToString()
producer.produce(tmodelout_str)
读消息:
while True:
message = balanced_consumer.consume()
rawgps = proto_pb2.RawGps()
rawgps.ParseFromString(message.value)
usrId = rawgps.userId
代码部署到linux服务器:
1、安装xshell,为了更好的直观看界面目录最好再安装xftp
设置好给定的用户名,密码 ,主机号,端口,连接linux服务器
2、执行cd /切换到根目录,在其root目录下创建自己的项目文件夹,在其data目录下创建日志文件
通过执行 python getdataset_realtime.py便可以执行程序
也可以执行nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &
在运行程序的同时将输出日志放到/data/tmoderec/log/out.log中
3、还得重新安装依赖的包,比如thrift,protobuf
如对于protobuf安装:
下载文件https://github.com/google/protobuf/releases,选protobuf-python-3.0.0-beta-2.tar.gz
tar -xzf protobuf-python-3.0.0-beta-2.tar.gz
cd protobuf-3.0.0-beta-2
./configure
make
make check
make install
protoc -I=. --python_out=. ./proto.proto 注意:linux中用.表示当前目录
这只是编译好protoc编译器,还得下载protobuf-3.0.0b2-py2.py3-none-any.whl安装protobuf
执行 pip install protobuf-3.0.0b2-py2.py3-none-any.whl
4、linux下获取运行的程序: ps aux | grep python
kill -9 进程id 终止进程
nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &其实已经在运行py文件了
所以再python getdataset_realtime.py就出错了
查看日志:
tail -n 200 out.log
推荐 tail -f /data/tmoderec/log/out.log
cat out.log | grep '355021061155229' 过滤,实现只看355021061155229的数据
grep '8b4d0306-536b-47c8-864f-0d7e5ae48335' merged_file | less
windows设置为300s , staypoint设置为10个点, mod设置为6,每隔6个点计算一次附近的站或者进行一次交通模式识别。在判断车等站以后若30分钟内判断不出
是bus,then pop该用户。
114.215.242.105 (公)
10.162.66.19 (内)
wiki-rd.chelaile.net.cn
wiki.chelaile.net.cn
密码:
账号:tengbaoqiang
测试机:
实例名称: CXZS1
密码:
IP:
114.215.242.105 (公)
10.162.66.19 (内)
地域:杭州可用区C
系统盘: 40GB(普通云盘,/dev/xvda)
实例: 4 核 8GB(系列 I,标准型 s3)
带宽: 1Mbps(按固定带宽)
操作系统: yg_CentOs6.5_img20151020
数据盘: 1000GB(普通云盘,随实例释放)
文件夹里面有__init__.py,那么外来文件便可以通过import 文件夹.模块导入模块了
pip install python-binary-memcached
cd ~/real_simulate/bin
./start_simulate.sh
cd /root/tbq/formal_src_TranModeRec_construct
[root@CXZS1 ~]# lsof -p 30343
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
python 30343 root cwd DIR 202,1 4096 1315956 /root/tbq/src_TranModeRec_construct
python 30343 root rtd DIR 202,1 4096 2 /
python 30343 root txt REG 202,1 6224577 660916 /opt/Python-2.7/bin/python2.7
数据存放位置:
cd /data/kafka_backup/logs
[root@CXZS1 logs]# ls
collector.log geometry.log lr.log sos.log trans_mode.log
cd real_simulate/data_folder/mergifile 最原始数据
集成测试:
step1: tm模块运行,cd /root/tbq/formal_src_TranModeRec_construct && nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &
step2: lr模块运行,cd ~/line_recognizer/ && ./start_lr.sh test 40
step3: 模拟发送数据 cd ~/real_simulate/bin && ./start_simulate.sh
根据进程id查看全路径
ll /proc/进程id
更改写文件目录的文件:
/root/geo_middlet/bin
./start_bak.sh
获取标签文件
1 cd /data/real_simulate/logs(less simulate_engine.log.2016-03-23)
2 grep 'label_data' simulate_engine.log.2016-03-23 | head -1
结果为:2016-03-23 21:44:36 [INFO] com.yg.simulate.UGCDataDecoder - label_data:1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
根据格式
[root@CXZS1 logs]# grep 'label_data' simulate_engine.log.2016-03-23 | head -1 | cut -d ' ' -f 6
label_data:1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
[root@CXZS1 logs]# grep 'label_data' simulate_engine.log.2016-03-23 | head -1 | cut -d ' ' -f 6 | cut -d ':' -f 2
1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
试完之后保存文件:grep 'label_data' simulate_engine.log.2016-03-23 | cut -d ' ' -f 6 | cut -d ':' -f 2 > labels
移动文件mv labels /data/data_backup/
获取tm_ret
cd /data/kafka_backup/logs/
grep 'data=RET' trans_mode.log.2016-03-23 | awk -F ' ' '{print $4,$5,$6}' > tm_ret
mv tm_ret /data/data_backup/
获取lr_ret
grep 'data=RET' lr.log.2016-03-23 | awk -F ' ' '{print $4,$5,$6}' > lr_ret
mv lr_ret /data/data_backup/
获取send_data
grep 'send_user' simulate_engine.log.2016-03-23 | cut -d '=' -f 2 > send_data
mv send_data /data/data_backup/
查看lr的执行日志
cd /data/lr
tail -f /data/lr/log.start_engine_test
grep 'a用户' log.start_engine_test | cut -d ':' -f 2 > lr_ret (注:grep 加上 -C5 的话,显示匹配行前后各5行;-o 只输出匹配到的内容)
tail -n 100 as.log 显示后面100行
wc -l te* 看te为头的文件有多少行
修改配置文件,决定发少量数据还是大量数据
cd ~/real_simulate/bin/sim_conf/ && vim app_config.properties
改为~/real_simulate/data_folder/下想测试的文件,如testAll, row_10
时间预测: grep ',05210,' 040-UDP.log.20160406.ocp | awk -F ',' '{print $3,$5,$9,$14,$15}' > 05210_0406
grep ',50070,' 040#20160420.log.ocp | awk -F ',' '{print $3,$5,$9,$10,$11}' > 50070_0420
grep ',01130,' 040#20160430.log.ocp > 01130_0430_que
原始数据存放位置:
1、天气数据:/data/SSEngines/WeatherData/cityId
2、高德路况数据:/data/SSEngines/INavHisData/cityId/lineNo/direction
3、原始公交数据:/data/SSEngines/BusTTHisData/cityId/order.zip 只看STN
备份数据格式说明:
GPS数据[编辑]
备份接收时间
Gps GPS 0
车辆编号 19125 1
原始时间 2015-01-18 00:06:02 2
原始经度 117.10667166666667 3
原始纬度 39.233245000000004 4
原始线路编号 560 5
原始方向 0 6
原始速度 -1.0 7
原始角度 -1.0 8
数据类型(0自带、1补偿生成、2用户数据) 0 9
数据状态(非用户数据:0正常、1车未动、2车辆离线;用户数据:-1用户偏离、0无法识别、1识别正确) 0 10
修正线路编号(-,x) 10 11
修正方向(-1,0,1), 1 12
修正时间 2015-01-18 00:06:02 13
修正经度 117.106668569027 14
修正纬度 39.2332430960254 15
上一站距离 29.0 16
下一站距离 817.0 17
下一站站点序号 2 18
下一站站名 泽天下 19
报站序号 1 20
车辆状况(到站/离站) 1 21
修正速度 -1 22
修正角度 -1 23
预期到达下一站时间 -1 24
数据来源(0公交/交委、1WIFI、2乘车模式、3乘客端)
STN数据[编辑]
备份接收时间
STN STN 0
车辆编号 22270 1
原始时间 2015-01-18 06:45:25 2
原始线路编号(-,x) 872 3
原始方向(-1,0,1) -1 4
原始站点序号 14 5
原始车辆状态(0到站/1离站) 0 6
数据类型(0自带、1生成) 0 7
数据状态(0正常、1修正、2忽略) 2 8
修正时间 - 9
修正线路编号 - 10
修正方向 -1 11
修正站点序号 -1 12
修正车辆状态 -1 13
修正规则序列 - 14
数据来源(0公交/交委、1WIFI、2乘车模式、3乘客端)
路况数据格式说明:
每个城市一分钟输出一个结果文档,文档包含所有公交线路的拥堵指数。
拥堵指数输出:0-4,0不可信,1绿色,2黄色,3橙色,4红色。数值越大越拥堵。
单条数据标示某条线路(上下行)的拥堵指数,路况
{
    "bc": true,    #不理会
    "dir": "0",    #线路方向
    "dis": 84679,  #线路总长度
    "hbc": false,  #不理会
    "id": "040",   #城市编号
    "no": "00010", #线路编号
    "tis": [
        {
            "con": 1,  #置信度,1为可信,有数据计算出来;0为不可信,无车辆数据。
            "idx": 1,  #拥堵指数,0-4,con是0的时候,idx为0;con为1,idx表示1-4
            "pe": 0.00338927, #结束路段percent
            "ps": 0,   #开始路段percent
            "spd": 14.444445  #速度
        },
        {
            "con": 1,
            "idx": 2,
            "pe": 1,
            "ps": 0.9978271,
            "spd": 5.277778
        }
    ]
}
获取某条线路某个方向的基础数据:
获取lineid curl "http://100.98.168.166/baseservice/client/transformationLineNo.action?cityId=040&lineNo=01130&direction=0"
根据lineid获取基础数据
curl "http://100.98.168.166/baseservice/client/getLineAndStopDetails.action?cityId=040&lineId=020-01130-0"
{
"data": {
"lineId": "020-01130-0",
"direction": 0,
"firstTime": "06:00",
"lastTime": "22:30",
"lineNo": "01130",
"lineName": "113",
"stopsNum": 18,
"stopList": [
{
"order": 1,
"stationId": "020-3709",
"slng": 113.254871,
"slat": 23.100087000000002,
"stationName": "南田路总站",
"lat": 23.103567,
"lng": 113.266661,
"distance": 0.0
},
{
"order": 2,
"stationId": "020-8801",
"slng": 113.2601366,
"slat": 23.1017349,
"stationName": "广东药学院",
"lat": 23.10529127428721,
"lng": 113.27191266636152,
"distance": 565.7976678248924
},
{
"order": 3,
"stationId": "020-8802",
"slng": 113.257414,
"slat": 23.103354,
"stationName": "宝岗大道中",
"lat": 23.106874,
"lng": 113.269194,
"distance": 987.3034144736644
},
{
"order": 4,
"stationId": "020-7966",
"slng": 113.256228,
"slat": 23.105468000000002,
"stationName": "宝岗大道北",
"lat": 23.108968,
"lng": 113.268008,
"distance": 264.8472503302803
},
{
"order": 5,
"stationId": "020-225",
"slng": 113.25700400000001,
"slat": 23.108544,
"stationName": "市红会医院",
"lat": 23.112059,
"lng": 113.268781,
"distance": 486.56142795434585
},
{
"order": 6,
"stationId": "020-206",
"slng": 113.25736875454545,
"slat": 23.119936027272725,
"stationName": "解放南路",
"lat": 23.12345915782163,
"lng": 113.26913582325273,
"distance": 1507.1913558895294
},
{
"order": 7,
"stationId": "020-240",
"slng": 113.257177,
"slat": 23.125023,
"stationName": "解放中路",
"lat": 23.128545,
"lng": 113.268942,
"distance": 567.8547155555111
},
{
"order": 8,
"stationId": "020-1230",
"slng": 113.25681700000001,
"slat": 23.130492999999998,
"stationName": "迎宾馆",
"lat": 23.134012,
"lng": 113.268583,
"distance": 610.1281617058739
},
{
"order": 9,
"stationId": "020-1304",
"slng": 113.256129,
"slat": 23.144023,
"stationName": "越秀公园",
"lat": 23.147537,
"lng": 113.2679,
"distance": 1512.5632990516897
},
{
"order": 10,
"stationId": "020-185",
"slng": 113.25507804285714,
"slat": 23.158348385714284,
"stationName": "桂花岗",
"lat": 23.16185497004349,
"lng": 113.26686465801826,
"distance": 1700.9521334816363
},
{
"order": 11,
"stationId": "020-8978",
"slng": 113.252313,
"slat": 23.160289,
"stationName": "三元里",
"lat": 23.163752,
"lng": 113.26411,
"distance": 360.01412898005583
},
{
"order": 12,
"stationId": "020-1981",
"slng": 113.248827,
"slat": 23.166491,
"stationName": "北站",
"lat": 23.169899,
"lng": 113.260642,
"distance": 817.5293161737463
},
{
"order": 13,
"stationId": "020-969",
"slng": 113.247547,
"slat": 23.170745,
"stationName": "沙涌",
"lat": 23.174135,
"lng": 113.259367,
"distance": 492.27028596812306
},
{
"order": 14,
"stationId": "020-9378",
"slng": 113.24583,
"slat": 23.175193999999998,
"stationName": "棠下(三元里大道)",
"lat": 23.178554,
"lng": 113.25766,
"distance": 525.6605489940048
},
{
"order": 15,
"stationId": "020-8834",
"slng": 113.245604,
"slat": 23.179861,
"stationName": "三元里大道中",
"lat": 23.183221,
"lng": 113.257444,
"distance": 547.365314110797
},
{
"order": 16,
"stationId": "020-8976",
"slng": 113.24642655555557,
"slat": 23.183560255555555,
"stationName": "棠溪",
"lat": 23.186935612801623,
"lng": 113.25826515436717,
"distance": 422.8888268320661
},
{
"order": 17,
"stationId": "020-8975",
"slng": 113.251532,
"slat": 23.187048,
"stationName": "三元里大道北",
"lat": 23.190513,
"lng": 113.263355,
"distance": 660.932219897004
},
{
"order": 18,
"stationId": "020-3701",
"slng": 113.25215275,
"slat": 23.1898815,
"stationName": "棠安路总站",
"lat": 23.19335803747562,
"lng": 113.26397399043499,
"distance": 676.1515644987248
}
],
"startStopName": "南田路总站",
"endStopName": "棠安路总站",
"oppositeList": [
{
"lineId": "020-01130-1",
"direction": 1,
"firstTime": "06:00",
"lastTime": "22:30",
"lineNo": "01130",
"lineName": "113",
"stopsNum": 18,
"startStopName": "棠安路总站",
"endStopName": "南田路总站",
"price": "2元"
}
],
"price": "2元"
}
}
root@JSTT5
nohup start-STNService_Single.sh 1>/dev/null 2>logs/error_STNEngine${i}.log &
SFLK1
主机: 121.41.116.59
密码:
121.41.8.124 ZSKFK1
121.41.8.128 ZSKFK2
120.26.46.92 ZSKFK3
备注:这里的121.41.8.124等为zookeeper_hosts,hosts的主机地址,将其映射为ZSKFK1,易于解析(个人理解,不一定正确)
一 、kafka使用代码start
# -*- coding: utf-8 -*-
from pykafka import KafkaClient
import Queue
import logging
logging.basicConfig(
format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
level=logging.DEBUG
)
client = KafkaClient(zookeeper_hosts="121.41.8.124:2181,121.41.8.128:2181,120.26.46.92:2181/kafka", hosts="121.41.8.124:9082,121.41.8.128:9082,120.26.46.92:9082")
client.topics
topic = client.topics['receiver_kafka_004'] #choose a topic baoqiang_test
'''one way'''
# with topic.get_sync_producer() as producer:
# producer.produce(['test message' + str(i**2) for i in range(4)])
#You can still obtain delivery confirmation for messages, through a queue interface which can be enabled by setting delivery_reports=True
with topic.get_producer(delivery_reports=True) as producer:
count = 0
while True:
count += 1
producer.produce('test msg %d_tbq' %count, partition_key='{}'.format(count))
if count % 10**1 == 0: # adjust this or bring lots of RAM ;)
while True:
try:
msg, exc = producer.get_delivery_report(block=False)
if exc is not None:
print 'Failed to deliver msg {}: {}'.format(
msg.partition_key, repr(exc))
else:
print 'Successfully delivered msg {}'.format(
msg.partition_key)
except Queue.Empty:
break
break
balanced_consumer = topic.get_balanced_consumer(
consumer_group = 'testgroup',
auto_commit_enable = True, #if false ,then no need consumer_group
# connect several zk'
)
messages = [balanced_consumer.consume() for i in range(1)]
# print messages
for message in messages:
if message is not None:
print message.offset, message.value
kafka使用代码end
二、为了更好的用windows下的pip
1.安装setuptool,下载setuptools-1.1.6.tar.gz然后解压,在cmd里面运行python setup.py install
2.安装pip,下载pip-1.4.1.tar.gz,解压,在cmd里面运行python setup.py install
3.电脑-属性-高级-环境变量-path里面添加环境变量C:\Python27\Scripts
4.在cmd 里面运行pip install protobuf
pip install 经常会出错,最好下载个mingw-get-setup.exe安装
三、protobuf使用
下载proto.7z 解压出来将protoc.exe放在系统路径下 如: C:\Python27\
执行以下语句生成.py
protoc -I=$SRC_DIR --python_out=$DST_DIR $SRC_DIR/ruledb_pb2.proto
$SRC_DIR表示*.proto所在文件夹路径
$DST_DIR表示要把python代码生成在哪个文件夹
本例的*.proto文件采用proto.proto
生成proto_pb2.py
出现error:unrecognized syntax identifier "proto3". This parser only recognizes "proto2"
重新下载个protoc.exe protobuf3版本的得以解决
对于以下结构的proto文件,python读写消息:
syntax = "proto3";
package DataProtocol;
option java_package = "com.yg.travel.assistant.proto";
message RawGps
{
int32 version = 1;
int32 userId = 2;
string cityId = 3;
int64 timestamp = 4;
int64 recTimestamp = 5;
double lng = 6;
double lat = 7;
}
message OnBusAction
{
int32 version = 1;
string stopId = 2;
string stopName = 3;
double stopLng = 4;
double stopLat = 5;
int64 time = 6;
}
message TModelOut
{
int32 version = 1;
RawGps rawGps = 2;
int32 round = 3;
int32 label = 4;
double possibility = 5;
OnBusAction onBusAction = 6;
}
以下假设producer和balanced_consumer为已经创建好的句柄
写消息:
tmodelout = proto_pb2.TModelOut()
tmodelout.version = 1
tmodelout.round = 1
tmodelout.label = 1
tmodelout.possibility = 1
gps = tmodelout.rawGps
gps.version = 1
gps.userId = 3
gps.cityId = '004'
gps.timestamp = 15552
gps.recTimestamp = 15554
gps.lng = 120.336
gps.lat = 30.557
onbusaction = tmodelout.onBusAction
onbusaction.version = 1
onbusaction.stopId = ''
onbusaction.stopName = ''
onbusaction.stopLng = 120.648
onbusaction.stopLat = 30.878
onbusaction.time = 168742
tmodelout_str = tmodelout.SerializeToString()
producer.produce(tmodelout_str)
读消息:
while True:
message = balanced_consumer.consume()
rawgps = proto_pb2.RawGps()
rawgps.ParseFromString(message.value)
usrId = rawgps.userId
代码部署到linux服务器:
1、安装xshell,为了更好的直观看界面目录最好再安装xftp
设置好给定的用户名,密码 ,主机号,端口,连接linux服务器
2、执行cd /切换到根目录,在其root目录下创建自己的项目文件夹,在其data目录下创建日志文件
通过执行 python getdataset_realtime.py便可以执行程序
也可以执行nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &
在运行程序的同时将输出日志放到/data/tmoderec/log/out.log中
3、还得重新安装依赖的包,比如thrift,protobuf
如对于protobuf安装:
下载文件https://github.com/google/protobuf/releases,选protobuf-python-3.0.0-beta-2.tar.gz
tar -xzf protobuf-python-3.0.0-beta-2.tar.gz
cd protobuf-3.0.0-beta-2
./configure
make
make check
make install
protoc -I=. --python_out=. ./proto.proto 注意:linux中用.表示当前目录
这只是编译好protoc编译器,还得下载protobuf-3.0.0b2-py2.py3-none-any.whl安装protobuf
执行 pip install protobuf-3.0.0b2-py2.py3-none-any.whl
4、linux下获取运行的程序: ps aux | grep python
kill -9 进程id 终止进程
nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &其实已经在运行py文件了
所以再python getdataset_realtime.py就出错了
查看日志:
tail -n 200 out.log
推荐 tail -f /data/tmoderec/log/out.log
cat out.log | grep '355021061155229' 过滤,实现只看355021061155229的数据
grep '8b4d0306-536b-47c8-864f-0d7e5ae48335' merged_file | less
windows设置为300s , staypoint设置为10个点, mod设置为6,每隔6个点计算一次附近的站或者进行一次交通模式识别。在判断车等站以后若30分钟内判断不出
是bus,then pop该用户。
114.215.242.105 (公)
10.162.66.19 (内)
wiki-rd.chelaile.net.cn
wiki.chelaile.net.cn
密码:
账号:tengbaoqiang
测试机:
实例名称: CXZS1
密码:
IP:
114.215.242.105 (公)
10.162.66.19 (内)
地域:杭州可用区C
系统盘: 40GB(普通云盘,/dev/xvda)
实例: 4 核 8GB(系列 I,标准型 s3)
带宽: 1Mbps(按固定带宽)
操作系统: yg_CentOs6.5_img20151020
数据盘: 1000GB(普通云盘,随实例释放)
文件夹里面有__init__.py,那么外来文件便可以通过import 文件夹.模块导入模块了
pip install python-binary-memcached
cd ~/real_simulate/bin
./start_simulate.sh
cd /root/tbq/formal_src_TranModeRec_construct
[root@CXZS1 ~]# lsof -p 30343
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
python 30343 root cwd DIR 202,1 4096 1315956 /root/tbq/src_TranModeRec_construct
python 30343 root rtd DIR 202,1 4096 2 /
python 30343 root txt REG 202,1 6224577 660916 /opt/Python-2.7/bin/python2.7
数据存放位置:
cd /data/kafka_backup/logs
[root@CXZS1 logs]# ls
collector.log geometry.log lr.log sos.log trans_mode.log
cd real_simulate/data_folder/mergifile 最原始数据
集成测试:
step1: tm模块运行,cd /root/tbq/formal_src_TranModeRec_construct && nohup python getdataset_realtime.py > /data/tmoderec/log/out.log 2>&1 &
step2: lr模块运行,cd ~/line_recognizer/ && ./start_lr.sh test 40
step3: 模拟发送数据 cd ~/real_simulate/bin && ./start_simulate.sh
根据进程id查看全路径
ll /proc/进程id
更改写文件目录的文件:
/root/geo_middlet/bin
./start_bak.sh
获取标签文件
1 cd /data/real_simulate/logs(less simulate_engine.log.2016-03-23)
2 grep 'label_data' simulate_engine.log.2016-03-23 | head -1
结果为:2016-03-23 21:44:36 [INFO] com.yg.simulate.UGCDataDecoder - label_data:1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
根据格式
[root@CXZS1 logs]# grep 'label_data' simulate_engine.log.2016-03-23 | head -1 | cut -d ' ' -f 6
label_data:1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
[root@CXZS1 logs]# grep 'label_data' simulate_engine.log.2016-03-23 | head -1 | cut -d ' ' -f 6 | cut -d ':' -f 2
1d70d64b-e413-42ea-8cdd-5029dfd133a5_round#0_ts#1458740676226_rand#85746,0526,0
试完之后保存文件:grep 'label_data' simulate_engine.log.2016-03-23 | cut -d ' ' -f 6 | cut -d ':' -f 2 > labels
移动文件mv labels /data/data_backup/
获取tm_ret
cd /data/kafka_backup/logs/
grep 'data=RET' trans_mode.log.2016-03-23 | awk -F ' ' '{print $4,$5,$6}' > tm_ret
mv tm_ret /data/data_backup/
获取lr_ret
grep 'data=RET' lr.log.2016-03-23 | awk -F ' ' '{print $4,$5,$6}' > lr_ret
mv lr_ret /data/data_backup/
获取send_data
grep 'send_user' simulate_engine.log.2016-03-23 | cut -d '=' -f 2 > send_data
mv send_data /data/data_backup/
查看lr的执行日志
cd /data/lr
tail -f /data/lr/log.start_engine_test
grep 'a用户' log.start_engine_test | cut -d ':' -f 2 > lr_ret (注:grep 加上 -C5 的话,显示匹配行前后各5行;-o 只输出匹配到的内容)
tail -n 100 as.log 显示后面100行
wc -l te* 看te为头的文件有多少行
修改配置文件,决定发少量数据还是大量数据
cd ~/real_simulate/bin/sim_conf/ && vim app_config.properties
改为~/real_simulate/data_folder/下想测试的文件,如testAll, row_10
时间预测: grep ',05210,' 040-UDP.log.20160406.ocp | awk -F ',' '{print $3,$5,$9,$14,$15}' > 05210_0406
grep ',50070,' 040#20160420.log.ocp | awk -F ',' '{print $3,$5,$9,$10,$11}' > 50070_0420
grep ',01130,' 040#20160430.log.ocp > 01130_0430_que
原始数据存放位置:
1、天气数据:/data/SSEngines/WeatherData/cityId
2、高德路况数据:/data/SSEngines/INavHisData/cityId/lineNo/direction
3、原始公交数据:/data/SSEngines/BusTTHisData/cityId/order.zip 只看STN
备份数据格式说明:
GPS数据[编辑]
备份接收时间
Gps GPS 0
车辆编号 19125 1
原始时间 2015-01-18 00:06:02 2
原始经度 117.10667166666667 3
原始纬度 39.233245000000004 4
原始线路编号 560 5
原始方向 0 6
原始速度 -1.0 7
原始角度 -1.0 8
数据类型(0自带、1补偿生成、2用户数据) 0 9
数据状态(非用户数据:0正常、1车未动、2车辆离线;用户数据:-1用户偏离、0无法识别、1识别正确) 0 10
修正线路编号(-,x) 10 11
修正方向(-1,0,1), 1 12
修正时间 2015-01-18 00:06:02 13
修正经度 117.106668569027 14
修正纬度 39.2332430960254 15
上一站距离 29.0 16
下一站距离 817.0 17
下一站站点序号 2 18
下一站站名 泽天下 19
报站序号 1 20
车辆状况(到站/离站) 1 21
修正速度 -1 22
修正角度 -1 23
预期到达下一站时间 -1 24
数据来源(0公交/交委、1WIFI、2乘车模式、3乘客端)
STN数据[编辑]
备份接收时间
STN STN 0
车辆编号 22270 1
原始时间 2015-01-18 06:45:25 2
原始线路编号(-,x) 872 3
原始方向(-1,0,1) -1 4
原始站点序号 14 5
原始车辆状态(0到站/1离站) 0 6
数据类型(0自带、1生成) 0 7
数据状态(0正常、1修正、2忽略) 2 8
修正时间 - 9
修正线路编号 - 10
修正方向 -1 11
修正站点序号 -1 12
修正车辆状态 -1 13
修正规则序列 - 14
数据来源(0公交/交委、1WIFI、2乘车模式、3乘客端)
路况数据格式说明:
每个城市一分钟输出一个结果文档,文档包含所有公交线路的拥堵指数。
拥堵指数输出:0-4,0不可信,1绿色,2黄色,3橙色,4红色。数值越大越拥堵。
单条数据标示某条线路(上下行)的拥堵指数,路况
{
    "bc": true,    #不理会
    "dir": "0",    #线路方向
    "dis": 84679,  #线路总长度
    "hbc": false,  #不理会
    "id": "040",   #城市编号
    "no": "00010", #线路编号
    "tis": [
        {
            "con": 1,  #置信度,1为可信,有数据计算出来;0为不可信,无车辆数据。
            "idx": 1,  #拥堵指数,0-4,con是0的时候,idx为0;con为1,idx表示1-4
            "pe": 0.00338927, #结束路段percent
            "ps": 0,   #开始路段percent
            "spd": 14.444445  #速度
        },
        {
            "con": 1,
            "idx": 2,
            "pe": 1,
            "ps": 0.9978271,
            "spd": 5.277778
        }
    ]
}
获取某条线路某个方向的基础数据:
获取lineid curl "http://100.98.168.166/baseservice/client/transformationLineNo.action?cityId=040&lineNo=01130&direction=0"
根据lineid获取基础数据
curl "http://100.98.168.166/baseservice/client/getLineAndStopDetails.action?cityId=040&lineId=020-01130-0"
{
"data": {
"lineId": "020-01130-0",
"direction": 0,
"firstTime": "06:00",
"lastTime": "22:30",
"lineNo": "01130",
"lineName": "113",
"stopsNum": 18,
"stopList": [
{
"order": 1,
"stationId": "020-3709",
"slng": 113.254871,
"slat": 23.100087000000002,
"stationName": "南田路总站",
"lat": 23.103567,
"lng": 113.266661,
"distance": 0.0
},
{
"order": 2,
"stationId": "020-8801",
"slng": 113.2601366,
"slat": 23.1017349,
"stationName": "广东药学院",
"lat": 23.10529127428721,
"lng": 113.27191266636152,
"distance": 565.7976678248924
},
{
"order": 3,
"stationId": "020-8802",
"slng": 113.257414,
"slat": 23.103354,
"stationName": "宝岗大道中",
"lat": 23.106874,
"lng": 113.269194,
"distance": 987.3034144736644
},
{
"order": 4,
"stationId": "020-7966",
"slng": 113.256228,
"slat": 23.105468000000002,
"stationName": "宝岗大道北",
"lat": 23.108968,
"lng": 113.268008,
"distance": 264.8472503302803
},
{
"order": 5,
"stationId": "020-225",
"slng": 113.25700400000001,
"slat": 23.108544,
"stationName": "市红会医院",
"lat": 23.112059,
"lng": 113.268781,
"distance": 486.56142795434585
},
{
"order": 6,
"stationId": "020-206",
"slng": 113.25736875454545,
"slat": 23.119936027272725,
"stationName": "解放南路",
"lat": 23.12345915782163,
"lng": 113.26913582325273,
"distance": 1507.1913558895294
},
{
"order": 7,
"stationId": "020-240",
"slng": 113.257177,
"slat": 23.125023,
"stationName": "解放中路",
"lat": 23.128545,
"lng": 113.268942,
"distance": 567.8547155555111
},
{
"order": 8,
"stationId": "020-1230",
"slng": 113.25681700000001,
"slat": 23.130492999999998,
"stationName": "迎宾馆",
"lat": 23.134012,
"lng": 113.268583,
"distance": 610.1281617058739
},
{
"order": 9,
"stationId": "020-1304",
"slng": 113.256129,
"slat": 23.144023,
"stationName": "越秀公园",
"lat": 23.147537,
"lng": 113.2679,
"distance": 1512.5632990516897
},
{
"order": 10,
"stationId": "020-185",
"slng": 113.25507804285714,
"slat": 23.158348385714284,
"stationName": "桂花岗",
"lat": 23.16185497004349,
"lng": 113.26686465801826,
"distance": 1700.9521334816363
},
{
"order": 11,
"stationId": "020-8978",
"slng": 113.252313,
"slat": 23.160289,
"stationName": "三元里",
"lat": 23.163752,
"lng": 113.26411,
"distance": 360.01412898005583
},
{
"order": 12,
"stationId": "020-1981",
"slng": 113.248827,
"slat": 23.166491,
"stationName": "北站",
"lat": 23.169899,
"lng": 113.260642,
"distance": 817.5293161737463
},
{
"order": 13,
"stationId": "020-969",
"slng": 113.247547,
"slat": 23.170745,
"stationName": "沙涌",
"lat": 23.174135,
"lng": 113.259367,
"distance": 492.27028596812306
},
{
"order": 14,
"stationId": "020-9378",
"slng": 113.24583,
"slat": 23.175193999999998,
"stationName": "棠下(三元里大道)",
"lat": 23.178554,
"lng": 113.25766,
"distance": 525.6605489940048
},
{
"order": 15,
"stationId": "020-8834",
"slng": 113.245604,
"slat": 23.179861,
"stationName": "三元里大道中",
"lat": 23.183221,
"lng": 113.257444,
"distance": 547.365314110797
},
{
"order": 16,
"stationId": "020-8976",
"slng": 113.24642655555557,
"slat": 23.183560255555555,
"stationName": "棠溪",
"lat": 23.186935612801623,
"lng": 113.25826515436717,
"distance": 422.8888268320661
},
{
"order": 17,
"stationId": "020-8975",
"slng": 113.251532,
"slat": 23.187048,
"stationName": "三元里大道北",
"lat": 23.190513,
"lng": 113.263355,
"distance": 660.932219897004
},
{
"order": 18,
"stationId": "020-3701",
"slng": 113.25215275,
"slat": 23.1898815,
"stationName": "棠安路总站",
"lat": 23.19335803747562,
"lng": 113.26397399043499,
"distance": 676.1515644987248
}
],
"startStopName": "南田路总站",
"endStopName": "棠安路总站",
"oppositeList": [
{
"lineId": "020-01130-1",
"direction": 1,
"firstTime": "06:00",
"lastTime": "22:30",
"lineNo": "01130",
"lineName": "113",
"stopsNum": 18,
"startStopName": "棠安路总站",
"endStopName": "南田路总站",
"price": "2元"
}
],
"price": "2元"
}
}
root@JSTT5
nohup start-STNService_Single.sh 1>/dev/null 2>logs/error_STNEngine${i}.log &
SFLK1
主机: 121.41.116.59
密码: