StreamSets消费Kafka数据到HBase(复合rowkey):
管道创建及配置:
kafka:
js:
json二次开发:
一,标准的json
解析之前:
{"tagname":"H_ZG_DE_Per1_pExtr2HPH2Abs","time":"2021-05-19 11:27:46","value":"0.000","status":"0"}
解析之后:
{"rowkey":"H_ZG_DE_Per1_pExtr2HPH2Abs_2021-05-19 11:27:46","value":"0.000","status":"0"}
主键拼接:
// StreamSets JavaScript Evaluator script.
// For every record in the batch, write a new record whose 'rowkey' field is
// "<tagname>_<time>" and whose 'value' and 'status' fields are copied from
// the input record. The input 'tagname' and 'time' fields are not carried
// over on their own; they only survive as part of 'rowkey'.
var records = sdc.records;
for (var i = 0; i < records.length; i++) {
  try {
    var source = records[i].value;

    // Collect only the fields that should reach the next stage.
    var newMap = sdc.createMap(false);
    newMap['rowkey'] = source['tagname'] + '_' + source['time'];
    newMap['value'] = source['value'];
    newMap['status'] = source['status'];

    // The new record's ID is derived from the input record's source ID.
    var newRecord = sdc.createRecord(records[i].sourceId + ':newRecordId');
    newRecord.value = newMap;
    sdc.output.write(newRecord);
  } catch (e) {
    // Anything thrown while building or writing the new record routes the
    // original input record to the stage's error output.
    sdc.error.write(records[i], e);
  }
}
hbase:
最后hbase数据:
StreamSets状态:
二,标准的jsonarray
[{"tagname":"MonthlyPowerGeneration","time":"2021-05-30 10:55:51","status":0,"value":"0.005"},{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"4.496"},{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"0.48"}]
转换后:
一行变多行:
{"tagname":"MonthlyPowerGeneration","time":"2021-05-30 10:55:51","status":0,"value":"0.005"}
{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"4.496"}
{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"0.48"}
StreamSets管道配置:
Kafka:
JavaScript:
js解析代码:
// StreamSets JavaScript Evaluator script.
// Each input record's value is read as a list of entries with the fields
// tagname, time, status and value. One output record is written per entry,
// with an added 'rowkey' field set to "<tagname>_<time>".
//
// If any entry throws, the whole input record is sent to the error output;
// output records already written for earlier entries of that input are not
// retracted.
var records = sdc.records;
for (var i = 0; i < records.length; i++) {
  try {
    var result = records[i].value;
    for (var j = 0; j < result.length; j++) {
      var entry = result[j];

      // Derive the new record's ID from the input record's source ID plus the
      // entry index, so records generated from one input do not share an ID.
      var newRecord = sdc.createRecord(records[i].sourceId + ':' + j);

      var resultMap = sdc.createMap(true);
      resultMap.rowkey = entry.tagname + '_' + entry.time;
      resultMap.time = entry.time;
      resultMap.status = entry.status;
      resultMap.value = entry.value;
      newRecord.value = resultMap;

      // Trace output so each generated row can be checked in the pipeline log.
      sdc.log.info("-------------" + newRecord.value['rowkey']);
      sdc.log.info("-------------" + entry.tagname);
      sdc.log.info("-------------" + entry.time);
      sdc.log.info("-------------" + entry.status);
      sdc.log.info("-------------" + entry.value);

      sdc.output.write(newRecord);
    }
  } catch (e) {
    sdc.error.write(records[i], e);
  }
}
Hbase:
指定rowkey
最后导入kafka测试数据:
[{"tagname":"MonthlyPowerGeneration","time":"2021-05-30 10:55:51","status":0,"value":"0.005"},{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"4.496"},{"tagname":"Calculation","time":"2021-05-30 10:55:51","status":0,"value":"0.48"}]
查看日志
查看hbase:
查看StreamSets状态: