Streamsets提供了丰富的restful接口,通过swagger方式提供,使用很方便,功能很强大。
地址: http://localhost:18630/collector/restapi
这里我们在对Streamsets做二次开发的时候,需要解析返回结果并保存到数据库中,这里把几个有代表性的json解析方式讲解一下。
1、查询所有任务的运行状态
在manager分组下, 不需要参数,直接通过GET /rest/v1/pipelines/status
返回结果如下:
{
"addNewPipeline": {
"pipelineId": "addNewPipeline",
"rev": "0",
"user": "admin",
"status": "EDITED",
"message": "Pipeline edited",
"timeStamp": 1556437391339,
"attributes": {
"IS_REMOTE_PIPELINE": false
},
"executionMode": "STANDALONE",
"metrics": null,
"retryAttempt": 0,
"nextRetryTimeStamp": 0,
"name": "addNewPipeline"
},
"Names2PgStage1wholecopy8396290d-163e-4ec6-9371-5f09baf29a23": {
"pipelineId": "Names2PgStage1wholecopy8396290d-163e-4ec6-9371-5f09baf29a23",
"rev": "0",
"user": "admin",
"status": "STOPPED",
"message": "Stopped while the pipeline was in RETRY state",
"timeStamp": 1560759731119,
"attributes": {
"IS_REMOTE_PIPELINE": false,
"RUNTIME_PARAMETERS": null,
"INTERCEPTOR_CONFIGS": []
},
"executionMode": "STANDALONE",
"metrics": null,
"retryAttempt": 0,
"nextRetryTimeStamp": 0,
"name": "Names2PgStage1wholecopy8396290d-163e-4ec6-9371-5f09baf29a23"
}
}
通过观察可以发现,这个json是有规律的,格式是固定的,所以解析并不难。
// Parse the response of GET /rest/v1/pipelines/status: the top level is a JSON
// object keyed by pipelineId, each value describing one pipeline's run status.
JSONObject jsonObject = JSONObject.parseObject(result);
// Each value is already a JSONObject; cast it directly instead of the original
// toString() + parseObject(Map.class) round-trip, which re-serialized and
// re-parsed every entry for nothing and used a raw Map type.
for (Object obj : jsonObject.values()) {
    JSONObject item = (JSONObject) obj;
    String pipelineId = item.getString("pipelineId");
    Tasks tasks = new Tasks();
    tasks.setPipeline_id(pipelineId);
    // Look up the existing task row for this pipeline; only update if found.
    List<Tasks> lists = tasksService.selectByTask(tasks);
    if (lists != null && !lists.isEmpty()) {
        Tasks task = lists.get(0);
        // getString returns null for a missing/null field instead of throwing
        // the NPE the original m.get("status").toString() would have raised.
        task.setTask_status(item.getString("status"));
        // Keep the original "empty string when message is absent" behavior.
        String message = item.getString("message");
        task.setCommitted_offsets(message == null ? "" : message);
        tasksService.editTask(task);
    }
}
2、查询所有日志
在system分组下,参数可选,直接GET /rest/v1/system/logs?endingOffset=-1
返回结果如下:
[
{
"timestamp": "2019-06-20 09:04:48,875",
"s-user": "*admin",
"s-entity": "hdp/hdpd32e5f16-4d10-439f-b5ad-4fbc29c78211",
"s-runner": "",
"thread": "Spool Directory Runner - 0",
"s-stage": "",
"severity": "DEBUG",
"category": "ProductionSourceOffsetTracker",
"message": "Saving offset {$com.streamsets.pipeline.stage.origin.spooldir.SpoolDirSource.offset.version$=1, SSTableExport-1.0-SNAPSHOT.jar={\"POS\":\"-1\"}} for pipeline hdpd32e5f16-4d10-439f-b5ad-4fbc29c78211"
},
{
"timestamp": "2019-06-20 09:04:48,877",
"s-user": "*admin",
"s-entity": "hdp/hdpd32e5f16-4d10-439f-b5ad-4fbc29c78211",
"s-runner": "",
"thread": "Spool Directory Runner - 0",
"s-stage": "",
"severity": "DEBUG",
"category": "DirectorySpooler",
"message": "Polling for file, waiting '60000' ms"
}
]
通过观察发现这个json也是有固定格式的,遍历解析就可以了
// Parse the response of GET /rest/v1/system/logs: a JSON array of log entries,
// each inserted into the alert-log table tagged with the source SDC instance.
JSONArray jsonArray = JSONArray.parseArray(result);
for (Object obj : jsonArray) {
    JSONObject jsonObject = (JSONObject) obj;
    AlertLogs alertLogs = new AlertLogs();
    alertLogs.setSdc_id(sdc.getSdc_id());
    alertLogs.setSdc_name(sdc.getSdc_name());
    alertLogs.setSdc_ip(sdc.getSdc_ip());
    alertLogs.setSdc_port(sdc.getSdc_port());
    alertLogs.setLog_time(jsonObject.getDate("timestamp"));
    alertLogs.setMessage(jsonObject.getString("message"));
    alertLogs.setLog_level(jsonObject.getString("severity"));
    // "s-entity" looks like "<title>/<pipelineId>" (e.g. "hdp/hdpd32e5f16-...").
    // Split on the LAST '/': the original split("/")-and-concatenate approach
    // silently dropped the '/' separators whenever the title itself contained
    // slashes. Also guard against a missing field (getString returns null).
    String entity = jsonObject.getString("s-entity");
    if (entity != null) {
        int idx = entity.lastIndexOf('/');
        if (idx >= 0) {
            alertLogs.setPipeline_title(entity.substring(0, idx));
            alertLogs.setPipeline_id(entity.substring(idx + 1));
        }
    }
    alertLogsService.insert(alertLogs);
}
3、查询streamsets的jmx信息
这个接口是在页面中调用的,swagger页面没有列出,地址 GET /rest/v1/system/jmx
返回结果如下,结果特别长,这里列举一部分:
{
"beans": [
{
"name": "metrics:name=sdc.pipeline.testcopya2cc2ed4-bd24-441a-9b09-6a2fdbb4b082.0.pipeline.batchInputRecords.meter",
"modelerType": "com.codahale.metrics.jmx.JmxReporter$JmxMeter",
"RateUnit": "events/second",
"OneMinuteRate": 1.5702124778961105e-86,
"FiveMinuteRate": 1.5921483231199352e-14,
"FifteenMinuteRate": 0.015958339755563485,
"MeanRate": 6.420727951968242,
"Count": 79884
},
{
"name": "metrics:name=jvm.threads.new.count",
"modelerType": "com.codahale.metrics.jmx.JmxReporter$JmxGauge",
"Value": 0
},
{
"name": "metrics:name=sdc.pipeline.loalToTrash.0.stage.Directory_01.errorRecords.meter",
"modelerType": "com.codahale.metrics.jmx.JmxReporter$JmxMeter",
"RateUnit": "events/second",
"OneMinuteRate": 0,
"FiveMinuteRate": 0,
"FifteenMinuteRate": 0,
"MeanRate": 0,
"Count": 0
},
{
"name": "metrics:name=jvm.memory.pools.Metaspace.usage",
"modelerType": "com.codahale.metrics.jmx.JmxReporter$JmxGauge",
"Value": 0.9130996604846318
},
{
"name": "metrics:name=sdc.pipeline.loalToTrash.0.stage.Directory_01.errorRecords.histogramM5",
"modelerType": "com.codahale.metrics.jmx.JmxReporter$JmxHistogram",
"50thPercentile": 0,
"SnapshotSize": 1028,
"Max": 0,
"Mean": 0,
"Min": 0,
"75thPercentile": 0,
"95thPercentile": 0,
"98thPercentile": 0,
"99thPercentile": 0,
"999thPercentile": 0,
"StdDev": 0,
"Count": 2749
}
]
}
通过观察发现这个json格式不统一,没有规律,所以只能分别遍历,通过name判断是哪些指标,然后对应解析,这里列举了一部分代码:
// Dispatch on the JMX bean name and persist the metrics it carries.
// NOTE(review): name appears to be a String while JmxEnum.* are constants from
// a project class — if JmxEnum is a real enum, String.equals(enumConstant) is
// always false; presumably these are String constants. Verify against JmxEnum.
// NOTE(review): the same metricSdcJmx instance is mutated and re-inserted for
// each metric; this is safe only if insert() copies/consumes the values
// synchronously — confirm against metricSdcJmxService.
if(name.equals(JmxEnum.OPERATING_SYSTEM)){
// OperatingSystem bean: CPU load is a fractional value, stored via setUsage.
metricSdcJmx.setMetric_name(JmxMetricNameEnum.ProcessCpuLoad);
metricSdcJmx.setUsage(jsonObject.getDouble("ProcessCpuLoad"));
metricSdcJmxService.insert(metricSdcJmx);
}else if(name.equals(JmxEnum.THREADING)){
// Threading bean: three integer counters, one insert per metric.
metricSdcJmx.setMetric_name(JmxMetricNameEnum.ThreadCount);
metricSdcJmx.setVcount(jsonObject.getInteger("ThreadCount"));
metricSdcJmxService.insert(metricSdcJmx);
metricSdcJmx.setMetric_name(JmxMetricNameEnum.PeakThreadCount);
metricSdcJmx.setVcount(jsonObject.getInteger("PeakThreadCount"));
metricSdcJmxService.insert(metricSdcJmx);
metricSdcJmx.setMetric_name(JmxMetricNameEnum.TotalStartedThreadCount);
metricSdcJmx.setVcount(jsonObject.getInteger("TotalStartedThreadCount"));
metricSdcJmxService.insert(metricSdcJmx);
}else if(name.equals(JmxEnum.CLASS_LOADING)){
// ClassLoading bean: loaded/unloaded/total class counters.
metricSdcJmx.setMetric_name(JmxMetricNameEnum.LoadedClassCount);
metricSdcJmx.setVcount(jsonObject.getInteger("LoadedClassCount"));
metricSdcJmxService.insert(metricSdcJmx);
metricSdcJmx.setMetric_name(JmxMetricNameEnum.UnloadedClassCount);
metricSdcJmx.setVcount(jsonObject.getInteger("UnloadedClassCount"));
metricSdcJmxService.insert(metricSdcJmx);
metricSdcJmx.setMetric_name(JmxMetricNameEnum.TotalLoadedClassCount);
metricSdcJmx.setVcount(jsonObject.getInteger("TotalLoadedClassCount"));
metricSdcJmxService.insert(metricSdcJmx);
}
4、查询指定任务的运行metric
在manager分组下,需要pipelineId参数,GET /rest/v1/pipeline/{pipelineId}/metrics
返回结果如下:
//主要的内容
{
"version": "string",
"gauges": {},
"counters": {},
"histograms": {},
"meters": {},
"timers": {}
}
//详细内容,部分
{
"version": "4.0.0",
"gauges": {
"RuntimeStatsGauge.gauge": {
"value": {
"batchCount": 95,
"idleBatchCount": 0,
"timeOfLastReceivedRecord": 1557296520561,
"lastBatchInputRecordsCount": 0,
"lastBatchOutputRecordsCount": 0,
"lastBatchErrorRecordsCount": 0,
"lastBatchErrorMessagesCount": 0,
"totalRunners": 1,
"availableRunners": 1
}
},
。。。。
"counters": {
"custom.Directory_01.pending.files.0.counter": {
"count": 0
},
。。
"histograms": {
"pipeline.errorRecordsPerBatch.histogramM5": {
"count": 95,
"max": 0,
"mean": 0,
"min": 0,
"p50": 0,
"p75": 0,
"p95": 0,
"p98": 0,
"p99": 0,
"p999": 0,
"stddev": 0
},
。。。。
},
"meters": {
"custom.Directory_01.spoolQueue.0.meter": {
"count": 90,
"m1_rate": 11.866331343607989,
"m5_rate": 16.560799463327818,
"m15_rate": 17.50688058809427,
"m30_rate": 17.751728101390487,
"h1_rate": 17.875433024825686,
"h6_rate": 17.97917871834381,
"h12_rate": 17.989586346833782,
"h24_rate": 17.994792420114447,
"mean_rate": 2.9818143805481996,
"units": "events/second"
},。。。
"timers": {
"pipeline.batchProcessing.timer": {
"count": 95,
"max": 5.009,
"mean": 0.2708105263157895,
"min": 0.003,
"p50": 0.006,
"p75": 0.009000000000000001,
"p95": 5.005,
"p98": 5.007160000000001,
"p99": 5.009,
"p999": 5.009,
"stddev": 1.1221307457361267,
"m15_rate": 17.512359692670998,
"m1_rate": 11.9344832175679,
"m5_rate": 16.576790580401955,
"mean_rate": 3.1389838868870585,
"duration_units": "seconds",
"rate_units": "calls/second"
},
。。。
}
通过观察发现,这个json格式也不统一,没有固定规律,但整体上分成了 gauges、counters、histograms、meters、timers 几个部分,所以先解析出各部分,再分别遍历处理
// Walk the "gauges" section of GET /rest/v1/pipeline/{pipelineId}/metrics and
// persist each gauge value. (Loop body continues beyond this excerpt.)
JSONObject gauges= jsonObject.getJSONObject("gauges");
Iterator gauges_it= gauges.entrySet().iterator();
while(gauges_it.hasNext()) {
Map.Entry<String, JSONObject> entry = (Map.Entry<String, JSONObject>) gauges_it.next();
String key = entry.getKey();
JSONObject json = entry.getValue();
// "RuntimeStatsGauge.gauge" carries a fixed set of runtime counters under
// its "value" object; one insert per counter, reusing the same bean.
if (key.startsWith("RuntimeStatsGauge")) {
JSONObject gauge=json.getJSONObject("value");
MetricsPipeline metricsPipeline=new MetricsPipeline();
metricsPipeline.setCollect_time(collect_time);
metricsPipeline.setPipeline_id(pipelineId);
metricsPipeline.setMetric_name(key+".lastBatchErrorMessagesCount");
metricsPipeline.setVcount(gauge.getInteger("lastBatchErrorMessagesCount"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".timeOfLastReceivedRecord");
// NOTE(review): the sample payload shows timeOfLastReceivedRecord as an
// epoch-millis value (1557296520561), which exceeds Integer range —
// getInteger here likely overflows or fails; getLong (and a long-capable
// column) looks required. Confirm against MetricsPipeline.setVcount.
metricsPipeline.setVcount(gauge.getInteger("timeOfLastReceivedRecord"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".idleBatchCount");
metricsPipeline.setVcount(gauge.getInteger("idleBatchCount"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".lastBatchErrorRecordsCount");
metricsPipeline.setVcount(gauge.getInteger("lastBatchErrorRecordsCount"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".lastBatchOutputRecordsCount");
metricsPipeline.setVcount(gauge.getInteger("lastBatchOutputRecordsCount"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".availableRunners");
metricsPipeline.setVcount(gauge.getInteger("availableRunners"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".lastBatchInputRecordsCount");
metricsPipeline.setVcount(gauge.getInteger("lastBatchInputRecordsCount"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".totalRunners");
metricsPipeline.setVcount(gauge.getInteger("totalRunners"));
metricsPipelineService.insert(metricsPipeline);
metricsPipeline.setMetric_name(key+".batchCount");
metricsPipeline.setVcount(gauge.getInteger("batchCount"));
metricsPipelineService.insert(metricsPipeline);
// "custom.*" gauges describe per-stage runner state; stored as strings.
}else if(key.startsWith("custom")){
JSONObject gauge=json.getJSONObject("value");
MetricsPipeline metricsPipeline=new MetricsPipeline();
metricsPipeline.setCollect_time(collect_time);
metricsPipeline.setPipeline_id(pipelineId);
metricsPipeline.setMetric_name(key);
metricsPipeline.setStatus(gauge.getString("Status"));
// NOTE(review): "Current Offset" is stored via setCurrent_stage — the
// field names don't match; possibly a copy-paste slip. Verify intent.
metricsPipeline.setCurrent_stage(gauge.getString("Current Offset"));
metricsPipeline.setThread_name(gauge.getString("Thread Name"));
metricsPipeline.setCurrent_file(gauge.getString("Current File"));
metricsPipelineService.insert(metricsPipeline);
}