Overview
In the previous article [Apache Druid 0.18.1 compact (merge / compress) segments] I described how to merge and compact segments with a compact task.
A compact task is sufficient for most routine merge jobs.
But when a more complex rebuild is required, for example re-indexing with a different set of dimensions or metrics, compact no longer meets the need.
Hadoop re-indexing task
{
    "type": "index_hadoop",
    "spec": {
        "dataSchema": {
            "dataSource": "loginlog_1h",
            "parser": {
                "type": "hadoopyString",
                "parseSpec": {
                    "format": "json",
                    "timestampSpec": {
                        "column": "timeStamp",
                        "format": "auto"
                    },
                    "dimensionsSpec": {
                        "dimensions": [
                            "realm_id",
                            "app_id",
                            "team_id",
                            "sales_team",
                            "member_id",
                            "member_name"
                        ],
                        "dimensionExclusions": [
                            "timeStamp",
                            "value"
                        ]
                    }
                }
            },
            "metricsSpec": [
                {
                    "type": "count",
                    "name": "count"
                }
            ],
            "granularitySpec": {
                "type": "uniform",
                "segmentGranularity": "DAY",
                "queryGranularity": "HOUR"
            }
        },
        "ioConfig": {
            "type": "hadoop",
            "inputSpec": {
                "type": "dataSource",
                "ingestionSpec": {
                    "dataSource": "loginlog_1h",
                    "intervals": [
                        "2019-01-01/2020-01-01"
                    ]
                }
            }
        },
        "tuningConfig": {
            "type": "hadoop",
            "maxRowsInMemory": 500000,
            "partitionsSpec": {
                "type": "hashed",
                "targetPartitionSize": 5000000
            },
            "numBackgroundPersistThreads": 1,
            "forceExtendableShardSpecs": true,
            "jobProperties": {
                "mapreduce.job.local.dir": "/home/druid/mapred",
                "mapreduce.cluster.local.dir": "/home/mapred",
                "mapred.job.map.memory.mb": 4300,
                "mapreduce.reduce.memory.mb": 4300
            }
        }
    }
}
Hadoop task vs. compact task
For smaller jobs that do not involve rebuilding the data, a compact task is recommended; if the data volume is large, a Hadoop task is recommended.
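For contrast, a minimal compact task of the kind covered in the previous article looks roughly like this (a sketch only; the dataSource and interval are reused from the Hadoop spec above, and the exact fields accepted may vary by Druid version):

    {
        "type": "compact",
        "dataSource": "loginlog_1h",
        "interval": "2019-01-01/2020-01-01"
    }

It simply merges and repartitions the existing segments in the given interval.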
Task submission endpoint
http://overlord:port/druid/indexer/v1/task
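The task JSON can be POSTed to this endpoint, for example with curl (the file name hadoop_reindex_task.json is only illustrative; replace overlord:port with the actual Overlord host and port):

    curl -X POST -H 'Content-Type: application/json' \
         -d @hadoop_reindex_task.json \
         http://overlord:port/druid/indexer/v1/task

The Overlord responds with the task id, and the task can then be tracked in the web console or via the task status API.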
References
https://druid.apache.org/docs/latest/ingestion/hadoop.html