When a real-time job needs to be rerun, you sometimes have to backfill data from an earlier point in time, down to a specific moment. Kafka itself does not seem to offer an API for this (or does it?), so here is a small Java program that covers this need.
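A note on that "or does it?": for lookups against the Kafka record timestamp, consumer clients since 0.10.1 do expose offsetsForTimes. Below is a minimal sketch, reusing the broker, topic and timestamp placeholders from the example command further down; the program in this post is still useful because it matches a timestamp carried inside the message body (the meta.time field) rather than the record timestamp.

import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

public class OffsetsForTimesDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "xxxx:9092"); // placeholder broker address
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            TopicPartition tp = new TopicPartition("topicName", 0); // placeholder topic, partition 0
            // earliest offset whose record timestamp is >= the given epoch-millis value
            Map<TopicPartition, OffsetAndTimestamp> result =
                    consumer.offsetsForTimes(Collections.singletonMap(tp, 1667404800000L));
            OffsetAndTimestamp oat = result.get(tp);
            if (oat != null) {
                System.out.println("offset=" + oat.offset() + ", timestamp=" + oat.timestamp());
            }
        }
    }
}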
The general idea: fetch the earliest and the latest offset of the given topic, split that range into 10 sub-intervals, read the records at the sub-interval boundaries to get their timestamps, work out which sub-interval the target timestamp falls into, then repeat the split inside that sub-interval, and so on until the offset closest to the target timestamp is found. For example, a range of 1,000,000 offsets is first probed at offsets 0, 100,000, 200,000, ...; if the target timestamp falls between the records at 300,000 and 400,000, the next pass probes 300,000, 310,000, 320,000, and so on.
The program takes five arguments:
kafkaBootstrapServers, the pause between Kafka requests (milliseconds), the Kafka poll timeout (milliseconds), the topic, and the target timestamp (epoch milliseconds). Note that only partition 0 of the topic is inspected, and the timestamp is matched against the meta.time field parsed from each message body, not against the Kafka record timestamp.
Example command:
java -cp tools.jar com.yhl.offset_by_timestamp.SeekOffsetProd \
xxxx:9092 \
1000 \
1000 \
topicName \
1667404800000
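Each narrowing step is logged, and at the end the program prints the final result as a JSON object of the following shape (the values here are illustrative):
{"date":"2022-11-03 00:00:00","offset":"1234567","timestamp":"1667404800123"}
The offset field is the position closest to the requested timestamp.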
package com.yhl.offset_by_timestamp;

// JSON handling below (JSON.parseObject / JSON.toJSONString / JSONObject) matches the fastjson API;
// adjust these imports if another JSON library is used
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.TopicPartition;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.*;

public class SeekOffsetProd {
private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
private static SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
public static void main(String[] args) throws Exception {
String kafkaBootstrapServers = args[0];
Long sleepTime = Long.parseLong(args[1]);
Long timeout = Long.parseLong(args[2]);
String topic = args[3];
Long fetchDataTime = Long.parseLong(args[4]);
Properties props = new Properties();
props.put("bootstrap.servers", kafkaBootstrapServers);
props.put("group.id", "test1");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
try {
System.out.println(offsetByTimestamp(props,topic,fetchDataTime,timeout,sleepTime));
}catch (Exception e){
// fallback: if the narrowing loop ends with an exception, the closest match is the last result printed above
System.out.println("The offset/timestamp closest to the requested time is the last one printed above.");
e.printStackTrace();
}
}
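// Repeatedly narrows the [earliest, latest] offset range of partition 0 until it is only a couple of
// offsets wide, then prints the offset whose payload timestamp is closest to the target timestamp.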
public static String offsetByTimestamp(Properties props, String topicName,Long timestamp,Long timeout,Long sleepTime) throws Exception{
String begin = seekToBeginOffset(props, topicName);
String end = seekToEndOffset(props, topicName);
Long begin_offset = JSON.parseObject(begin).getLong("offset");
Long end_offset = JSON.parseObject(end).getLong("offset");
System.out.println("开始有值 :" + begin_offset +" , "+ end_offset);
Map<String,Long> jsonMap = new HashMap<>();
jsonMap.put("begin_offset",begin_offset);
jsonMap.put("end_offset",end_offset);
String jsonstr = JSON.toJSONString(jsonMap);
String aa = "";
while(true){
aa = aa(jsonstr, timestamp, props, topicName,timeout,sleepTime);
Long beginTimestamp = JSON.parseObject(aa).getLong("begin_timestamp");
Long endTimestamp = JSON.parseObject(aa).getLong("end_timestamp");
Long beginOffset = JSON.parseObject(aa).getLong("begin_offset");
Long endOffset = JSON.parseObject(aa).getLong("end_offset");
String beginDate = JSON.parseObject(aa).getString("begin_date");
String endDate = JSON.parseObject(aa).getString("end_date");
long diff = endOffset - beginOffset;
// stop narrowing once the candidate range is only a couple of offsets wide
if(diff <= 2){
System.out.println("Final result:");
System.out.println("\n");
if(Math.abs(timestamp - beginTimestamp) <= Math.abs(timestamp - endTimestamp)){
Map<String,String> result = new HashMap<>();
result.put("date",beginDate);
result.put("timestamp",String.valueOf(beginTimestamp));
result.put("offset",String.valueOf(beginOffset));
System.out.println(JSON.toJSONString(result));
}else{
Map<String,String> result = new HashMap<>();
result.put("date",endDate);
result.put("timestamp",String.valueOf(endTimestamp));
result.put("offset",String.valueOf(endOffset));
System.out.println(JSON.toJSONString(result));
}
break;
}
jsonstr = aa;
}
return "";
}
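// Splits the current offset range into 10 sub-intervals, probes the record at each boundary
// (sleeping sleepTime between probes), and returns, as a JSON string, the offsets, timestamps
// and dates of the sub-interval whose timestamps bracket the target timestamp.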
public static String aa (String jsonstr, Long timestamp, Properties props,String topicName,Long timeout,Long sleepTime) throws Exception{
long begin_offset = JSON.parseObject(jsonstr).getLong("begin_offset");
long end_offset = JSON.parseObject(jsonstr).getLong("end_offset");
// split the remaining range into 10 sub-intervals; the remainder is folded into the last one
long totalDuration = end_offset - begin_offset;
long yuNum = totalDuration % 10;
// guard against a zero step when the range is narrower than 10 offsets, so the probes keep advancing
long duration = Math.max((totalDuration - yuNum) / 10, 1);
System.out.println("================================================================================");
System.out.println("totalDuration = " + totalDuration);
System.out.println(" yuNum = " + yuNum);
System.out.println(" duration = " + duration);
for (int i = 0; i < 10; i++) {
System.out.println(i);
Long timestamp_1 = 0L;
Long timestamp_2 = 0L;
int pre_index = i;
int suff_index = i+1;
if(suff_index != 10){
timestamp_1 = JSON.parseObject(seekOffset(props, topicName,(long)((long)duration *pre_index + begin_offset ),timeout)).getLong("timestamp");
timestamp_2 = JSON.parseObject(seekOffset(props, topicName,(long)((long)duration *suff_index + begin_offset ),timeout)).getLong("timestamp");
}else{
timestamp_1 = JSON.parseObject(seekOffset(props, topicName,(long)((long)duration *pre_index + begin_offset ),timeout)).getLong("timestamp");
timestamp_2 = JSON.parseObject(seekOffset(props, topicName,(long)((long)duration *suff_index + begin_offset ) + yuNum,timeout)).getLong("timestamp");
}
System.out.println(" pre_index " + (long)((long)duration *pre_index + begin_offset));
System.out.println(" suff_index " + (long)((long)duration *suff_index + begin_offset));
System.out.println(" pre_timestamp " + timestamp_1 +" | "+sdf2.format(new Date(timestamp_1)));
System.out.println("suff_timestamp " + timestamp_2 +" | "+sdf2.format(new Date(timestamp_2)));
if(timestamp_1 <= timestamp && timestamp <= timestamp_2){
System.out.println(
sdf2.format(new Date(timestamp_1)) +" | "
+sdf2.format(new Date(timestamp)) +" | "
+ sdf2.format(new Date(timestamp_2)));
Map<String,String> resultJsonstr = new HashMap<>();
resultJsonstr.put("begin_offset",String.valueOf((long)((long)duration *pre_index + begin_offset)));
resultJsonstr.put("end_offset",String.valueOf((long)((long)duration *suff_index + begin_offset)));
resultJsonstr.put("begin_timestamp",String.valueOf(timestamp_1));
resultJsonstr.put("end_timestamp",String.valueOf(timestamp_2));
resultJsonstr.put("begin_date",sdf2.format(new Date(timestamp_1)));
resultJsonstr.put("end_date",sdf2.format(new Date(timestamp_2)));
return JSON.toJSONString(resultJsonstr);
}
Thread.sleep(sleepTime);
}
return null;
}
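// Returns the offset and payload timestamp of the earliest record in partition 0, as a JSON string.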
public static String seekToBeginOffset(Properties props, String topicName ) throws Exception{
Consumer<String, String> consumer = null;
try {
consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);
TopicPartition topicPartition = new TopicPartition(topicName,0);
List<TopicPartition> topicPartitions = new ArrayList<>();
topicPartitions.add(topicPartition);
consumer.assign(topicPartitions);
// read the earliest record to get its offset and payload timestamp
consumer.seekToBeginning(topicPartitions);
ConsumerRecords<String, String> poll = consumer.poll(Duration.ofSeconds(1000));
Iterator<ConsumerRecord<String, String>> iterator = poll.iterator();
if(iterator.hasNext()){
ConsumerRecord<String, String> next = iterator.next();
long timestamp = getTimestampFromParseKafkaMeta(next.value());
//System.out.println(timestamp);
//System.out.println(next.value());
//System.out.println("timestamp " +next.timestamp());
//System.out.println("topic " +next.topic());
//System.out.println("partition " +next.partition());
//System.out.println("offset " + next.offset());
Map<String,Long> tsoffset = new HashMap<>();
tsoffset.put("timestamp",timestamp);
tsoffset.put("offset",next.offset());
System.out.println(JSON.toJSONString(tsoffset));
return JSON.toJSONString(tsoffset);
}else{
return null;
}
}finally {
consumer.close();
}
}
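// Returns the offset and payload timestamp of the last record in partition 0, as a JSON string.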
public static String seekToEndOffset(Properties props, String topicName ) throws Exception {
Consumer<String, String> consumer =null;
try {
consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);
TopicPartition topicPartition = new TopicPartition(topicName,0);
List<TopicPartition> topicPartitions = new ArrayList<>();
topicPartitions.add(topicPartition);
consumer.assign(topicPartitions);
consumer.seekToEnd(topicPartitions);
// seekToEnd positions at the offset of the next record to be written; step back one so the last existing record is read
long lastOffset = Math.max(consumer.position(topicPartition) - 1, 0);
consumer.seek(topicPartition, lastOffset);
ConsumerRecords<String, String> poll = consumer.poll(Duration.ofSeconds(1000));
Iterator<ConsumerRecord<String, String>> iterator = poll.iterator();
if(iterator.hasNext()){
ConsumerRecord<String, String> next = iterator.next();
long timestamp = getTimestampFromParseKafkaMeta(next.value());
//System.out.println(next.value());
//System.out.println("topic " +next.topic());
//System.out.println("partition " +next.partition());
//System.out.println("offset " + next.offset());
Map<String,Long> tsoffset = new HashMap<>();
tsoffset.put("timestamp",timestamp);
tsoffset.put("offset",next.offset());
return JSON.toJSONString(tsoffset);
}else{
return null;
}
}finally {
consumer.close();
}
}
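// Seeks partition 0 to the given offset and returns that record's offset, payload timestamp and formatted date as JSON.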
public static String seekOffset(Properties props, String topicName ,Long offset,long timeout) throws Exception {
Consumer<String, String> consumer =null;
try {
consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);
TopicPartition topicPartition = new TopicPartition(topicName,0);
List<TopicPartition> topicPartitions = new ArrayList<>();
topicPartitions.add(topicPartition);
consumer.assign(topicPartitions);
consumer.seek(topicPartition,offset);
ConsumerRecords<String, String> poll = consumer.poll(Duration.ofSeconds(timeout));
Iterator<ConsumerRecord<String, String>> iterator = poll.iterator();
if(iterator.hasNext()){
ConsumerRecord<String, String> next = iterator.next();
long timestamp = getTimestampFromParseKafkaMeta(next.value());
//System.out.println(next.value());
//System.out.println("topic " +next.topic());
//System.out.println("partition " +next.partition());
//System.out.println("offset " + next.offset());
Map<String,String> tsoffset = new HashMap<>();
tsoffset.put("timestamp",String.valueOf(timestamp));
tsoffset.put("offset",String.valueOf(next.offset()));
tsoffset.put("date",sdf2.format(new Date(timestamp)));
// record fetched after the seek; log its date so the narrowing steps can be followed
System.out.println("date ======================================= " + sdf2.format(new Date(timestamp)));
return JSON.toJSONString(tsoffset);
}else{
return null;
}
}finally {
consumer.close();
}
}
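// Parses the business timestamp out of the message body: the value is expected to be JSON
// with a meta.time field in yyyy-MM-dd'T'HH:mm:ss format.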
public static long getTimestampFromParseKafkaMeta(String str) throws Exception{
JSONObject jsonObject = JSON.parseObject(str);
String time = jsonObject.getJSONObject("meta").getString("time");
Date date = sdf.parse(time);
return date.getTime();
}
}
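Once the offset closest to the target time is known, the rerun job just has to start consuming from it. Below is a minimal sketch with a plain KafkaConsumer; the group id, topic and offset value are placeholders to replace with your own.

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class SeekAndReplay {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "xxxx:9092"); // placeholder broker address
        props.put("group.id", "replay-job");          // placeholder group id
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        long startOffset = 1234567L; // the offset printed by SeekOffsetProd (illustrative value)
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            TopicPartition tp = new TopicPartition("topicName", 0);
            consumer.assign(Collections.singletonList(tp));
            consumer.seek(tp, startOffset); // start the backfill exactly at the found offset
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(5));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.offset() + " -> " + record.value());
            }
        }
    }
}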