import jdk.nashorn.internal.codegen.CompilerConstants; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.streaming.api.CheckpointingMode; import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.CheckpointConfig; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import sun.util.resources.cldr.ar.CalendarData_ar_SD; import java.util.HashMap; import java.util.Properties; /** * kafkaSource * * 從指定的offset出消费kafka */ public class StreamingKafkaSource { public static void main(String[] args) throws Exception { //获取Flink的运行环境 StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); //checkpoint配置 env.enableCheckpointing(5000); env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().setCheckpointTimeout(60000); env.getCheckpointConfig().setMinPauseBetweenCheckpoints(500); env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); //设置statebackend //env.setStateBackend(new RocksDBStateBackend("hdfs://hadoop100:9000/flink/checkpoints",true)); //设置kafka 的基本信息 String topic = "test"; Properties prop = new Properties(); prop.setProperty("bootstrap.servers","192.168.200.10:9092"); prop.setProperty("group.id","con1"); //设置kafka 中主题对应分区的开始消费的offset HashMap<KafkaTopicPartition, Long> kafkaTopicPartitionMap = new HashMap<>(); kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,0),10L); kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,1),0L); kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,2),0L); 
FlinkKafkaConsumer011<String> myConsumer = new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), prop); //指定消费的策略 //myConsumer.setStartFromGroupOffsets(); //默认消费策略 消費kafka正在传输的信息 myConsumer.setStartFromSpecificOffsets(kafkaTopicPartitionMap); //从指定的offset开始消费 DataStreamSource<String> text = env.addSource(myConsumer); /* text.map(new MapFunction<String, Object>() { @Override public Object map(String value) throws Exception { System.out.println("原始接收到数据:" + value); System.out.println(); Thread.sleep(10000000); //只是便于查看 return value; } });*/ text.print().setParallelism(1); env.execute("StreamingFromCollection"); } }
Flink: reading data from Kafka (Java)
Latest recommended article published on 2024-07-27 15:05:24.