Spark directStream: saving and restoring Kafka offsets

From the original article: http://blog.csdn.net/xfg0218/article/details/53014206


As the title says, this is a code memo. With createDirectStream, Spark Streaming does not track consumer offsets in ZooKeeper for you, so the code below uses KafkaCluster to load the group's last committed offsets at startup and to commit each batch's end offsets back once the batch has been processed.

1. Constant.java

package com.sparktest.util;

public class Constant {
    public static String master = "yarn-client";
    public static String topic = "pj";
    public static String appName = "sparktest";
    public static long duration = 10000;
    public static String zookeeper = "10.67.2.20:2181,10.67.2.21:2181";
    public static String brokerlist = "10.67.2.20:9092,10.67.2.21:9092";
    public static String groupId = "com.sparktest";
    public static int partitions = 10;
}

2. App.java

package com.sparktest.app;

import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import kafka.common.TopicAndPartition;
import kafka.message.MessageAndMetadata;
import kafka.serializer.DefaultDecoder;
import kafka.serializer.StringDecoder;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.HasOffsetRanges;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.kafka.OffsetRange;

import scala.Predef;
import scala.Tuple2;
import scala.collection.JavaConversions;

import com.sparktest.util.Constant;

public class App implements Serializable {
    private KafkaCluster kafkaCluster = null;
    private Map<String, String> kafkaParams = new HashMap<String, String>();
    private Set<String> topics = new HashSet<String>();
    private Duration duration = new Duration(Constant.duration);
    private Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
    private static final AtomicReference<OffsetRange[]> offsetRanges = new AtomicReference<OffsetRange[]>();

    public App() {
        kafkaParams.put("metadata.broker.list", Constant.brokerlist);
        kafkaParams.put("group.id", Constant.groupId);

        // KafkaCluster expects a scala.collection.immutable.Map, so convert
        // the Java parameter map via mutable.Map.toMap.
        scala.collection.mutable.Map<String, String> mutableKafkaParam = JavaConversions
                .mapAsScalaMap(kafkaParams);
        scala.collection.immutable.Map<String, String> immutableKafkaParam = mutableKafkaParam
                .toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
                    public Tuple2<String, String> apply(
                            Tuple2<String, String> v1) {
                        return v1;
                    }
                });
        this.kafkaCluster = new KafkaCluster(immutableKafkaParam);
        this.topics.add(Constant.topic);
    }

    public void startApp() {
        JavaSparkContext ctx = new JavaSparkContext(Constant.master,
                Constant.appName);
        JavaStreamingContext jsctx = new JavaStreamingContext(ctx, duration);

        scala.collection.mutable.Set<String> mutableTopics = JavaConversions
                .asScalaSet(this.topics);
        scala.collection.immutable.Set<String> immutableTopics = mutableTopics
                .toSet();
        scala.collection.immutable.Set<TopicAndPartition> scalaTopicAndPartitionSet = kafkaCluster
                .getPartitions(immutableTopics).right().get();

        // First consumption for this group: no committed offsets yet, so
        // default every partition to 0.
        if (kafkaCluster.getConsumerOffsets(kafkaParams.get("group.id"),
                scalaTopicAndPartitionSet).isLeft()) {
            Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions
                    .setAsJavaSet(scalaTopicAndPartitionSet);
            for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {
                this.fromOffsets.put(topicAndPartition, 0L);
            }
        } else {
            // Offsets were committed earlier: resume from them.
            scala.collection.immutable.Map<TopicAndPartition, Object> consumerOffsetsTemp = kafkaCluster
                    .getConsumerOffsets(kafkaParams.get("group.id"),
                            scalaTopicAndPartitionSet).right().get();

            Map<TopicAndPartition, Object> consumerOffsets = JavaConversions
                    .mapAsJavaMap(consumerOffsetsTemp);
            Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions
                    .setAsJavaSet(scalaTopicAndPartitionSet);
            for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {
                Long offset = (Long) consumerOffsets.get(topicAndPartition);
                this.fromOffsets.put(topicAndPartition, offset);
            }
        }
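
        // NOTE (added sketch, not in the original post): seeding unknown
        // partitions with 0L can fail with OffsetOutOfRangeException once
        // Kafka has deleted its oldest log segments; if the KafkaCluster
        // version in use exposes getEarliestLeaderOffsets, seeding from its
        // result is the safer default.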

        // With the starting offsets in hand, create the direct stream; the
        // message handler just returns the raw payload bytes.
        JavaInputDStream<byte[]> stream = KafkaUtils.createDirectStream(jsctx,
                String.class, byte[].class, StringDecoder.class,
                DefaultDecoder.class, byte[].class, kafkaParams,
                this.fromOffsets,
                new Function<MessageAndMetadata<String, byte[]>, byte[]>() {
                    public byte[] call(MessageAndMetadata<String, byte[]> v1)
                            throws Exception {
                        return v1.message();
                    }
                });

        stream.foreachRDD(new Function<JavaRDD<byte[]>, Void>() {
            public Void call(JavaRDD<byte[]> arg0) throws Exception {
                OffsetRange[] offsets = ((HasOffsetRanges) arg0.rdd()).offsetRanges();
                for (OffsetRange o : offsets) {
                    // Map this range's topic/partition to the offset the batch
                    // ended at, as a Java Map.
                    TopicAndPartition topicAndPartition = new TopicAndPartition(o.topic(), o.partition());
                    Map<TopicAndPartition, Object> topicAndPartitionObjectMap = new HashMap<TopicAndPartition, Object>();
                    topicAndPartitionObjectMap.put(topicAndPartition, o.untilOffset());

                    // Convert the Java map to a scala immutable.Map.
                    scala.collection.mutable.Map<TopicAndPartition, Object> map =
                            JavaConversions.mapAsScalaMap(topicAndPartitionObjectMap);
                    scala.collection.immutable.Map<TopicAndPartition, Object> scalatopicAndPartitionObjectMap =
                            map.toMap(new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() {
                                public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) {
                                    return v1;
                                }
                            });

                    // Commit the updated offsets back through kafkaCluster.
                    kafkaCluster.setConsumerOffsets(Constant.groupId, scalatopicAndPartitionObjectMap);
                }

                System.out.println("==========================" + arg0.count()
                        + "==================================");
                return null;
            }
        });

        jsctx.start();
        jsctx.awaitTermination();
    }

    public static void main(String[] args) {
        App app = new App();
        app.startApp();
    }
}
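
The Java-to-Scala immutable-map conversion above appears twice: once for the constructor's kafkaParams and once per committed offset range. A small generic helper could factor it out; the sketch below is a hypothetical refactoring (the name toScalaImmutableMap is chosen here for illustration) that drops into App.java and reuses the same Scala 2.10 JavaConversions/Predef pattern and imports already shown above.

    // Hypothetical helper, not in the original post: wraps the repeated
    // java.util.Map -> scala.collection.immutable.Map conversion.
    private static <K, V> scala.collection.immutable.Map<K, V> toScalaImmutableMap(
            Map<K, V> javaMap) {
        scala.collection.mutable.Map<K, V> mutableMap = JavaConversions.mapAsScalaMap(javaMap);
        return mutableMap.toMap(
                new Predef.$less$colon$less<Tuple2<K, V>, Tuple2<K, V>>() {
                    public Tuple2<K, V> apply(Tuple2<K, V> v1) {
                        return v1; // identity: each entry is already a Tuple2
                    }
                });
    }

With that in place, the constructor body collapses to this.kafkaCluster = new KafkaCluster(toScalaImmutableMap(kafkaParams)); and each offset commit to kafkaCluster.setConsumerOffsets(Constant.groupId, toScalaImmutableMap(topicAndPartitionObjectMap));.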

3. pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>sparktest</groupId>
  <artifactId>sparktest</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>sparktest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.sparktest.app.App</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.7</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.10</artifactId>
      <version>1.3.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka_2.10</artifactId>
      <version>1.3.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-yarn_2.10</artifactId>
      <version>1.3.0</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>
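
How this is meant to be run (my reading of the pom, not spelled out in the original post): build a jar-with-dependencies via the assembly plugin, e.g. mvn clean package assembly:single, then launch the fat jar with spark-submit --class com.sparktest.app.App; since Constant.master is hard-coded to yarn-client, the job requests YARN executors on its own.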
