Integrating Kafka with Spark Streaming (Java) and Writing the Data to HBase in Real Time

Reposted from: https://blog.csdn.net/q79969786/article/details/42793487


Maven configuration: pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>spark</groupId>
    <artifactId>spark</artifactId>
    <version>1</version>
    <packaging>jar</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.10</artifactId>
            <version>1.2.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka_2.10</artifactId>
            <version>1.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.clojure</groupId>
            <artifactId>clojure</artifactId>
            <version>1.6.0</version>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>11.0.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>0.98.4-hadoop2</version>
        </dependency>
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>2.5.0</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty</artifactId>
            <version>3.6.6.Final</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>0.98.4-hadoop2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>0.98.4-hadoop2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.cloudera.htrace</groupId>
            <artifactId>htrace-core</artifactId>
            <version>2.01</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Bind the maven-assembly-plugin to the package phase; this creates
                a jar bundling all non-provided dependencies, suitable for deployment to a cluster. -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass />
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>com.theoryinpractise</groupId>
                <artifactId>clojure-maven-plugin</artifactId>
                <extensions>true</extensions>
                <configuration>
                    <sourceDirectories>
                        <sourceDirectory>src/clj</sourceDirectory>
                    </sourceDirectories>
                </configuration>
                <executions>
                    <execution>
                        <id>compile</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>1.2.1</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>exec</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <executable>java</executable>
                    <includeProjectDependencies>true</includeProjectDependencies>
                    <includePluginDependencies>false</includePluginDependencies>
                    <classpathScope>compile</classpathScope>
                    <mainClass>${storm.topology}</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>
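
With this pom, the maven-assembly-plugin is bound to the package phase, so a single standard Maven invocation builds the fat jar:

    mvn clean package

Because the artifactId is spark and the version is 1, the assembly is written to target/spark-1-jar-with-dependencies.jar, which is the jar submitted to the cluster below.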


Java code: SparkStreamingFromFlumeToHBaseExample.java

package org.apache.spark.examples.streaming;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import scala.Tuple2;

import com.google.common.collect.Lists;

public class SparkStreamingFromFlumeToHBaseExample {

  private static final Pattern SPACE = Pattern.compile(" ");

  public static void main(String[] args) {
    if (args.length == 0) {
      System.err
          .println("Usage: SparkStreamingFromFlumeToHBaseExample {master} {host} {port} {table} {columnFamily} {windowInSeconds} {slideInSeconds}");
      System.exit(1);
    }

    // String master = args[0];
    // String host = args[1];
    // int port = Integer.parseInt(args[2]);
    String tableName = "test";// args[3];
    String columnFamily = "f";// args[4];
    // int windowInSeconds = 3;// Integer.parseInt(args[5]);
    // int slideInSeconds = 1;// Integer.parseInt(args[6]);

    String zkQuorum = "localhost";
    String group = "test-consumer-group";
    String topicss = "test";
    String numThread = "2";

    Duration batchInterval = new Duration(5000);
    // Duration windowInterval = new Duration(windowInSeconds * 1000);
    // Duration slideInterval = new Duration(slideInSeconds * 1000);

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    JavaStreamingContext jssc =
        new JavaStreamingContext(sparkConf, new Duration(2000));

    final Broadcast<String> broadcastTableName =
        jssc.sparkContext().broadcast(tableName);
    final Broadcast<String> broadcastColumnFamily =
        jssc.sparkContext().broadcast(columnFamily);

    // JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);

    int numThreads = Integer.parseInt(numThread);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = topicss.split(",");
    for (String topic : topics) {
      topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);

    JavaDStream<String> lines =
        messages.map(new Function<Tuple2<String, String>, String>() {
          @Override
          public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
          }
        });

    JavaDStream<String> words =
        lines.flatMap(new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
          }
        });

    JavaPairDStream<String, Integer> lastCounts =
        messages.map(new Function<Tuple2<String, String>, String>() {
          @Override
          public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
          }
        }).flatMap(new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
          }
        }).mapToPair(new PairFunction<String, String, Integer>() {
          @Override
          public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
          }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
          @Override
          public Integer call(Integer x, Integer y) throws Exception {
            return x.intValue() + y.intValue();
          }
        });

    lastCounts
        .foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {

          @Override
          public Void call(JavaPairRDD<String, Integer> values, Time time)
              throws Exception {

            values.foreach(new VoidFunction<Tuple2<String, Integer>>() {

              @Override
              public void call(Tuple2<String, Integer> tuple) throws Exception {
                HBaseCounterIncrementor incrementor =
                    HBaseCounterIncrementor.getInstance(
                        broadcastTableName.value(),
                        broadcastColumnFamily.value());
                incrementor.increment("Counter", tuple._1(), tuple._2());
                System.out.println("Counter:" + tuple._1() + "," + tuple._2());

              }
            });

            return null;
          }
        });

    jssc.start();
    jssc.awaitTermination();

  }
}

Java code: CounterMap.java

package org.apache.spark.examples.streaming;

import java.util.HashMap;
import java.util.Map.Entry;
import java.util.Set;

public class CounterMap {
  HashMap<String, Counter> map = new HashMap<String, Counter>();

  public void increment(String key, long increment) {
    Counter count = map.get(key);
    if (count == null) {
      count = new Counter();
      map.put(key, count);
    }
    count.value += increment;
  }

  public long getValue(String key) {
    Counter count = map.get(key);
    if (count != null) {
      return count.value;
    } else {
      return 0;
    }
  }

  public Set<Entry<String, Counter>> entrySet() {
    return map.entrySet();
  }

  public void clear() {
    map.clear();
  }

  public static class Counter {
    public long value;
  }

}
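
CounterMap is just an in-memory accumulator keyed by column qualifier. The sketch below (a hypothetical CounterMapDemo class, not part of the original example) shows its behavior in isolation:

package org.apache.spark.examples.streaming;

public class CounterMapDemo {
  public static void main(String[] args) {
    CounterMap cm = new CounterMap();
    cm.increment("hello", 1);   // first increment lazily creates the Counter
    cm.increment("hello", 2);   // later increments accumulate in the same Counter
    System.out.println(cm.getValue("hello"));   // prints 3
    System.out.println(cm.getValue("missing")); // unknown keys return 0
  }
}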


Java code: HBaseCounterIncrementor.java

package org.apache.spark.examples.streaming;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.spark.examples.streaming.CounterMap;
import org.apache.spark.examples.streaming.CounterMap.Counter;

public class HBaseCounterIncrementor {

  static HBaseCounterIncrementor singleton;
  static String tableName;
  static String columnFamily;
  static HTable hTable;
  static long lastUsed;
  static long flushInterval = 5000; // flush period in ms; originally left at 0, which made FlushThread spin without sleeping
  static CloserThread closerThread;
  static FlushThread flushThread;
  static HashMap<String, CounterMap> rowKeyCounterMap =
      new HashMap<String, CounterMap>();
  static Object locker = new Object();

  private HBaseCounterIncrementor(String tableName, String columnFamily) {
    HBaseCounterIncrementor.tableName = tableName;
    HBaseCounterIncrementor.columnFamily = columnFamily;
  }

  public static HBaseCounterIncrementor getInstance(String tableName,
      String columnFamily) {

    if (singleton == null) {
      synchronized (locker) {
        if (singleton == null) {
          singleton = new HBaseCounterIncrementor(tableName, columnFamily);
          initialize();
        }
      }
    }
    return singleton;
  }

  private static void initialize() {
    if (hTable == null) {
      synchronized (locker) {
        if (hTable == null) {
          Configuration hConfig = HBaseConfiguration.create();
          try {
            hTable = new HTable(hConfig, tableName);
            updateLastUsed();

          } catch (IOException e) {
            throw new RuntimeException(e);
          }
          flushThread = new FlushThread(flushInterval);
          flushThread.start();
          closerThread = new CloserThread();
          closerThread.start();
        }
      }
    }
  }

  public void increment(String rowKey, String key, int increment) {
    increment(rowKey, key, (long) increment);
  }

  public void increment(String rowKey, String key, long increment) {
    CounterMap counterMap = rowKeyCounterMap.get(rowKey);
    if (counterMap == null) {
      counterMap = new CounterMap();
      rowKeyCounterMap.put(rowKey, counterMap);
    }
    counterMap.increment(key, increment);

    initialize();
  }

  private static void updateLastUsed() {
    lastUsed = System.currentTimeMillis();
  }

  protected void close() {
    if (hTable != null) {
      synchronized (locker) {
        if (hTable != null) {
          if (System.currentTimeMillis() - lastUsed > 30000) {
            flushThread.stopLoop();
            flushThread = null;
            try {
              hTable.close();
            } catch (IOException e) {
              e.printStackTrace();
            }

            hTable = null;
          }
        }
      }
    }
  }

  public static class CloserThread extends Thread {

    boolean continueLoop = true;

    @Override
    public void run() {
      while (continueLoop) {

        if (System.currentTimeMillis() - lastUsed > 30000) {
          singleton.close();
          break;
        }

        try {
          Thread.sleep(60000);
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }

    public void stopLoop() {
      continueLoop = false;
    }
  }

  protected static class FlushThread extends Thread {
    long sleepTime;
    boolean continueLoop = true;

    public FlushThread(long sleepTime) {
      this.sleepTime = sleepTime;
    }

    @Override
    public void run() {
      while (continueLoop) {
        try {
          flushToHBase();
        } catch (IOException e) {
          e.printStackTrace();
          break;
        }

        try {
          Thread.sleep(sleepTime);
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }

    private void flushToHBase() throws IOException {
      synchronized (hTable) {
        if (hTable == null) {
          initialize();
        }
        updateLastUsed();

        for (Entry<String, CounterMap> entry : rowKeyCounterMap.entrySet()) {
          CounterMap pastCounterMap = entry.getValue();
          rowKeyCounterMap.put(entry.getKey(), new CounterMap());

          Increment increment = new Increment(Bytes.toBytes(entry.getKey()));

          boolean hasColumns = false;
          for (Entry<String, Counter> entry2 : pastCounterMap.entrySet()) {
            increment.addColumn(Bytes.toBytes(columnFamily),
                Bytes.toBytes(entry2.getKey()), entry2.getValue().value);
            hasColumns = true;
          }
          if (hasColumns) {
            updateLastUsed();
            hTable.increment(increment);
          }
        }
        updateLastUsed();
      }
    }

    public void stopLoop() {
      continueLoop = false;
    }
  }

}


After building with mvn package, upload the resulting spark-1-jar-with-dependencies.jar to the Spark cluster and submit it (the trailing 100 argument exists only to satisfy the args.length check; the table name and column family are hard-coded in the example):

/root/spark-1.2.0-bin-hadoop2.3/bin/spark-submit --class org.apache.spark.examples.streaming.SparkStreamingFromFlumeToHBaseExample --master local[8] /root/spark-1-jar-with-dependencies.jar 100


Create the corresponding table in HBase:

hbase(main):002:0> create 'test', 'f'


Start the Kafka server and a console producer:

[root@n1 kafka-0.8.1]# bin/kafka-server-start.sh config/server.properties
[root@n1 kafka-0.8.1]# bin/kafka-console-producer.sh --broker-list n1:9092 --topic test
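
Kafka 0.8 auto-creates topics by default, but if auto-creation is disabled the test topic can be created up front. A minimal sketch, assuming ZooKeeper listens on localhost:2181 (matching the zkQuorum used in the Java code):

[root@n1 kafka-0.8.1]# bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 2 --topic test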


After typing text at the producer console, the corresponding records are inserted into HBase.
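
The result can be checked from the HBase shell: each word typed into the producer shows up as a column f:<word> on the single row Counter, with its running count stored as the cell value. A quick check (output depends on what was typed into the producer):

hbase(main):003:0> scan 'test'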





The source code can also be downloaded from http://download.csdn.net/detail/q79969786/8369971

