Kafka and Spark Streaming (Java) Integration: Writing Data to HBase in Real Time, with Code Download

The complete driver program is listed below. It consumes one or more Kafka topics through the receiver-based KafkaUtils.createStream API, performs a word count on each batch, and writes the per-word counts to HBase as atomic counter increments. The helper class HBaseCounterIncrementor that performs the actual writes is part of the downloadable code; a sketch of it follows the listing.

package org.apache.spark.examples.streaming;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import scala.Tuple2;

import com.google.common.collect.Lists;

// The class name is a leftover from an earlier Flume version of this example;
// the input source here is Kafka.
public class SparkStreamingFromFlumeToHBaseExample {

  private static final Pattern SPACE = Pattern.compile(" ");

  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("Usage: SparkStreamingFromFlumeToHBaseExample "
          + "{master} {host} {port} {table} {columnFamily} {windowInSeconds} {slideInSeconds}");
      System.exit(1);
    }

    // String master = args[0];
    // String host = args[1];
    // int port = Integer.parseInt(args[2]);
    String tableName = "test"; // args[3];
    String columnFamily = "f"; // args[4];
    // int windowInSeconds = 3; // Integer.parseInt(args[5]);
    // int slideInSeconds = 1; // Integer.parseInt(args[6]);

    String zkQuorum = "localhost";          // ZooKeeper quorum used by the Kafka consumer
    String group = "test-consumer-group";   // Kafka consumer group id
    String topicss = "test";                // comma-separated list of topics
    String numThread = "2";                 // receiver threads per topic

    // Duration windowInterval = new Duration(windowInSeconds * 1000);
    // Duration slideInterval = new Duration(slideInSeconds * 1000);

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    JavaStreamingContext jssc =
        new JavaStreamingContext(sparkConf, new Duration(2000)); // 2-second batches

    // Broadcast the table name and column family so the closures that run on
    // the executors can reach them without capturing driver state.
    final Broadcast<String> broadcastTableName =
        jssc.sparkContext().broadcast(tableName);
    final Broadcast<String> broadcastColumnFamily =
        jssc.sparkContext().broadcast(columnFamily);

    // JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);

    int numThreads = Integer.parseInt(numThread);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    for (String topic : topicss.split(",")) {
      topicMap.put(topic, numThreads);
    }

    // Receiver-based Kafka stream of (key, message) pairs.
    JavaPairReceiverInputDStream<String, String> messages =
        KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);

    // Word-count pipeline: message -> words -> (word, 1) -> (word, count).
    JavaPairDStream<String, Integer> lastCounts =
        messages.map(new Function<Tuple2<String, String>, String>() {
          @Override
          public String call(Tuple2<String, String> tuple2) {
            return tuple2._2(); // keep the message payload, drop the key
          }
        }).flatMap(new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
          }
        }).mapToPair(new PairFunction<String, String, Integer>() {
          @Override
          public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
          }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
          @Override
          public Integer call(Integer x, Integer y) {
            return x + y;
          }
        });

    // For every batch, push the counts into HBase as counter increments.
    lastCounts.foreachRDD(
        new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
          @Override
          public Void call(JavaPairRDD<String, Integer> values, Time time)
              throws Exception {
            values.foreach(new VoidFunction<Tuple2<String, Integer>>() {
              @Override
              public void call(Tuple2<String, Integer> tuple) throws Exception {
                HBaseCounterIncrementor incrementor =
                    HBaseCounterIncrementor.getInstance(
                        broadcastTableName.value(),
                        broadcastColumnFamily.value());
                // "incerment" (sic) matches the helper class's method name.
                incrementor.incerment("Counter", tuple._1(), tuple._2());
                System.out.println("Counter:" + tuple._1() + "," + tuple._2());
              }
            });
            return null;
          }
        });

    jssc.start();
    jssc.awaitTermination(); // keep the driver alive
  }
}
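
The listing depends on HBaseCounterIncrementor, which is not reproduced in this post (it ships with the downloadable code). Below is a minimal sketch of what such a helper could look like against the HBase 0.94/0.98 client API. Only the class name, the getInstance(table, columnFamily) factory, and the incerment method are taken from the call site above; everything else (the row/qualifier mapping and the singleton HTable per executor JVM) is an assumption.

package org.apache.spark.examples.streaming;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical sketch of the helper used by the streaming job; the real class
// from the code download may buffer and batch its writes instead.
public class HBaseCounterIncrementor {

  private static HBaseCounterIncrementor instance;

  private final HTable table;
  private final byte[] columnFamily;

  private HBaseCounterIncrementor(String tableName, String columnFamily)
      throws IOException {
    Configuration conf = HBaseConfiguration.create();
    this.table = new HTable(conf, tableName);
    this.columnFamily = Bytes.toBytes(columnFamily);
  }

  // Lazily created singleton: one HTable per executor JVM.
  public static synchronized HBaseCounterIncrementor getInstance(
      String tableName, String columnFamily) throws IOException {
    if (instance == null) {
      instance = new HBaseCounterIncrementor(tableName, columnFamily);
    }
    return instance;
  }

  // "incerment" (sic) keeps the spelling expected by the call site.
  // Atomically adds `amount` to the cell at (rowKey, columnFamily:qualifier).
  public void incerment(String rowKey, String qualifier, long amount)
      throws IOException {
    Increment increment = new Increment(Bytes.toBytes(rowKey));
    increment.addColumn(columnFamily, Bytes.toBytes(qualifier), amount);
    table.increment(increment);
  }
}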

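To try the job end to end, the target table and column family have to exist and the topic needs some input. Assuming the defaults hard-coded above (table test, column family f, topic test) and a local Kafka 0.8 broker, something like the following should work; broker address and script paths depend on your installation:

# create the target table with the expected column family
echo "create 'test', 'f'" | hbase shell

# type a few space-separated lines into the topic
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test

# after a couple of batches, inspect the counters
echo "scan 'test'" | hbase shell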