目录:
com.jinghang.sparkStreaming.SparkStreamingReceive.java
com.jinghang.test.sparkStreaming.Streaming.java
com.jinghang.utils.MapUtils.java
SparkStreamingReceive
package com.jinghang.sparkStreaming;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jinghang.entry.AppErrorLog;
import com.jinghang.utils.MapUtils;
import com.jinghang.utils.PropertityUtils;
import clojure.main;
import scala.Tuple2;
/**
 * Consumes app error-log records from Kafka via the Spark Streaming direct API,
 * counts occurrences of each distinct error per 5-second batch, and prints the result.
 *
 * <p>Payload contract: each Kafka record value is {@code "<entityClassName>:<json>"},
 * e.g. {@code "com.jinghang.entry.AppErrorLog:{...}"}.
 */
public class SparkStreamingReceive {

    /** Kafka consumer configuration; populated once in main() before the stream is created. */
    private static final HashMap<String, Object> kafkaParams = new HashMap<>();

    /** Topics to subscribe to, read from configuration (topicerror=apperrorspark). */
    private static final List<String> topics =
            Arrays.asList(PropertityUtils.getValue("topicerror"));

    /** Jackson's ObjectMapper is thread-safe once configured; share a single instance. */
    private static final ObjectMapper objectMapper = new ObjectMapper();

    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("SparkStreamingReceive")
                .setMaster("local[3]");
        // Kryo is faster and more compact than default Java serialization for shuffles.
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        JavaStreamingContext jssc = new JavaStreamingContext(jsc, Durations.seconds(5));

        kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PropertityUtils.getValue("brokerList"));
        kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, PropertityUtils.getValue("groupid"));
        kafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, PropertityUtils.getValue("offsetreset"));
        kafkaParams.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, PropertityUtils.getValue("autocommit"));

        JavaInputDStream<ConsumerRecord<String, String>> directStream = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

        // Repartition to raise parallelism for the downstream map/reduce stages.
        JavaDStream<ConsumerRecord<String, String>> repartitionedDStream = directStream.repartition(2);

        // Business requirement: count how many times each distinct error occurs.
        processMapReduce(repartitionedDStream);

        jssc.start();
        try {
            jssc.awaitTermination();
        } catch (InterruptedException e) {
            // Restore the interrupt status instead of swallowing it, so the JVM
            // and any enclosing framework can observe that the thread was interrupted.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Turns each Kafka record value ("className:json") into an AppErrorLog entity,
     * maps it to a (dimensionKey, 1) pair, sums the counts per key within the batch,
     * and prints the aggregated result of every batch.
     *
     * @param repartitionedDStream the repartitioned input stream of Kafka records
     */
    private static void processMapReduce(JavaDStream<ConsumerRecord<String, String>> repartitionedDStream) {
        // Extract the raw String payload from each ConsumerRecord.
        JavaDStream<String> lines = repartitionedDStream.map(
                new Function<ConsumerRecord<String, String>, String>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public String call(ConsumerRecord<String, String> record) throws Exception {
                        System.err.println(record.value());
                        return record.value();
                    }
                });

        // mapPartitionsToPair amortizes per-partition work versus a per-record mapToPair.
        JavaPairDStream<String, Integer> pairDStream = lines.mapPartitionsToPair(
                new PairFlatMapFunction<Iterator<String>, String, Integer>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Iterator<Tuple2<String, Integer>> call(Iterator<String> it) throws Exception {
                        ArrayList<Tuple2<String, Integer>> tuple2list = new ArrayList<Tuple2<String, Integer>>();
                        while (it.hasNext()) {
                            String line = it.next();
                            // Payload format: "com.jinghang.entry.AppErrorLog:" + jsonString.
                            String[] splited = line.split(":", 2);
                            if (splited.length < 2) {
                                // Malformed record without a ':' separator: previously this
                                // threw ArrayIndexOutOfBoundsException and failed the task.
                                System.err.println("Skipping malformed record: " + line);
                                continue;
                            }
                            // Deserialize the JSON into the entity class named in the prefix.
                            AppErrorLog appErrorLog =
                                    (AppErrorLog) objectMapper.readValue(splited[1], Class.forName(splited[0]));
                            MapUtils.processmap(tuple2list, appErrorLog);
                        }
                        return tuple2list.iterator();
                    }
                });

        // Sum the 1-counts per error key within each batch.
        JavaPairDStream<String, Integer> wordCounts = pairDStream.reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Integer call(Integer v1, Integer v2) throws Exception {
                        return v1 + v2;
                    }
                });

        // Print each batch's aggregated counts on the driver. collect() is acceptable
        // here only because the per-batch reduced result is expected to be small.
        wordCounts.foreachRDD(new VoidFunction<JavaPairRDD<String, Integer>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(JavaPairRDD<String, Integer> res) throws Exception {
                System.err.println(res.collect());
            }
        });
    }
}
Streaming
package com.jinghang.test.sparkStreaming;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.ConsumerStrategy;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.apache.spark.streaming.kafka010.LocationStrategy;
import com.jinghang.utils.PropertityUtils;
/**
 * Minimal Kafka -> Spark Streaming smoke test: subscribes to topic "test1" and
 * prints each record value in 5-second batches until terminated.
 */
public class Streaming {

    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("Streaming").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        JavaStreamingContext jssc = new JavaStreamingContext(jsc, Durations.seconds(5));

        List<String> topics = Arrays.asList("test1");

        HashMap<String, Object> params = new HashMap<>();
        // The broker list comes from configuration. The previous hard-coded
        // "node7-2:9092" entry was always overwritten by this put, so it is removed.
        params.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PropertityUtils.getValue("brokerList"));
        params.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
        params.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
        params.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
        params.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        params.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");

        // Pull data from Kafka with the direct (receiver-less) approach.
        JavaInputDStream<ConsumerRecord<String, String>> dStream = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topics, params));

        dStream.map(new Function<ConsumerRecord<String, String>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String call(ConsumerRecord<String, String> record) throws Exception {
                // record also exposes offset()/partition()/key()/topic() if needed.
                return record.value();
            }
        }).print();

        jssc.start();
        jssc.awaitTermination();
    }
}
MapUtils
package com.jinghang.utils;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.List;

import com.jinghang.entry.AppErrorLog;

import scala.Tuple2;
/**
 * Helper for turning an {@code AppErrorLog} into a (dimensionKey, 1) tuple that
 * downstream reduceByKey can aggregate into daily per-error counts.
 */
public class MapUtils {

    /** Field delimiter used inside the aggregation key. */
    private static final String SPLITSTRING = "####";

    /**
     * Day-bucket formatter. DateTimeFormatter is immutable and thread-safe, so it is
     * cached here instead of allocating a SimpleDateFormat on every call. Uses the
     * system default zone, matching the previous SimpleDateFormat behavior.
     */
    private static final DateTimeFormatter DAY_FORMAT =
            DateTimeFormatter.ofPattern("yyMMdd").withZone(ZoneId.systemDefault());

    private MapUtils() {
        // Utility class: no instances.
    }

    /**
     * Builds a "####"-delimited key from every dimension of the error log plus its
     * day bucket, and appends a (key, 1) tuple to {@code tuple2list}.
     *
     * <p>Null dimension values are rendered as the string "null", exactly as the
     * original string concatenation did.
     *
     * @param tuple2list  output list; one (key, 1) tuple is appended per call
     * @param appErrorLog the parsed error-log entity (must not be null)
     */
    public static void processmap(List<Tuple2<String, Integer>> tuple2list, AppErrorLog appErrorLog) {
        String key = new StringBuilder("ErrorInfoDaily")
                .append(SPLITSTRING).append(appErrorLog.getAppId())          // application id
                .append(SPLITSTRING).append(appErrorLog.getDeviceId())       // unique device id
                .append(SPLITSTRING).append(appErrorLog.getAppVersion())     // app version
                .append(SPLITSTRING).append(appErrorLog.getAppChannel())     // distribution channel
                .append(SPLITSTRING).append(appErrorLog.getAppPlatform())    // platform
                .append(SPLITSTRING).append(appErrorLog.getOsType())         // operating system
                .append(SPLITSTRING).append(appErrorLog.getDeviceStyle())    // device model
                .append(SPLITSTRING).append(appErrorLog.getErrorBrief())     // error summary
                .append(SPLITSTRING).append(appErrorLog.getErrorDetail())    // error detail
                .append(SPLITSTRING).append(transfertime(appErrorLog.getCreatedAtMs())) // day of occurrence
                .toString();
        tuple2list.add(new Tuple2<String, Integer>(key, 1));
    }

    /** Formats epoch milliseconds as a yyMMdd day string in the system time zone. */
    private static String transfertime(Long time) {
        return DAY_FORMAT.format(Instant.ofEpochMilli(time));
    }
}
运行:
1.连接kafka
2.代码Streaming: