App Project (Part 3)

Contents:

com.jinghang.sparkStreaming.SparkStreamingReceive.java

com.jinghang.test.sparkStreaming.Streaming.java

com.jinghang.utils.MapUtils.java

SparkStreamingReceive

package com.jinghang.sparkStreaming;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.jinghang.entry.AppErrorLog;
import com.jinghang.utils.MapUtils;
import com.jinghang.utils.PropertityUtils;

import scala.Tuple2;

public class SparkStreamingReceive {

	
	private static HashMap<String, Object> kafkaParams = new HashMap<>();
	//topicerror=apperrorspark
	private static List<String> topics = Arrays.asList(PropertityUtils.getValue("topicerror"));
	
	
	public static void main(String[] args) {
		
		SparkConf conf = new SparkConf().setAppName("SparkStreamingReceive").setMaster("local[3]");
		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
		JavaSparkContext jsc = new JavaSparkContext(conf);
		
		JavaStreamingContext jssc = new JavaStreamingContext(jsc, Durations.seconds(5));
//		kafkaParams.put("bootstrap.servers", "localhost:9092,anotherhost:9092");
//		kafkaParams.put("key.deserializer", StringDeserializer.class);
//		kafkaParams.put("value.deserializer", StringDeserializer.class);
//		kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
//		kafkaParams.put("auto.offset.reset", "latest");
//		kafkaParams.put("enable.auto.commit", false);

		kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PropertityUtils.getValue("brokerList"));
		kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
		kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
		kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, PropertityUtils.getValue("groupid"));
		kafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, PropertityUtils.getValue("offsetreset"));
		kafkaParams.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, PropertityUtils.getValue("autocommit"));
		
		JavaInputDStream<ConsumerRecord<String, String>> directStream = KafkaUtils.createDirectStream(jssc, 
				LocationStrategies.PreferConsistent(), 
				ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
		
		// Repartition to increase parallelism
		JavaDStream<ConsumerRecord<String, String>> repartitionedDStream = directStream.repartition(2);
		
		// Requirement: count how many times each error log occurs
		processMapReduce(jssc, repartitionedDStream);
		
		jssc.start();
		
		try {
			jssc.awaitTermination();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		
	}
	
	
	private static ObjectMapper objectMapper = new ObjectMapper();

	private static void processMapReduce(JavaStreamingContext jssc,
			JavaDStream<ConsumerRecord<String, String>> repartitionedDStream) {
		
		JavaDStream<String> lines = repartitionedDStream.map(new Function<ConsumerRecord<String,String>, String>() {
			private static final long serialVersionUID = 1L;
			@Override
			public String call(ConsumerRecord<String, String> record) throws Exception {
				System.err.println(record.value());
				return record.value();
			}
		});
		
		// Turn each log line into a (key, 1) pair, partition by partition
		JavaPairDStream<String, Integer> pairDStream = lines.mapPartitionsToPair(new PairFlatMapFunction<Iterator<String>, String, Integer>() {
			private static final long serialVersionUID = 1L;
			
			@Override
			public Iterator<Tuple2<String, Integer>> call(Iterator<String> it) throws Exception {
				
				ArrayList<Tuple2<String, Integer>> tuple2list = new ArrayList<Tuple2<String, Integer>>();
				
				while (it.hasNext()) {
					String line = it.next();
					//"com.jinghang.entry.AppErrorLog:"+jsonstr
					String[] splited =  line.split(":", 2);
					//Deserialize the JSON payload pulled from Kafka into an entity object
					AppErrorLog appErrorLog = (AppErrorLog) objectMapper.readValue(splited[1], Class.forName(splited[0]));
					MapUtils.processmap(tuple2list,appErrorLog);
				}
				return tuple2list.iterator();
			}
		});
		
		JavaPairDStream<String, Integer> wordCounts = pairDStream.reduceByKey(new Function2<Integer, Integer, Integer>() {
			private static final long serialVersionUID = 1L;
			@Override
			public Integer call(Integer v1, Integer v2) throws Exception {
				return v1+v2;
			}
		});
		
		wordCounts.foreachRDD(new VoidFunction<JavaPairRDD<String,Integer>>() {
			private static final long serialVersionUID = 1L;
			@Override
			public void call(JavaPairRDD<String, Integer> res) throws Exception {
				System.err.println(res.collect());
			}
		});
	}
	
	
	
}
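
The PropertityUtils helper used above (PropertityUtils.getValue) is not listed in this post. Below is a minimal sketch of what it might look like, assuming the keys referenced in the code (brokerList, topicerror, groupid, offsetreset, autocommit) live in a config.properties file on the classpath; the file name and loading strategy are assumptions, not the original implementation.

package com.jinghang.utils;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

//Hypothetical sketch: loads key/value pairs once from a classpath properties file
public class PropertityUtils {

	private static final Properties props = new Properties();

	static {
		//"config.properties" is an assumed file name; the original project may use a different one
		try (InputStream in = PropertityUtils.class.getClassLoader().getResourceAsStream("config.properties")) {
			if (in != null) {
				props.load(in);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	public static String getValue(String key) {
		return props.getProperty(key);
	}
}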

Streaming

package com.jinghang.test.sparkStreaming;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

import com.jinghang.utils.PropertityUtils;

public class Streaming	 {

	public static void main(String[] args) throws InterruptedException {
		
		SparkConf conf = new SparkConf().setAppName("Streaming").setMaster("local[2]");
		JavaSparkContext jsc = new JavaSparkContext(conf);
		JavaStreamingContext jssc = new JavaStreamingContext(jsc,Durations.seconds(5));
		
		List<String> topics = Arrays.asList("test1");
		
		HashMap<String, Object> params = new HashMap<>();
		params.put("bootstrap.servers", "node7-2:9092");
		params.put("group.id", "test");
		params.put("enable.auto.commit", "true");
		params.put("auto.commit.interval.ms", "1000");
		params.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
		params.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

		// Override the hard-coded broker list with the value from the properties file
		params.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PropertityUtils.getValue("brokerList"));
		
		
		// Read data from Kafka
		JavaInputDStream<ConsumerRecord<String, String>> dStream = KafkaUtils.createDirectStream(
				jssc, 
				LocationStrategies.PreferConsistent(), 
				ConsumerStrategies.<String,String>Subscribe(topics, params)
				);
		
		dStream.map(new Function<ConsumerRecord<String,String>, String>() {
			private static final long serialVersionUID = 1L;

			@Override
			public String call(ConsumerRecord<String, String> record) throws Exception {
				
//				record.offset()
//				record.partition()
//				record.key()
//				record.topic()
				
				return record.value();
			}
		}).print();
		
		jssc.start();
		jssc.awaitTermination();

	}

}

MapUtils

package com.jinghang.utils;

import java.text.SimpleDateFormat;
import java.util.List;

import com.jinghang.entry.AppErrorLog;

import scala.Tuple2;

public class MapUtils {

	public static void processmap(List<Tuple2<String, Integer>> tuple2list, AppErrorLog appErrorLog) {
		Long createdAtMs = appErrorLog.getCreatedAtMs();      // time the error occurred (epoch ms)
		String appId = appErrorLog.getAppId();                // application id
		String deviceId = appErrorLog.getDeviceId();          // unique device id
		String appVersion = appErrorLog.getAppVersion();      // app version
		String appChannel = appErrorLog.getAppChannel();      // distribution channel
		String appPlatform = appErrorLog.getAppPlatform();    // platform
		String osType = appErrorLog.getOsType();              // operating system
		String deviceStyle = appErrorLog.getDeviceStyle();    // device model
		String errorBrief = appErrorLog.getErrorBrief();      // error summary
		String errorDetail = appErrorLog.getErrorDetail();    // error detail
		
		String SPLITSTRING = "####";
		
		String key = "ErrorInfoDaily"+SPLITSTRING
				+appId+SPLITSTRING
				+deviceId+SPLITSTRING
				+appVersion+SPLITSTRING
				+appChannel+SPLITSTRING
				+appPlatform+SPLITSTRING
				+osType+SPLITSTRING
				+deviceStyle+SPLITSTRING
				+errorBrief+SPLITSTRING
				+errorDetail+SPLITSTRING
				+transfertime(createdAtMs);
		
		Tuple2<String,Integer> tuple2 = new Tuple2<String, Integer>(key, 1);
		tuple2list.add(tuple2);
	}

	private static String transfertime(Long time) {
		// Format the epoch-millisecond timestamp as yyMMdd so errors are aggregated per day
		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyMMdd");
		return simpleDateFormat.format(time);
	}
}
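
The AppErrorLog entity class (com.jinghang.entry.AppErrorLog) is not listed here either; its shape can be inferred from the getters called in MapUtils and from the Jackson deserialization in SparkStreamingReceive. A minimal sketch with the inferred fields follows; the real class may carry additional fields.

package com.jinghang.entry;

import java.io.Serializable;

//Hypothetical sketch: field names are inferred from the getters used in MapUtils
public class AppErrorLog implements Serializable {

	private static final long serialVersionUID = 1L;

	private Long createdAtMs;    //time the error occurred (epoch ms)
	private String appId;        //application id
	private String deviceId;     //unique device id
	private String appVersion;   //app version
	private String appChannel;   //distribution channel
	private String appPlatform;  //platform
	private String osType;       //operating system
	private String deviceStyle;  //device model
	private String errorBrief;   //error summary
	private String errorDetail;  //error detail

	public Long getCreatedAtMs() { return createdAtMs; }
	public void setCreatedAtMs(Long createdAtMs) { this.createdAtMs = createdAtMs; }
	public String getAppId() { return appId; }
	public void setAppId(String appId) { this.appId = appId; }
	public String getDeviceId() { return deviceId; }
	public void setDeviceId(String deviceId) { this.deviceId = deviceId; }
	public String getAppVersion() { return appVersion; }
	public void setAppVersion(String appVersion) { this.appVersion = appVersion; }
	public String getAppChannel() { return appChannel; }
	public void setAppChannel(String appChannel) { this.appChannel = appChannel; }
	public String getAppPlatform() { return appPlatform; }
	public void setAppPlatform(String appPlatform) { this.appPlatform = appPlatform; }
	public String getOsType() { return osType; }
	public void setOsType(String osType) { this.osType = osType; }
	public String getDeviceStyle() { return deviceStyle; }
	public void setDeviceStyle(String deviceStyle) { this.deviceStyle = deviceStyle; }
	public String getErrorBrief() { return errorBrief; }
	public void setErrorBrief(String errorBrief) { this.errorBrief = errorBrief; }
	public String getErrorDetail() { return errorDetail; }
	public void setErrorDetail(String errorDetail) { this.errorDetail = errorDetail; }
}

With this shape, each Kafka record of the form "com.jinghang.entry.AppErrorLog:" + json is deserialized into an AppErrorLog, and processmap turns it into a key such as ErrorInfoDaily####<appId>####...####<yyMMdd>, whose occurrences are then summed per 5-second batch by reduceByKey.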

Run:

1. Connect to Kafka.

2. Run the Streaming code.
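
To test the pipeline end to end, a producer has to write messages in the "className:json" format that SparkStreamingReceive splits with line.split(":", 2). A hypothetical producer sketch follows; the broker address, topic name, and sample field values are assumptions based on the sample values in the code and comments above.

package com.jinghang.test.sparkStreaming;

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

//Hypothetical test producer: sends one message in the "className:json" format expected by SparkStreamingReceive
public class TestErrorLogProducer {

	public static void main(String[] args) {
		Properties props = new Properties();
		props.put("bootstrap.servers", "node7-2:9092"); //assumed broker, same as the Streaming test above
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

		String json = "{\"createdAtMs\":1578993600000,\"appId\":\"app01\",\"deviceId\":\"device01\","
				+ "\"appVersion\":\"1.0\",\"appChannel\":\"huawei\",\"appPlatform\":\"android\","
				+ "\"osType\":\"android9\",\"deviceStyle\":\"mate20\","
				+ "\"errorBrief\":\"NullPointerException\",\"errorDetail\":\"at com.demo.Main.run\"}";

		//Prefix the payload with the entity class name, matching line.split(":", 2) on the consumer side
		String message = "com.jinghang.entry.AppErrorLog:" + json;

		try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
			//"apperrorspark" is the sample value of the topicerror property noted in SparkStreamingReceive
			producer.send(new ProducerRecord<>("apperrorspark", message));
		}
	}
}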
