Spark Streaming Custom Receivers


Create a Maven project and add the spark-streaming dependency:

<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming_2.10 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.10</artifactId>
    <version>1.5.1</version>
    <scope>provided</scope>
</dependency>

The custom receiver:

package com.eastcom.test.first.stream;

import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;

public class FileReceiver extends Receiver<String> {

	private static final long serialVersionUID = 1L;

	public FileReceiver(StorageLevel storageLevel) {
		super(storageLevel);
	}

	/**
	 * onStart() typically launches a worker thread that keeps reading from the
	 * data source and hands each record to Spark via store().
	 *
	 * On the Spark side the data is consumed through
	 * jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY())),
	 * which picks up the records passed to store().
	 */
	@Override
	public void onStart() {
		new Worker().start();
		// store("Hello World");
	}

	@Override
	public void onStop() {

	}

	class Worker extends Thread {

		@Override
		public void run() {
			// Replay content.txt forever so the demo stream never runs dry.
			while (true) {
				try {
					InputStream resource = FileReceiver.class.getResourceAsStream("content.txt");
					List<String> lines = IOUtils.readLines(resource);
					for (String line : lines) {
						store(line); // hand the record over to Spark
						System.out.println("sent " + line);
						Thread.sleep(1000);
					}
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}

	}

	// Standalone check: starts the worker thread directly; in a real job Spark
	// starts the receiver through jsc.receiverStream(...).
	public static void main(String[] args) {

		FileReceiver fileReceiver = new FileReceiver(StorageLevel.MEMORY_ONLY());
		fileReceiver.onStart();

	}

}
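
The worker above loops forever and never reacts to onStop(). The pattern recommended in the Spark custom-receiver guide is to keep reading only while isStopped() is false and to call restart() on failure so Spark can re-launch the receiver. A minimal sketch of that variant (the class name StoppableFileReceiver is made up for illustration; it reads the same content.txt):

package com.eastcom.test.first.stream;

import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;

public class StoppableFileReceiver extends Receiver<String> {

	private static final long serialVersionUID = 1L;

	public StoppableFileReceiver(StorageLevel storageLevel) {
		super(storageLevel);
	}

	@Override
	public void onStart() {
		// Start the worker thread; onStart() itself must return quickly.
		new Thread(this::receive, "file-receiver").start();
	}

	@Override
	public void onStop() {
		// Nothing to release: the worker exits on its own once isStopped() is true.
	}

	private void receive() {
		try {
			while (!isStopped()) {
				InputStream resource = StoppableFileReceiver.class.getResourceAsStream("content.txt");
				List<String> lines = IOUtils.readLines(resource);
				for (String line : lines) {
					store(line);
					Thread.sleep(1000);
				}
			}
		} catch (Exception e) {
			// Ask Spark to re-launch this receiver after the configured delay.
			restart("Error reading content.txt", e);
		}
	}

}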


The driver program:

package com.eastcom.test.first.stream;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

/**
 * Tests the custom receiver: the streams from several receivers are combined
 * into a single stream with union.
 *
 */
public class TestApplication {

	private static final Pattern SPACE = Pattern.compile(" ");
	private static JavaStreamingContext jsc;

	public static void main(String[] args) throws Exception {

		onSparkConf(); // Driver code: runs on the driver node.

		// init(); // Receivers run inside executors on the worker nodes; each batch is processed every 5 seconds per the batch interval.

		initUnionStream();

		startAndWait(); // Driver code: runs on the driver node.

	}

	public static void onSparkConf() {

		System.setProperty("hadoop.home.dir", "D:/softTools/Hadoop/hadoop-2.6.5");

		// The driver takes one core and every receiver occupies one core. If n in local[n] is too small, the job only receives data and never has cores left to process it.
		SparkConf conf = new SparkConf().setAppName("SparkStreaming").setMaster("local[8]");
		jsc = new JavaStreamingContext(conf, Durations.seconds(5));
		// jsc.checkpoint("/checkpoint");
	}

	/**
	 * Single receiver.
	 */
	public static void init() {

		JavaReceiverInputDStream<String> lines = jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY()));
		JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)));
		JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
				.reduceByKey((i1, i2) -> i1 + i2);
		wordCounts.print();

	}

	/**
	 * Multiple receivers: several streams are unioned into a single stream.
	 */
	public static void initUnionStream() {

		List<JavaDStream<String>> streams = new ArrayList<>();
		JavaDStream<String> lines_1 = jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY()));
		streams.add(lines_1);
		JavaDStream<String> lines_2 = jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY()));
		streams.add(lines_2);
		JavaDStream<String> lines_3 = jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY()));
		streams.add(lines_3);

		JavaDStream<String> unifiedStream = jsc.union(streams.get(0), streams.subList(1, streams.size()));

		JavaDStream<String> words = unifiedStream.flatMap(x -> Arrays.asList(SPACE.split(x)));
		JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
				.reduceByKey((i1, i2) -> i1 + i2);

		wordCounts.print();

	}

	/**
	 * Start Spark, wait until the computation terminates, then close the context.
	 */
	public static void startAndWait() {
		jsc.start();
		jsc.awaitTermination();
		jsc.close();
	}

}
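
When the number of receivers should be configurable, the same union can be built in a loop instead of adding the three streams by hand. A small sketch of such a variant of initUnionStream that could live in the same class (the receiverCount parameter is hypothetical):

	public static void initUnionStream(int receiverCount) {

		// Build receiverCount identical receiver streams, then union them into one DStream.
		List<JavaDStream<String>> streams = new ArrayList<>();
		for (int i = 0; i < receiverCount; i++) {
			streams.add(jsc.receiverStream(new FileReceiver(StorageLevel.MEMORY_ONLY())));
		}
		JavaDStream<String> unifiedStream = jsc.union(streams.get(0), streams.subList(1, streams.size()));

		JavaDStream<String> words = unifiedStream.flatMap(x -> Arrays.asList(SPACE.split(x)));
		words.mapToPair(s -> new Tuple2<>(s, 1)).reduceByKey((i1, i2) -> i1 + i2).print();

	}

Remember that each receiver still occupies one core, so local[n] (or the cluster's executor cores) must be larger than receiverCount for any processing to happen.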







