Spark Streaming 自定义 Receiver

Spark Streaming 自定义 Receiver

spark streaming 除了内部支持的数据源之外,还可以自定义数据源。只需要继承 Receiver<T> 类,然后重写onStart()和onStop()方法就可以了。下面以从MySQL获取数据为例:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;

import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;

public class DataBaseReceiver extends Receiver<String> {
	
	private Connection conn;
	private Statement st;
	private ResultSet rs;
	private String url;
	private String username;
	private String password;
	private String dbtable;
	private String[] fields;
	private String driverClassName;
	private String sql;
	private int totalSize;
	private int pageSize;
	private StringBuilder sb = new StringBuilder();

	public DataBaseReceiver(StorageLevel storageLevel,Map<String,Object> map) {
		super(storageLevel);
		this.url = map.get("db_jdbc_url")+"";
		this.username = map.get("db_jdbc_userName")+"";
		this.password = map.get("db_jdbc_password")+"";
		this.dbtable = map.get("dbTableName")+"";
		this.fields = (map.get("fields")+"").split(",");
		this.totalSize = 0;
		this.pageSize = Integer.parseInt(map.get("pageSize")+"");
		this.driverClassName = map.get("db_jdbc_driverClass")+"";
	}

	@Override
	public void onStart() {//在此方法中,主要是做一些初始化,以及启动一个线程,用以获取数据
		try {
			Class.forName(driverClassName);
			conn = DriverManager.getConnection(url,username,password);
			st = conn.createStatement();
		} catch (Exception e) {
			e.printStackTrace();
		}
		new Thread(new DBRunnable()).start();
	}

	@Override
	public void onStop() {
		if(rs != null){
			try {
				rs.close();
			} catch (Exception e) {
				e.printStackTrace();
			}finally{
				rs = null;
			}
		}
		if(st != null){
			try {
				st.close();
			} catch (Exception e) {
				e.printStackTrace();
			}finally{
				st = null;
			}
		}
		if(conn != null){
			try {
				conn.close();
			} catch (Exception e) {
				e.printStackTrace();
			}finally{
				conn = null;
			}
		}
	}

	class DBRunnable implements Runnable{
		@Override
		public void run() {
			while(!isStopped()){
				try {
					System.out.println("===================================已读行数:"+totalSize+"=========================================");
					int newTotalSize = getTotalSize();
					int size = newTotalSize - totalSize;
					System.out.println("===================================新增行数:"+size+"=========================================");
					if(size > 0){
						int pageNum = size/pageSize;
						if(size%pageSize != 0){
							pageNum += 1;
						}
						executeSql(pageNum);
						totalSize = newTotalSize;
					}else{
						System.out.println("===================================没有新的数据,进入休眠=========================================");
						Thread.sleep(5000);
					}
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		
		private int getTotalSize(){
			int newTotalSize = 0;
			try {
				sql = "select count(*) from "+dbtable;
				rs = st.executeQuery(sql);
				rs.next();
				newTotalSize = rs.getInt(1);
			} catch (Exception e) {
				e.printStackTrace();
			}
			return newTotalSize;
		}
		
		private void executeSql(int pageNum){
			try {
				System.out.println("===================================页数:"+pageNum+"=========================================");
				for(int i = 0;i < pageNum;i++){
					int startIndex = totalSize+i*pageSize;
					sql = "select * from "+dbtable+" limit "+startIndex+","+pageSize;
					System.out.println("===================================执行语句:"+sql+"=========================================");
					rs = st.executeQuery(sql);
					storeData();
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		
		private void storeData(){
			try {
				while(rs.next()){
					String line = getLine();
					store(line);
					System.out.println("===================================存储数据:"+line+"=========================================");
					sb.setLength(0);
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		
		private String getLine(){
			try {
				int i = 0;
				for(String field : fields){
					sb.append(rs.getString(field));
					i++;
					if(i < fields.length){
						sb.append(",");
					}
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
			return sb.toString();
		}
	}
}

如果需要从其他数据源获取数据,只需如法炮制即可。

之后在Application中使用如下方式调用:

// Driver-side usage: build a streaming context with a 3-second batch interval
// and plug the custom receiver in via receiverStream(). The `source` map must
// carry the JDBC settings the DataBaseReceiver constructor reads.
SparkConf conf = new SparkConf();
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(3));
Map<String,Object> source = new HashMap<>();
JavaReceiverInputDStream<String> receiverStream = streamingContext.receiverStream(new DataBaseReceiver(StorageLevel.MEMORY_ONLY(), source));
// See the official documentation for the available StorageLevel options.

ps:如果运行程序的时候采取的是Local模式,在指定master的时候,需指定大于1的数(即 --master local[n],其中n>1)。否则spark只能获取数据,而不能处理数据。

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值