import cn.flink.streaming.sink.VehicleDetailSinkFunction;
import cn.flink.streaming.source.VechileDetailSourceFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple9;
import org.apache.flink.runtime.state.memory.MemoryStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Vehicle detail metric ETL task.
 *
 * <p>Reads rows from HBase, applies the business transformation, and writes the
 * result back into HBase.
 *
 * <p>Note: this job effectively behaves as a batch job — it stops once the source
 * table has been fully read, so it must be scheduled to run periodically. It could
 * be reworked into a continuous streaming job.
 */
public class VechileDetailETLTask {
public static void main(String[] args) throws Exception {
/*
* 实现步骤:
* 1)初始化flink流处理的运行环境
* 2)设置检查点相关的参数(checkpoint周期,任务失败时是否结束job)
* 3)添加数据源(加载hbase)的数据,表:itcast_src
* 3.1)继承自RichSourceFunction
* 3.2)实现open、run、close方法
* 4)实现数据的写入到hbase中
* 4.1)创建表,使用gz压缩格式
* 4.2)自定义sink的实现
* 5)执行任务,查看明细数据ETL任务的结果
*///TODO 1)初始化flink流处理的运行环境 StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); //TODO 2)设置检查点相关的参数(checkpoint周期,任务失败时是否结束job) env.enableCheckpointing(5000L); env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().setCheckpointTimeout(30000); env.setStateBackend(new MemoryStateBackend()); env.setRestartStrategy(RestartStrategies.noRestart()); //TODO 3)添加数据源(加载hbase)的数据,表:itcast_src DataStreamSource<Tuple9<String, String, String, String, String, String, String, String, String>> streamSource = env.addSource(new VechileDetailSourceFunction("itcast_src")); //TODO 3.1)继承自RichSourceFunction //TODO 3.2)实现open、run、close方法 //TODO 4)实现数据的写入到hbase中 //streamSource.printToErr(); VehicleDetailSinkFunction vehicleDetailSinkFunction = new VehicleDetailSinkFunction("itcastsrc_vehicle_detail"); streamSource.addSink(vehicleDetailSinkFunction); //TODO 4.1)创建表,使用gz压缩格式 //TODO 4.2)自定义sink的实现 //TODO 5)执行任务,查看明细数据ETL任务的结果 env.execute(); } }
import cn.itcast.utils.ConfigLoader;
import org.apache.flink.api.java.tuple.Tuple9;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org