package cn.itcast.job;
import cn.itcast.avro.AvroDeserializerSchema;
import cn.itcast.avro.SseAvro;
import cn.itcast.avro.SzseAvro;
import cn.itcast.bean.CleanBean;
import cn.itcast.config.QuotConfig;
import cn.itcast.map.SseMap;
import cn.itcast.map.SzseMap;
import cn.itcast.task.*;
import cn.itcast.util.QuotUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
/**
* 流处理模块:指数业务
* 子业务包含:
* 1.秒级行情
* 2.分时行情
* 3.K线行情
* 4.分时数据备份
*/
public class IndexStream {
/**
* 1.创建IndexStream对象,创建main方法
* 2.获取流处理执行环境
* 3.设置事件时间、并行度
* 4.设置检查点机制
* 5.设置重启机制
* 6.整合Kafka(新建反序列化类)
* 7.数据过滤(时间和null字段)
* 8.数据转换、合并
* 9.过滤个股数据
* 10.设置水位线
* 11.业务数据处理
* 12.触发执行
*/
//1.创建StockStream对象,创建main方法
public static void main(String[] args) throws Exception {
//2.获取流处理执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//3.设置事件时间、并行度
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);//事件时间
env.setParallelism(1);//在生产环境设置成跟kafka的分区数一致,开发环境设置成1,便于数据调试
//开发环境不需要启用检查点
// //4.设置检查点机制
// env.enableCheckpointing(5000l);
// //检查点的保存路径
// env.setStateBackend(new FsStateBackend("hdfs://node01:8020/checkpoint/index"));
// env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);//强一致性
// env.getCheckpointConfig().setCheckpointTimeout(60000l);//检查点制作的超时时间
// env.getCheckpointConfig().setFailOnCheckpointingErrors(false); //检查点制作失败,任务继续运行
// //当任务停止的时候,保留检查点,需要手动删除,生产环境使用它
// env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
//
// //5.设置重启机制
// //固定延迟重启策略
// env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, org.apache.flink.api.common.time.Time.seconds(5)));
//6.整合Kafka(新建反序列化类)
//配置kafka参数
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", QuotConfig.config.getProperty("bootstrap.servers"));
properties.setProperty("group.id", QuotConfig.config.getProperty("group.id"));
//沪市行情sse 证券数据:个股/指数/债券/基金
FlinkKafkaConsumer011<SseAvro> sseKafkaConsumer = new FlinkKafkaConsumer011<SseAvro>(QuotConfig.config.getProperty("sse.topic"), new AvroDeserializerSchema(QuotConfig.config.getProperty("sse.topic")), properties);//需要自定义avro反序列化对象
//深市行情szse
FlinkKafkaConsumer011<SzseAvro> szseKafkaConsumer = new FlinkKafkaConsumer011<SzseAvro>(QuotConfig.config.getProperty("szse.topic"), new AvroDeserializerSchema(QuotConfig.config.getProperty("szse.topic")), properties);//需要自定义avro反序列化对象
//从头消费
sseKafkaConsumer.setStartFromEarliest();
szseKafkaConsumer.setStartFromEarliest();
//加载数据源
DataStreamSource<SseAvro> sseSource = env.addSource(sseKafkaConsumer);
DataStreamSource<SzseAvro> szseSource = env.addSource(szseKafkaConsumer);
// sseSource.print();
// szseSource.print();
//7.数据过滤(时间和null字段<高开低收为0>)
//沪市数据过滤
SingleOutputStreamOperator<SseAvro> sseFilter = sseSource.filter(new FilterFunction<SseAvro>() {
@Override
public boolean filter(SseAvro value) throws Exception { //返回boolean为true的数据
return QuotUtil.checkTime(value) && QuotUtil.checkData(value);
}
});
//深市数据过滤
SingleOutputStreamOperator<SzseAvro> szseFilter = szseSource.filter(new FilterFunction<SzseAvro>() {
@Override
public boolean filter(SzseAvro value) throws Exception {
return QuotUtil.checkTime(value) && QuotUtil.checkData(value);
}
});
//8.数据转换、合并
DataStream<CleanBean> unionData = sseFilter.map(new SseMap()).union(szseFilter.map(new SzseMap()));
// unionData.print("合并数据:");
//9.过滤个股数据
SingleOutputStreamOperator<CleanBean> stockData = unionData.filter(new FilterFunction<CleanBean>() {
@Override
public boolean filter(CleanBean value) throws Exception {
return QuotUtil.isIndex(value);
}
});
//10.设置水位线
//解决网络乱序和网络延迟
DataStream<CleanBean> waterData = stockData.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<CleanBean>(Time.seconds(Long.valueOf(QuotConfig.config.getProperty("delay.time")))) {
@Override
public long extractTimestamp(CleanBean element) {
return element.getEventTime();
}
});
waterData.print("水位线数据:");
/**
* 子业务包含:
* 1.秒级行情 -> Hbase
* 2.分时行情 -> Druid
* 3.K线行情 -> mysql
* 4.分时数据备份 -> Hdfs
*/
//1.秒级行情(5s) -> Hbase
new IndexSecTask().process(waterData);
//2.分时行情(60s) -> Druid
// new IndexMinTask().process(waterData);
//4.分时数据备份 -> Hdfs
/**
* 自我练习
*/
// new IndexMinHdfsTask().process(waterData);
/**
* 3.K线行情(日、周、月)
*/
new IndexKlineTask().process(waterData);
// 12.触发执行
env.execute("stock stream");
}
}