Flink1.11.2-事件时间分片存储文件
参考链接:https://developer.aliyun.com/article/719786
ToHdfs
package com.toHdfs;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.bean.PageLog;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.fs.StringWriter;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
// Flink job: stream page-view events from Kafka topic "pageLog" into HDFS,
// bucketed by event time (see sink configuration below).
public class ToHdfs {
public static void main(String[] args) {
// Create the streaming execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Kafka consumer configuration
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "192.168.1.162:9092");
// NOTE(review): zookeeper.connect is presumably a leftover from the legacy (0.8)
// consumer; the 0.11 client should not need it — confirm before removing.
properties.setProperty("zookeeper.connect", "192.168.1.162:2181");
properties.setProperty("group.id", "FromKafka001");
// Deserialize both keys and values as plain UTF-8 strings
properties.setProperty("key.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("value.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
// When no committed offset exists for the group, start from the latest record
properties.setProperty("auto.offset.reset", "latest");
// Kafka source: consumes raw JSON strings from topic "pageLog".
// Fixed: the variable was declared with the raw type FlinkKafkaConsumer011,
// causing an unchecked conversion at env.addSource(...); declare it with its
// generic type and use the diamond operator on the constructor.
FlinkKafkaConsumer011<String> kafkaSource =
new FlinkKafkaConsumer011<>("pageLog", new SimpleStringSchema(), properties);
DataStreamSource<String> dsJsonStr = env.addSource(kafkaSource);
// Parse each raw JSON record into a PageLog POJO.
SingleOutputStreamOperator<PageLog> dsStr = dsJsonStr.map(new MapFunction<String, PageLog>() {
@Override
public PageLog map(String in) throws Exception {
// Debug trace of the raw record.
// NOTE(review): prefer an SLF4J logger over stdout in a production job.
System.out.println(in);
JSONObject js = JSON.parseObject(in);
PageLog pageLog = new PageLog();
// Fixed: getString(...) already returns a String — the redundant .toString()
// calls added nothing and threw NullPointerException whenever a field was
// absent (getString returns null for missing keys).
pageLog.setApp_id(js.getString("app_id"));
pageLog.setDevId(js.getString("device_id"));
pageLog.setPageId(js.getString("page_id"));
pageLog.setuId(js.getLong("uid"));
return pageLog;
}
});
// HDFS sink: writes records under the given base path, one directory per bucket.
// NOTE(review): BucketingSink is deprecated in Flink 1.11 in favor of
// StreamingFileSink — confirm before migrating this job.
BucketingSink<PageLog> sink = new BucketingSink<>("hdfs://192.168.1.162:8020/kafkaTohdfs/");
// Bucket by event time so records are partitioned across day boundaries
// sink.setBucketer(new EventTimeBucketer<PageLog>("yyyy/MM/dd"));
sink.setBucketer(new EventTimeBuc