MyDateTimeBucketer代码如下:
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.flink.util.Preconditions;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
public class MyDateTimeBucketer<T> implements Bucketer<T> {
private static final long serialVersionUID = 1L;
private static final String DEFAULT_FORMAT_STRING = "yyyyMMddHH";
private final String formatString;
private final ZoneId zoneId;
private transient DateTimeFormatter dateTimeFormatter;
public MyDateTimeBucketer() {
this("yyyyMMddHH");
}
public MyDateTimeBucketer(String formatString) {
this(formatString, ZoneId.systemDefault());
}
public MyDateTimeBucketer(ZoneId zoneId) {
this("yyyyMMddHH", zoneId);
}
public MyDateTimeBucketer(String formatString, ZoneId zoneId) {
this.formatString = (String) Preconditions.checkNotNull(formatString);
this.zoneId = (ZoneId)Preconditions.checkNotNull(zoneId);
this.dateTimeFormatter = DateTimeFormatter.ofPattern(this.formatString).withZone(zoneId);
}
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.dateTimeFormatter = DateTimeFormatter.ofPattern(this.formatString).withZone(this.zoneId);
}
public Path getBucketPath(Clock clock, Path basePath, T element) {
String newDateTimeString = this.dateTimeFormatter.format(Instant.ofEpochMilli(clock.currentTimeMillis()));
//这里自定义目录
return new Path(basePath + "/" +"data="+newDateTimeString);
}
public String toString() {
return "DateTimeBucketer{formatString='" + this.formatString + '\'' + ", zoneId=" + this.zoneId + '}';
}
}
在 Flink 里引用如下:
// Create a sink for String records rooted at HDFS_PATH (defined elsewhere) and
// install the custom bucketer. The explicit type argument avoids using
// MyDateTimeBucketer as a raw type, which would trigger an unchecked conversion.
BucketingSink<String> bucketingSink = new BucketingSink<>(HDFS_PATH);
bucketingSink.setBucketer(new MyDateTimeBucketer<String>());