import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import java.util.concurrent.TimeUnit;
/**
 * Author: panghu
 * Date: 2022-05-29
 * Description: writes stream data out to files (StreamingFileSink demo).
 */
public class _15SinkToFileTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The number of output part files is tied to the parallelism.
        env.setParallelism(4);

        DataStreamSource<Event> events = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 2000L),
                new Event("Alice", "./prod?id=100", 3000L),
                new Event("Alice", "./prod?id=200", 3500L),
                new Event("Bob", "./prod?id=2", 2500L),
                new Event("Alice", "./prod?id=300", 3600L),
                new Event("Bob", "./home", 3000L),
                new Event("Bob", "./prod?id=1", 2300L),
                new Event("Bob", "./prod?id=3", 3300L));

        // Row-format sink: one encoded String per record, written under ./output/.
        StreamingFileSink<String> fileSink = StreamingFileSink
                .<String>forRowFormat(new Path("output/"), new SimpleStringEncoder<>("utf-8"))
                // Rolling policy controls when the current part file is closed
                // and a new one is started.
                .withRollingPolicy(
                        DefaultRollingPolicy.builder()
                                // Roll every 15 minutes.
                                .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
                                // Roll after 5 minutes with no incoming records.
                                .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
                                // Roll once a part file reaches 1 GiB (default is 128 MiB).
                                .withMaxPartSize(1024 * 1024 * 1024)
                                .build())
                .build();

        // Render each Event as a String before handing it to the file sink.
        // An anonymous MapFunction (rather than a lambda) keeps the generic
        // types reified for Flink's type extraction.
        events.map(new MapFunction<Event, String>() {
            @Override
            public String map(Event value) throws Exception {
                return value.toString();
            }
        }).addSink(fileSink);

        env.execute();
    }
}
// Flink writes data to files (blog title: "Flink将数据写入文件").
// Latest recommended article published 2024-08-22 08:31:00.