public class HdfsSourceAndSink { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // TODO 读取hdfs DataStreamSource<String> stringDataStreamSource = env.readTextFile( "hdfs://localhost//user//flink//date//01.txt); // TODO 写入hdfs stringDataStreamSource.addSink(new MySinkToHdfs()).setParallelism(1); stringDataStreamSource.print(); env.execute(); } }
// Custom sink: appends each record to a date-named file on HDFS.
public class MySinkToHdfs extends RichSinkFunction { private FileSystem fs = null; private SimpleDateFormat sd = null; private String str = null; @Override public void open(Configuration parameters) throws Exception { super.open(parameters); // 自定义指定路径 str = "hdfs://localhost//user//flink//test//"; // 获取配置 org.apache.hadoop.conf.Configuration cf = new org.apache.hadoop.conf.Configuration(); cf.setBoolean("dfs.support.append", true); fs = FileSystem.get(cf); // 格式化日期(可用于给写入文件自定义名称) sd = new SimpleDateFormat("yyyy-MM-dd"); } @Override public void invoke(Object value, Context context) throws Exception { if (value != null) { // 拼接路径 String format = sd.format(new Date()); StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append(str) .append(format); Path path = new Path(stringBuilder.toString()); // 判断路径是否存在 System.out.println("路径是否存在:" + fs.exists(path)); FSDataOutputStream ops = null; try { if (fs.exists(path)) { ops = fs.append(path); } else { ops = fs.create(path, false); } ops.write((value + "\n").getBytes("UTF-8")); }catch (IOException e) { e.printStackTrace(); } finally { ops.close(); } } } @Override public void close() throws Exception { fs.close(); super.close(); } }