Flink入门(十五) 写入hdfs文件csv格式,自定义StreamWriterBase

上篇文章Flink入门(七) 写入HDFS
现在,我想把java-bean写入csv文件。
有两种方式:第一种是增加一个 map 算子,把 bean 转换成以","分割的字符串,但这种方式不可复用。
第二种方式,重写StreamWriterBase,类似于StringWriter。
代码如下

import org.apache.flink.api.java.io.CsvOutputFormat;
import org.apache.flink.streaming.connectors.fs.StreamWriterBase;
import org.apache.flink.streaming.connectors.fs.Writer;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@link StreamWriterBase} that serializes Java beans as delimited (CSV-style) text rows
 * via reflection, for use with Flink's {@code BucketingSink}.
 *
 * <p>If {@code fieldNames} is supplied, only those declared fields are written, in the given
 * order; otherwise every non-static, non-synthetic declared field of the element's class is
 * written in declaration order. {@code null} field values are written as empty columns.
 *
 * <p>NOTE(review): values are not quoted or escaped — a field value containing the delimiter
 * will break the column layout. Choose a delimiter that cannot occur in the data.
 */
public class BeanCsvWriter<T> extends StreamWriterBase<T> {

    private static final Logger LOG = LoggerFactory.getLogger(BeanCsvWriter.class);

    private static final long serialVersionUID = 1L;
    private final String charsetName;
    // Charset is not Serializable; re-resolved from charsetName in open().
    private transient Charset charset;
    private String fieldDelimiter;
    private String recordDelimiter;
    private String[] fieldNames;

    /** UTF-8, all declared fields, default Flink CSV delimiters ("," and "\n"). */
    public BeanCsvWriter() {
        this("UTF-8", null, CsvOutputFormat.DEFAULT_FIELD_DELIMITER, CsvOutputFormat.DEFAULT_LINE_DELIMITER);
    }

    /** UTF-8, all declared fields, custom field delimiter. */
    public BeanCsvWriter(String fieldDelimiter) {
        this("UTF-8", null, fieldDelimiter, CsvOutputFormat.DEFAULT_LINE_DELIMITER);
    }

    /** UTF-8, the given fields only, default delimiters. */
    public BeanCsvWriter(String[] fieldNames) {
        this("UTF-8", fieldNames, CsvOutputFormat.DEFAULT_FIELD_DELIMITER, CsvOutputFormat.DEFAULT_LINE_DELIMITER);
    }

    /** UTF-8, the given fields only, custom field delimiter. */
    public BeanCsvWriter(String[] fieldNames, String fieldDelimiter) {
        this("UTF-8", fieldNames, fieldDelimiter, CsvOutputFormat.DEFAULT_LINE_DELIMITER);
    }

    /**
     * Full constructor.
     *
     * @param charsetName     name of the charset used to encode rows (validated in {@link #open})
     * @param fieldNames      declared-field names to write, or {@code null} for all fields
     * @param fieldDelimiter  separator between columns (may be multi-character)
     * @param recordDelimiter separator appended after each row
     */
    public BeanCsvWriter(String charsetName, String[] fieldNames, String fieldDelimiter, String recordDelimiter) {
        this.charsetName = charsetName;
        this.fieldNames = fieldNames;
        this.fieldDelimiter = fieldDelimiter;
        this.recordDelimiter = recordDelimiter;
    }

    @Override
    public void open(FileSystem fs, Path path) throws IOException {
        super.open(fs, path);
        try {
            this.charset = Charset.forName(charsetName);
        } catch (IllegalCharsetNameException ex) {
            throw new IOException("The charset " + charsetName + " is not valid.", ex);
        } catch (UnsupportedCharsetException ex) {
            throw new IOException("The charset " + charsetName + " is not supported.", ex);
        }
    }

    @Override
    public void write(T element) throws IOException {
        FSDataOutputStream outputStream = getStream();
        try {
            writeRow(element, outputStream);
        } catch (ReflectiveOperationException e) {
            // Fail the sink instead of silently dropping the row (the original
            // printStackTrace() would have lost the record without any signal).
            throw new IOException(
                    "Failed to serialize " + element.getClass().getName() + " to CSV", e);
        }
    }

    /** Encodes one element as a delimited row and writes it to {@code out}. */
    private void writeRow(T element, FSDataOutputStream out)
            throws IOException, NoSuchFieldException, IllegalAccessException {
        Class<?> clazz = element.getClass();
        Field[] fields = (fieldNames != null) ? namedFields(clazz) : dataFields(clazz);
        out.write(toCsvLine(element, fields).getBytes(charset));
    }

    /** Resolves the explicitly configured field names on {@code clazz}, preserving order. */
    private Field[] namedFields(Class<?> clazz) throws NoSuchFieldException {
        Field[] fields = new Field[fieldNames.length];
        for (int i = 0; i < fieldNames.length; i++) {
            fields[i] = clazz.getDeclaredField(fieldNames[i]);
        }
        return fields;
    }

    /** All declared instance fields of {@code clazz} that carry bean data. */
    private Field[] dataFields(Class<?> clazz) {
        List<Field> fields = new ArrayList<>();
        for (Field field : clazz.getDeclaredFields()) {
            // Skip constants like serialVersionUID and compiler-generated fields —
            // they are not row data and would shift the column layout.
            if (Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) {
                continue;
            }
            fields.add(field);
        }
        return fields.toArray(new Field[0]);
    }

    /**
     * Joins the field values of {@code element} with {@code fieldDelimiter} and appends
     * {@code recordDelimiter}. Correctly handles multi-character delimiters and classes
     * with zero data fields (the original deleteCharAt(...) broke in both cases).
     */
    private String toCsvLine(T element, Field[] fields) throws IllegalAccessException {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                sb.append(fieldDelimiter);
            }
            fields[i].setAccessible(true);
            Object value = fields[i].get(element);
            if (value != null) {
                sb.append(value);
            }
        }
        sb.append(recordDelimiter);
        return sb.toString();
    }

    @Override
    public Writer<T> duplicate() {
        // Diamond instead of the original raw type; same configuration is copied.
        return new BeanCsvWriter<>(charsetName, fieldNames, fieldDelimiter, recordDelimiter);
    }
}

调用方式

 		DataStream<BaseTransferBO> detailStream = ...; // 上游数据流
        BucketingSink<BaseTransferBO> sink = new BucketingSink<BaseTransferBO>("/data/sss/plan_detail_product_new/");
        sink.setBucketer(new DateTimeBucketer<BaseTransferBO>("yyyy-MM-dd", ZoneId.of("Asia/Shanghai")));
        sink.setBatchSize(1024*1024*1024*50L); // this is 50G,
        sink.setBatchRolloverInterval(60*60*1000L); // this is 60 mins
        sink.setPendingPrefix("");
        sink.setPendingSuffix("");
        sink.setInProgressPrefix(".");
        //重写writer,BeanCsvWriter
        sink.setWriter(new BeanCsvWriter<BaseTransferBO>("$"));
        detailStream.addSink(sink).name("detail_log_sink").setParallelism(2);

就可以生成以"$"为分隔符的csv文件了。

  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值