RCFile Output in MapReduce: Implementing and Using RCFileOutputFormat

Custom implementation: RCFileOutputFormat.java

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * RCFileOutputFormat for the new (mapreduce) API.
 */
public class RCFileOutputFormat extends
    FileOutputFormat<WritableComparable, BytesRefArrayWritable> {

  /**
   * Set the number of columns into the given configuration.
   *
   * @param conf      configuration instance on which to set the column number
   * @param columnNum column number for RCFile's Writer
   */
  public static void setColumnNumber(Configuration conf, int columnNum) {
    assert columnNum > 0;
    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum);
  }

  /**
   * Returns the number of columns set in the conf for writers.
   *
   * @param conf
   * @return number of columns for RCFile's writer
   */
  public static int getColumnNumber(Configuration conf) {
    return conf.getInt(RCFile.COLUMN_NUMBER_CONF_STR, 0);
  }

  @Override
  public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
      TaskAttemptContext task) throws IOException, InterruptedException {
    Configuration conf = task.getConfiguration();
    // Force compressed output for the RCFile writer.
    conf.setBoolean("mapred.output.compress", true);

    Path outputPath = FileOutputFormat.getOutputPath(task);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (!fs.exists(outputPath)) {
      fs.mkdirs(outputPath);
    }

    Path file = getDefaultWorkFile(task, "");

    CompressionCodec codec = null;
    if (getCompressOutput(task)) {
      Class<? extends CompressionCodec> codecClass =
          getOutputCompressorClass(task, DefaultCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
    }

    final RCFile.Writer out = new RCFile.Writer(fs, conf, file, null, codec);

    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
      @Override
      public void write(WritableComparable key, BytesRefArrayWritable value)
          throws IOException {
        // The key is ignored; RCFile stores only the column values.
        out.append(value);
      }

      @Override
      public void close(TaskAttemptContext context)
          throws IOException, InterruptedException {
        out.close();
      }
    };
  }
}
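One detail that is easy to miss: the RCFile.Writer constructed in getRecordWriter takes its column count from RCFile.COLUMN_NUMBER_CONF_STR, so the driver must call RCFileOutputFormat.setColumnNumber() on the job configuration before the job is submitted. Below is a minimal driver sketch, assuming a three-column row, the Hadoop 2.x Job API, and a map-only job; the class names RCFileWriteDriver and RCFileWriteMapper are placeholders, not code from the original post (a mapper sketch follows the usage fragment below).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RCFileWriteDriver {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // RCFile.Writer reads the column count from the configuration,
    // so it has to be set before the job is submitted.
    RCFileOutputFormat.setColumnNumber(conf, 3);

    Job job = Job.getInstance(conf, "rcfile-write");
    job.setJarByClass(RCFileWriteDriver.class);
    job.setMapperClass(RCFileWriteMapper.class); // hypothetical mapper, sketched below
    job.setNumReduceTasks(0);                    // map-only: rows go straight to RCFile

    job.setOutputFormatClass(RCFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesRefArrayWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Running map-only keeps the sketch short; with reducers the same output classes apply, and the RCFile is then written on the reduce side instead.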

Usage: on the driver side, register the output format and value class; on the map/reduce side, pack each record's columns into a BytesRefArrayWritable and emit it (the fields uid, fuid, sid, and times come from the surrounding task code):

// Driver side
job.setOutputFormatClass(RCFileOutputFormat.class);
job.setOutputValueClass(BytesRefArrayWritable.class);

// Map/reduce side: one BytesRefArrayWritable per output row
BytesRefArrayWritable values = new BytesRefArrayWritable(COLUMNS);
values.set(0, new BytesRefWritable(fuid.getBytes()));
values.set(1, new BytesRefWritable(this.sid.getBytes()));
values.set(2, new BytesRefWritable(this.times.getBytes()));
context.write(new Text(this.uid), values);
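To make the row-packing fragment above self-contained, here is a hedged mapper sketch. The input layout (tab-separated uid, fuid, sid, times) and the class name RCFileWriteMapper are assumptions chosen to match the fields used above, not code from the original post; COLUMNS must equal the value passed to RCFileOutputFormat.setColumnNumber() in the driver.

import java.io.IOException;

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: parses tab-separated lines "uid \t fuid \t sid \t times"
// and emits one BytesRefArrayWritable row per record.
public class RCFileWriteMapper
    extends Mapper<LongWritable, Text, Text, BytesRefArrayWritable> {

  private static final int COLUMNS = 3; // must match setColumnNumber() in the driver

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String[] fields = line.toString().split("\t");
    if (fields.length < 4) {
      return; // skip malformed records
    }
    String uid = fields[0], fuid = fields[1], sid = fields[2], times = fields[3];

    BytesRefArrayWritable row = new BytesRefArrayWritable(COLUMNS);
    row.set(0, new BytesRefWritable(fuid.getBytes()));
    row.set(1, new BytesRefWritable(sid.getBytes()));
    row.set(2, new BytesRefWritable(times.getBytes()));

    context.write(new Text(uid), row);
  }
}

Note that the key written here only matters for map output bookkeeping: as shown in the RecordWriter above, RCFileOutputFormat appends just the value to the RCFile.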
