1) 将HBase表中数据导出到文件中(export):表的一行数据就是文件的一行数据,列之间的分隔符为制表符。
   (Export data from an HBase table to files: one table row becomes one output line, columns separated by tabs.)
a) importtsv 工具使用测试 (importtsv tool usage test)
HBase2Hdfs
51.5. HBase MapReduce Summary to File Example
将HBase表中的数据导出到文件中 (Export data from an HBase table to files)
* input : HBase table
* output: hdfs files
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/*
 * 需求 (Requirement):
 * 将HBase表中数据导出到文件中(export),表的一行数据就是文件的一行数据,列之间的分隔符为制表符。
 * Export data from the HBase table to files: one table row per output line,
 * with columns separated by tab characters.
 *
 * input : HBase table
 * output: hdfs files
 */
/**
 * Exports rows from an HBase table to plain-text files on HDFS:
 * one table row per output line; the row key is the output key and the
 * cells are rendered as tab-separated {@code family:qualifier=value} pairs.
 *
 * <p>input : HBase table (args[0])
 * <br>output: HDFS directory of text files (args[1])
 */
public class User2HDFSMapReduce extends Configured implements Tool {

    /**
     * Map-only reader: Mapper&lt;ImmutableBytesWritable, Result, Text, Text&gt;.
     * Emits (rowkey, tab-separated "family:qualifier=value" cell list).
     */
    public static class ReadUserMapper extends TableMapper<Text, Text> {

        // Reused across map() calls to avoid per-record allocations.
        private Text mapOutputKey = new Text();
        private Text mapOutputValue = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Context context) throws IOException, InterruptedException {
            // The HBase row key becomes the map output key.
            mapOutputKey.set(Bytes.toString(key.get()));

            // Render every cell as "family:qualifier=value", joined by tabs.
            // Tab is a separator, so none is appended after the last cell.
            // StringBuilder (not StringBuffer): no synchronization needed here.
            StringBuilder sb = new StringBuilder();
            for (Cell cell : value.rawCells()) {
                if (sb.length() > 0) {
                    sb.append('\t');
                }
                sb.append(Bytes.toString(CellUtil.cloneFamily(cell)))
                  .append(':')
                  .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                  .append('=')
                  .append(Bytes.toString(CellUtil.cloneValue(cell)));
            }
            mapOutputValue.set(sb.toString());
            context.write(mapOutputKey, mapOutputValue);
        }
    }

    /**
     * Configures and runs the export job.
     *
     * @param args args[0] = input table name, args[1] = HDFS output directory
     * @return 0 on success, -1 on bad arguments (ToolRunner propagates this
     *         to the process exit code via main)
     * @throws Exception if job setup or execution fails
     */
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            usage("Wrong number of arguments: " + args.length);
            // Return a non-zero status instead of calling System.exit() here;
            // main() turns this into the process exit code.
            return -1;
        }
        // create job
        Job job = Job.getInstance(this.getConf(), this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        Scan scan = new Scan();
        scan.setCaching(500);       // Scan's default of 1 is bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't pollute the block cache from MR scans

        TableMapReduceUtil.initTableMapperJob(
                args[0],              // input table
                scan,                 // Scan instance to control CF and attribute selection
                ReadUserMapper.class, // mapper class
                Text.class,           // mapper output key
                Text.class,           // mapper output value — must match ReadUserMapper's
                                      // TableMapper<Text, Text>; was Put.class (a bug)
                job);

        job.setNumReduceTasks(0); // map-only export, no reduce phase

        // Write results as plain text files under the given output path.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        if (!job.waitForCompletion(true)) {
            throw new IOException("error with job!");
        }
        return 0;
    }

    /**
     * Prints an optional error message and the usage banner to stderr.
     *
     * @param errorMsg error message; may be null or empty
     */
    private static void usage(final String errorMsg) {
        if (errorMsg != null && errorMsg.length() > 0) {
            System.err.println("ERROR: " + errorMsg);
        }
        System.err.println("Usage: User2HDFSMapReduce <tablename> <outputdir> ");
        System.err.println("Examples: user hdfs://172.27.35.8:8020/user/hadoop/export/user");
    }

    /** Entry point: runs the tool through ToolRunner with an HBase configuration. */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new User2HDFSMapReduce(), args);
        System.exit(status);
    }
}