Importing Data from a File into an HBase Table
The idea behind this example is much the same as the HBase table-to-table copy case; the overall flow is unchanged. The only difference is that this time the Mapper reads its input from a file on HDFS rather than from an HBase table, so it can extend the generic org.apache.hadoop.mapreduce.Mapper directly instead of TableMapper.
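The input file fruit.tsv is tab-separated, one record per line, in rowkey / name / color order. The first row below is the sample documented in the Mapper's comment; the other two are made-up rows added here purely for illustration:

00001	apple	red
00002	pear	yellow
00003	grape	purple

First, the Mapper that parses each line and assembles a Put: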
package com.zhanbk.mr2;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ReadFruitFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();

        // Initialize the arrays of column family and qualifier names
        String[] CFs = {"info", "other"};
        String[] qualifiers = {"name", "color"};

        // Importing may involve some cleansing; the expected record layout
        // (tab-separated) is:
        //   00001    apple    red
        String[] values = line.split("\t");
        if (values.length < 3) {
            // Skip blank or malformed lines instead of failing the task
            return;
        }
        String row = values[0];
        String name = values[1];
        String color = values[2];

        // Build the rowkey object
        ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable(Bytes.toBytes(row));

        // Build the Put and assemble its cells; addColumn replaces the
        // Put.add(byte[], byte[], byte[]) overload deprecated since HBase 1.0
        Put put = new Put(Bytes.toBytes(row));
        put.addColumn(Bytes.toBytes(CFs[0]), Bytes.toBytes(qualifiers[0]), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes(CFs[0]), Bytes.toBytes(qualifiers[1]), Bytes.toBytes(color));

        context.write(immutableBytesWritable, put);
    }
}
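Note that the Mapper's output types, ImmutableBytesWritable for the rowkey and Put for the assembled cells, are exactly the input types an HBase TableReducer expects, so the Put objects built here flow through the shuffle to the reducer unchanged. Also note that although the CFs array declares two column families, info and other, this example only ever writes to info.

The reducer is a thin pass-through: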
package com.zhanbk.mr2;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class WriteHBaseFruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable immutableBytesWritable, Iterable<Put> puts, Context context)
            throws IOException, InterruptedException {
        // Identity pass-through: forward every Put received for this rowkey
        // to the table configured in the driver
        for (Put put : puts) {
            context.write(NullWritable.get(), put);
        }
    }
}
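Because the reducer does nothing but forward each Put, the same load could in principle run as a map-only job. A minimal sketch of the change, assuming initTableReducerJob accepts a null reducer class (in that case it simply skips setReducerClass and lets the mapper write to the table through TableOutputFormat), replacing the corresponding lines in the driver's run() method below:

// Map-only variant: mapper output (ImmutableBytesWritable, Put) is written
// straight to the target table
TableMapReduceUtil.initTableReducerJob(
        "fruit_from_hdfs",
        null,  // no reducer class
        job);
job.setNumReduceTasks(0);

The driver that wires everything together: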
package com.zhanbk.mr2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HDFS2HBaseDriver extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();

        // Assemble the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(HDFS2HBaseDriver.class);

        // Input path on HDFS
        Path inPath = new Path("hdfs://zhangbk:9000/input_hbase/fruit.tsv");
        FileInputFormat.addInputPath(job, inPath);

        // Wire up the Mapper that reads from HDFS
        job.setMapperClass(ReadFruitFromHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        // Wire up the Reducer that writes to the fruit_from_hdfs table
        TableMapReduceUtil.initTableReducerJob(
                "fruit_from_hdfs",
                WriteHBaseFruitReducer.class,
                job);
        job.setNumReduceTasks(1);

        boolean isSucceed = job.waitForCompletion(true);
        System.out.println(isSucceed);
        return isSucceed ? 0 : 1;
    }

    public static void main(String[] args) {
        // HBaseConfiguration.create() layers hbase-site.xml on top of the
        // Hadoop configuration so the job can locate the HBase cluster
        Configuration conf = HBaseConfiguration.create();
        HDFS2HBaseDriver h2h = new HDFS2HBaseDriver();
        try {
            // System.exit() takes a status code: 0 means a normal exit,
            // any nonzero value means an abnormal exit
            int status = ToolRunner.run(conf, h2h, args);
            System.exit(status);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
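To run the job end to end: package the three classes into a jar, upload the input file with hadoop fs -put fruit.tsv /input_hbase/ so it matches the path hard-coded in run(), and create the target table beforehand in the HBase shell, for example create 'fruit_from_hdfs', 'info' (only the info family is written by this Mapper). Then submit the job with yarn jar <your-jar> com.zhanbk.mr2.HDFS2HBaseDriver, where <your-jar> is a placeholder for your own build artifact, and verify the loaded rows with scan 'fruit_from_hdfs' in the HBase shell.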