简介:HBase 作为输出目标(Sink),即从其他存储介质(如 HDFS)读取数据,经 MapReduce 计算后将结果写入 HBase 中。
下面直接上代码:
- 主程序
package apache.org.myhbase.asoutput;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Driver for {@link HBaseASOutPutMR}: configures and submits a map-only
 * MapReduce job that reads text input from HDFS and writes the mapper's
 * {@code Put}s into the HBase table "access-log" via {@code TableOutputFormat}.
 *
 * <p>Usage: {@code HBaseMR <input-path>}
 *
 * @author 13277
 */
public class HBaseMR {
    public static void main(String[] args) throws Exception {
        // Load HBase-aware configuration (picks up hbase-site.xml etc.).
        Configuration conf = HBaseConfiguration.create();
        // Target table for TableOutputFormat; must exist before the job runs.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "access-log");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        // Guard: otherArgs[0] below would throw ArrayIndexOutOfBoundsException
        // with no explanation if the input path is missing.
        if (otherArgs.length < 1) {
            System.err.println("Usage: HBaseMR <input-path>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Hbase_Mr");
        // Map-only job: the mapper emits Puts directly, no reduce phase needed.
        job.setNumReduceTasks(0);
        // BUGFIX: was HBaseMRTest.class — a class that does not exist here,
        // so this did not compile / would ship the wrong jar. Use this driver.
        job.setJarByClass(HBaseMR.class);
        job.setMapperClass(HBaseASOutPutMR.class);
        // Key/value types the mapper emits (row key bytes, HBase Put).
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
- 主程序中设置的Mapper
package apache.org.myhbase.asoutput;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
* 从HDFS读取文件内容后,解析成要要插进HBase的记录
* @author 13277
*
*/
public class HBaseASOutPutMR extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] strs = value.toString().split(" ");
String rowkey = strs[0] + "-" + strs[3].substring(1);
byte[] row = Bytes.toBytes(rowkey);
byte[] family = Bytes.toBytes("info");
byte[] qualifier = Bytes.toBytes("url");
byte[] values = Bytes.toBytes(strs[6]);
Put put = new Put(row);
put.add(family, qualifier, values);
context.write(new ImmutableBytesWritable(row), put);
}
}
PS:
在Hadoop任务配置中,使用FileInputFormat从HDFS读取数据,输出需要配置4项内容:输出格式为TableOutputFormat,输出表名,输出数据的Key和Value的类型。