用MR向hbase写数据,在这里只用mapper不用reducer,避免mapper向reducer传输数据产生的IO网络流量,从而提高效率。
第一步:编写代码,打jar包
package com.ww.hbase.mrimport;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Map-only driver that loads whitespace-separated text from HDFS directly into
 * the HBase table "user" via {@link TableOutputFormat}. Reduce tasks are set to
 * zero so mapper output goes straight to HBase, avoiding shuffle I/O.
 *
 * <p>Usage: {@code hadoop jar OnlyMapperDriver.jar <hdfs-input-path>}
 */
public class OnlyMapperDriver extends Configured implements Tool {

    public static void main(String[] args) {
        try {
            // ToolRunner injects the HBase configuration into this Tool (via
            // Configured.setConf) and strips generic options (-D, -conf, ...).
            int response = ToolRunner.run(HBaseConfiguration.create(), new OnlyMapperDriver(), args);
            if (response == 0) {
                System.out.println("Job is successfully completed...");
            } else {
                System.out.println("Job failed...");
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
    }

    /**
     * Configures and submits the map-only import job.
     *
     * @param args args[0] is the HDFS input path
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration handed to us by ToolRunner instead of creating a
        // fresh one; otherwise generic options passed on the command line are lost.
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJarByClass(OnlyMapperDriver.class);
        job.setMapperClass(OnlyMapper.class);
        job.setOutputFormatClass(TableOutputFormat.class); // write mapper output directly to HBase
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "user"); // destination HBase table
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        job.setNumReduceTasks(0); // map-only: no shuffle, no reduce
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // waitForCompletion already reports success/failure; no separate isSuccessful check needed.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
/**
 * Mapper that parses lines of the form
 * {@code <rowkey> <name> <gender> <height> <weight>} (whitespace-separated)
 * and emits one {@link Put} per valid line; lines without exactly 5 fields
 * are silently skipped.
 */
class OnlyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    private static final String DATA_SEPARATOR = "\\s+";

    // Convert family/qualifier names once instead of on every record.
    private static final byte[] CF_INFO = Bytes.toBytes("info");
    private static final byte[] CF_DATA = Bytes.toBytes("data");
    private static final byte[] Q_NAME = Bytes.toBytes("name");
    private static final byte[] Q_GENDER = Bytes.toBytes("gender");
    private static final byte[] Q_HEIGHT = Bytes.toBytes("height");
    private static final byte[] Q_WEIGHT = Bytes.toBytes("weight");

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // String.split never returns null, so no null check is needed.
        String[] vals = value.toString().split(DATA_SEPARATOR);
        if (vals.length == 5) {
            // Encode the row key once via Bytes.toBytes (always UTF-8) and reuse
            // it for both the Put and the output key; the original mixed
            // Bytes.toBytes with String.getBytes (platform default charset),
            // which can disagree on non-UTF-8 JVMs.
            byte[] rowKey = Bytes.toBytes(vals[0]);
            // Use a fresh local Put per record rather than a mutable instance field.
            Put put = new Put(rowKey);
            put.add(CF_INFO, Q_NAME, Bytes.toBytes(vals[1]));
            put.add(CF_INFO, Q_GENDER, Bytes.toBytes(vals[2]));
            put.add(CF_DATA, Q_HEIGHT, Bytes.toBytes(vals[3]));
            put.add(CF_DATA, Q_WEIGHT, Bytes.toBytes(vals[4]));
            context.write(new ImmutableBytesWritable(rowKey), put);
        }
    }
}
第二步:上传测试数据到hdfs
hadoop fs -put /data/test.txt /data/test2
第三步:执行脚本运行作业。直接用 hadoop jar 提交会有问题:hbase相关的jar包并没有被加载进来。解决办法是通过 HADOOP_CLASSPATH 引入 hbase 的依赖(如下所示),或者将需要的jar包拷贝到hadoop的lib目录下:
HADOOP_CLASSPATH=`/cloud/hbase-0.96.2-hadoop2/bin/hbase classpath` /usr/local/hadoop-2.6.0-cdh5.4.8/bin/hadoop jar OnlyMapperDriver.jar /data/test2
第四步:校验,用hbase的命令行工具查看:
scan 'user'
结果如下所示: