Writing data to HBase with MapReduce

This post uses MapReduce to write data into HBase with only a mapper and no reducer. Skipping the reduce phase avoids the network and disk I/O of shuffling data from mappers to reducers, which improves efficiency.


Step 1: Write the code and package it into a jar


package com.ww.hbase.mrimport;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class OnlyMapperDriver extends Configured implements Tool {

    public static void main(String[] args) {
        try {
            int response = ToolRunner.run(HBaseConfiguration.create(), new OnlyMapperDriver(), args);
            if (response == 0) {
                System.out.println("Job is successfully completed...");
            } else {
                System.out.println("Job failed...");
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Reuse the HBase configuration that ToolRunner passed in.
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);

        job.setJarByClass(OnlyMapperDriver.class);
        job.setMapperClass(OnlyMapper.class);
        job.setOutputFormatClass(TableOutputFormat.class);                   // write directly to HBase
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "user"); // target HBase table
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        job.setNumReduceTasks(0);                                            // map-only job: no shuffle, no reducers

        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }
}



class OnlyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    // Input fields are separated by one or more whitespace characters.
    private static final String DATA_SEPARATOR = "\\s+";

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] vals = value.toString().split(DATA_SEPARATOR);
        // Expect exactly five fields: rowkey, name, gender, height, weight.
        if (vals.length == 5) {
            Put put = new Put(Bytes.toBytes(vals[0]));
            // Put.add(family, qualifier, value) is the HBase 0.96 API; it was renamed addColumn in HBase 1.0+.
            put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(vals[1]));
            put.add(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(vals[2]));
            put.add(Bytes.toBytes("data"), Bytes.toBytes("height"), Bytes.toBytes(vals[3]));
            put.add(Bytes.toBytes("data"), Bytes.toBytes("weight"), Bytes.toBytes(vals[4]));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put);
        }
    }
}
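
Note that TableOutputFormat writes into an existing table; it does not create one. Before running the job, create the target table with the two column families the mapper uses ('info' and 'data') from the HBase shell:

create 'user', 'info', 'data'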


Step 2: Upload the test data to HDFS

 hadoop fs -put /data/test.txt /data/test2
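
The mapper expects every line to contain five whitespace-separated fields: row key, name, gender, height, and weight. A hypothetical test.txt might look like this (the values are illustrative only):

1001 Tom male 175 65
1002 Lucy female 162 50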

Step 3: Run the job. Launching it with a plain hadoop jar command fails because the HBase jars are not on Hadoop's classpath. One fix is to copy the required HBase jars into the Hadoop lib directory; here we instead put the output of hbase classpath onto HADOOP_CLASSPATH:

  HADOOP_CLASSPATH=`/cloud/hbase-0.96.2-hadoop2/bin/hbase classpath` /usr/local/hadoop-2.6.0-cdh5.4.8/bin/hadoop jar OnlyMapperDriver.jar /data/test2
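
An alternative to setting HADOOP_CLASSPATH is to let the driver ship the HBase jars with the job itself. A minimal sketch, added to run() before job.waitForCompletion(true):

// import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
// Package HBase and its dependencies into the job's distributed cache,
// so the task nodes do not need them on their local classpath.
TableMapReduceUtil.addDependencyJars(job);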


Step 4: Verify the data from the HBase shell:

scan 'user'
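
With the hypothetical sample rows from Step 2, the scan output would look roughly like this (timestamps elided):

ROW      COLUMN+CELL
 1001    column=data:height, timestamp=..., value=175
 1001    column=data:weight, timestamp=..., value=65
 1001    column=info:gender, timestamp=..., value=male
 1001    column=info:name, timestamp=..., value=Tom
 ...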





