14. Integrating HBase with MapReduce
HBase data is ultimately stored on HDFS, and HBase ships with built-in MapReduce support: an MR job can process data read directly from HBase, and it can also write its results straight back into HBase.
Requirement 1: read the data of one HBase table and write it into another HBase table. Note: TableMapper and TableReducer can be used to read data from and write data to HBase.
Here we copy the name and age columns of column family f1 in the table myuser into column family f1 of the table myuser2.
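Note: the steps below assume the source table myuser already holds data in column family f1 and that the target table myuser2 exists with the same column family; if it does not, it can be created in the HBase shell first, for example: create 'myuser2','f1'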
Step 3: Develop the MR program
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HBaseMR extends Configured implements Tool {

    public static class HBaseMapper extends TableMapper<Text, Put> {
        /**
         * @param key   the rowkey of the current row
         * @param value a Result holding all cells of the current row
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            String rowKey = Bytes.toString(key.get());
            Put put = new Put(key.get());
            // Keep only the f1:name and f1:age cells of this row
            for (Cell cell : value.rawCells()) {
                if ("f1".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    if ("name".equals(qualifier) || "age".equals(qualifier)) {
                        put.add(cell);
                    }
                }
            }
            if (!put.isEmpty()) {
                context.write(new Text(rowKey), put);
            }
        }
    }

    public static class HBaseReducer extends TableReducer<Text, Put, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
            for (Put value : values) {
                // TableOutputFormat ignores the output key, so null is acceptable here
                context.write(null, value);
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), "hbaseMr");
        job.setJarByClass(this.getClass());

        Scan scan = new Scan();
        scan.setCaching(500);        // fetch 500 rows per RPC, a reasonable batch size for MR scans
        scan.setCacheBlocks(false);  // do not pollute the block cache with a full-table scan

        // Use the TableMapReduceUtil helper to wire the mapper to the source table "myuser"
        TableMapReduceUtil.initTableMapperJob(TableName.valueOf("myuser"), scan, HBaseMapper.class, Text.class, Put.class, job);
        // Use the TableMapReduceUtil helper to wire the reducer to the target table "myuser2"
        TableMapReduceUtil.initTableReducerJob("myuser2", HBaseReducer.class, job);
        job.setNumReduceTasks(1);

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Create the HBase configuration (reads hbase-site.xml from the classpath)
        Configuration conf = HBaseConfiguration.create();
        int run = ToolRunner.run(conf, new HBaseMR(), args);
        System.exit(run);
    }
}
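Once the job has completed, the copied rows can be checked with a quick client-side scan of myuser2. The following is only a minimal verification sketch, not part of the MR job; it assumes an hbase-site.xml with the cluster connection settings is on the classpath, the class name ScanMyuser2 is made up for this example, and name is assumed to be stored as a UTF-8 string.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanMyuser2 {
    public static void main(String[] args) throws IOException {
        // Reads hbase-site.xml (ZooKeeper quorum etc.) from the classpath
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("myuser2"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                // f1:name is assumed to hold string bytes; other columns are left undecoded
                String name = Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes()));
                System.out.println(rowKey + "\t" + name);
            }
        }
    }
}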
Step 4: Package and run
Note: a packaging plugin (e.g. the maven-shade-plugin, which also produces the original-*.jar used below) is needed so that the HBase dependency jars are bundled into the project jar.
Then run:
yarn jar hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.HBaseMR
Alternatively, instead of building a fat jar, the HBase jars that MapReduce needs can be put on the classpath via environment variables (`hbase mapredcp` prints exactly that list):
export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0/
export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0/
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
yarn jar original-hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.HBaseMR
Requirement 2: read a file from HDFS and write its records into an HBase table.
Read the HDFS file /hbase/input/user.txt, whose content (three tab-separated fields: rowkey, name, age) is:
0007 zhangsan 18
0008 lisi 25
0009 wangwu 20
Step 1: Prepare the data file
Prepare the data file and upload it to HDFS:
hdfs dfs -mkdir -p /hbase/input
cd /export/servers/
vim user.txt
0007 zhangsan 18
0008 lisi 25
0009 wangwu 20
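The file then has to be uploaded into the input directory; assuming it was created under /export/servers as above, a command such as the following works:
hdfs dfs -put /export/servers/user.txt /hbase/input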
Step 2: Develop the MR program
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Hdfs2Hbase extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), "hdfs2Hbase");
        job.setJarByClass(Hdfs2Hbase.class);

        // Read the text file(s) under /hbase/input line by line
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://node01:8020/hbase/input"));

        // The mapper only forwards each line; all parsing happens in the reducer
        job.setMapperClass(HdfsMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Wire the reducer to the target table "myuser2"
        TableMapReduceUtil.initTableReducerJob("myuser2", HBaseReducer.class, job);
        job.setNumReduceTasks(1);

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Create the HBase configuration (reads hbase-site.xml from the classpath)
        Configuration conf = HBaseConfiguration.create();
        int run = ToolRunner.run(conf, new Hdfs2Hbase(), args);
        System.exit(run);
    }

    public static class HdfsMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit the whole line as the key; the value carries no information
            context.write(value, NullWritable.get());
        }
    }

    public static class HBaseReducer extends TableReducer<Text, NullWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Each input line is tab-separated: rowkey, name, age
            String[] split = key.toString().split("\t");
            Put put = new Put(Bytes.toBytes(split[0]));
            put.addColumn("f1".getBytes(), "name".getBytes(), split[1].getBytes());
            put.addColumn("f1".getBytes(), "age".getBytes(), Bytes.toBytes(Integer.parseInt(split[2])));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(split[0])), put);
        }
    }
}
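The second job is packaged and submitted the same way as the first one; assuming the class sits in the same cn.itcast.hbasemr package, the command would be:
yarn jar hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.Hdfs2Hbase
Afterwards the loaded rows can be checked in the HBase shell with scan 'myuser2'.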