HBase MR Read/Write


1. Environment Preparation

  1. Prepare the Hadoop, ZooKeeper, and HBase clusters.

  2. Add the HBase dependency jars to the Hadoop classpath: export HADOOP_CLASSPATH=`$HBASE_HOME/bin/hbase mapredcp`

  3. Upload the following data (stu.txt) to HDFS (the job below reads it from /datas/stu.txt); fields are separated by \t (tab):
    1001 zhangsan 19
    1002 lisi 29
    1003 wangwu 19
    1004 xiaoming 29
    1005 xiaobai 19
    1006 xiaoli 29
    1007 xiapeng 19

  4. Create the HBase table: create 'stu','info','info1'

  5. Maven dependencies

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <hbase.version>2.4.0</hbase.version>
    <hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
    <!--hbase读写-->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-mapreduce</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>
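
With the dependencies in place, it can help to confirm that the client can actually reach the cluster before building any MapReduce job. The following is a minimal sketch, assuming hbase-site.xml is on the classpath (otherwise set hbase.zookeeper.quorum explicitly); the class name ConnectionCheck is illustrative only.

package com.hpsk.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class ConnectionCheck {
    public static void main(String[] args) throws Exception {
        // Picks up hbase-site.xml from the classpath if present
        Configuration conf = HBaseConfiguration.create();
        // Illustrative quorum; replace with your own ZooKeeper hosts if needed
        conf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            // Confirms the table created in step 4 is visible
            System.out.println("stu exists: " + admin.tableExists(TableName.valueOf("stu")));
        }
    }
}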

2. HDFS TO HBASE

2.1 HdfsToHbaeDriver

package com.hpsk.mr.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HdfsToHbaeDriver implements Tool {
    // Hadoop Configuration injected by ToolRunner via setConf
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job instance
        Job job = Job.getInstance(this.configuration);

        // 2. Set the jar class for the driver
        job.setJarByClass(HdfsToHbaeDriver.class);

        // 3. Set the Mapper and its output key/value types
        job.setMapperClass(HdfsToHbaeMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // 4. Configure the HBase table Reducer; args[1] is the output table name
        TableMapReduceUtil.initTableReducerJob(args[1],
                HdfsToHbaeReducer.class,
                job);

        // 5. Set the HDFS input path; args[0] is the input file
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // 6. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);

        return result ? 0: 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HdfsToHbaeDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

2.2 HdfsToHbaeMapper

package com.hpsk.mr.hdfs;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;


public class HdfsToHbaeMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Pass the input line through unchanged; the reducer does the parsing
        context.write(key, value);
    }
}

2.3 HdfsToHbaeReducer

package com.hpsk.mr.hdfs;


import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;

public class HdfsToHbaeReducer extends TableReducer<LongWritable, Text, NullWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // 1. Split the tab-separated line: id, name, age
            String[] fields = value.toString().split("\t");
            // 2. Create a Put keyed by the id (row key)
            Put put = new Put(Bytes.toBytes(fields[0]));
            // 3. Add the column values to the Put
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes("info1"), Bytes.toBytes("sex"), Bytes.toBytes(fields[2]));
            // 4. Emit the Put; TableOutputFormat writes it to HBase
            context.write(NullWritable.get(), put);
        }
    }
}

2.4 Submit the job to YARN

yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hdfs.HdfsToHbaeDriver /datas/stu.txt stu
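
After the job completes, the imported rows can be spot-checked with the HBase Java client (or simply scan 'stu' in the HBase shell). This is a minimal sketch, assuming the column layout written by HdfsToHbaeReducer above; the class name ImportCheck is illustrative only.

package com.hpsk.mr.hdfs;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ImportCheck {
    public static void main(String[] args) throws Exception {
        try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
             Table table = connection.getTable(TableName.valueOf("stu"))) {
            // Fetch one of the imported rows by its row key
            Result result = table.get(new Get(Bytes.toBytes("1001")));
            String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));
            String third = Bytes.toString(result.getValue(Bytes.toBytes("info1"), Bytes.toBytes("sex")));
            System.out.println("info:name=" + name + ", info1:sex=" + third);
        }
    }
}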

3. HBASE TO HBASE

3.1 Create the target table in HBase

create "stu1","info"

3.2 HbaseToHbaseDriver

package com.hpsk.mr.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HbaseToHbaseDriver implements Tool {
    private Configuration configuration = null;
    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job instance
        Job job = Job.getInstance(this.configuration);

        // 2. Set the jar class so the Mapper/Reducer can be located on the cluster
        job.setJarByClass(HbaseToHbaseDriver.class);

        // 3. Configure the table Mapper; args[0] is the source table
        TableMapReduceUtil.initTableMapperJob(args[0],
                new Scan(),
                HbaseToHbaseMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);

        // 4. Configure the table Reducer; args[1] is the target table
        TableMapReduceUtil.initTableReducerJob(args[1],
                HbaseToHbaseReducer.class,
                job);

        // 5. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);

        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HbaseToHbaseDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

3.3 HbaseToHbaseMapper

package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;

public class HbaseToHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Create a Put for the current row key
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Keep only cells from the name column
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // Copy the cell into the Put
                put.add(cell);
            }
        }
        // Write the Put out, skipping rows that have no name column
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}

3.4 HbaseToHbaseReducer

package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;

public class HbaseToHbaseReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
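        // Forward every Put produced by the mapper to the target table unchanged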
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}

3.5 Submit the job to YARN

yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hbase.HbaseToHbaseDriver stu stu1
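
Once this job finishes, the target table should contain only the info:name column copied from stu. A minimal sketch of such a check with the Java client; the class name CopyCheck is illustrative only.

package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class CopyCheck {
    public static void main(String[] args) throws Exception {
        try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
             Table table = connection.getTable(TableName.valueOf("stu1"))) {
            // Scan only the column the MR job was supposed to copy
            Scan scan = new Scan().addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println(Bytes.toString(result.getRow()) + " -> "
                            + Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
                }
            }
        }
    }
}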

4. HIVE TO HBASE

4.1 Integrate Hive with HBase

  1. Add the following configuration to hive-site.xml

    <property>
     <name>hive.zookeeper.quorum</name>
     <value>hadoop102,hadoop103,hadoop104</value>
    </property>
    <property>
     <name>hive.zookeeper.client.port</name>
     <value>2181</value>
    </property>
    

4.2 Create a Hive table mapped to HBase

CREATE TABLE hive_hbase_stu_table(
 id int,
 name string,
 age string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age")
TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");

4.3 Create the stu table in Hive

CREATE TABLE stu(
 id int,
 name string,
 age string
)
row format delimited fields terminated by '\t';

4.4 Load local data into the stu table

load data local inpath '/opt/datas/stu.txt' into table stu;

Data: the stu.txt file prepared at the beginning.

4.5 Insert the stu table data into hive_hbase_stu_table

Note: an HBase-backed Hive table cannot be loaded with LOAD DATA; insert the data with a query instead:

insert into table hive_hbase_stu_table select * from stu;

4.6 View the data in the HBase table

scan "hbase_stu_table"

5. HBASE TO HIVE

5.1 Create a Hive external table mapped to HBase

  1. DDL statement

    CREATE EXTERNAL TABLE relevance_hbase_stu(
     id int,
     name string,
     age string
    )
    STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age") 
    TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");
    
  2. Query the data

    select * from relevance_hbase_stu;
    

5.2 Compute student age ranges with Hive

  1. Analyze the data and insert the result into a result table

    create table stu_result as
    select
    case when cast(age as int) < 20 then 'under 20'
         when cast(age as int) < 30 then '20 to 30'
         when cast(age as int) < 40 then '30 to 40'
         when cast(age as int) < 50 then '40 to 50'
         else '50 and above'
    end as age_range,
    count(1) as cnt
    from relevance_hbase_stu
    group by
    case when cast(age as int) < 20 then 'under 20'
         when cast(age as int) < 30 then '20 to 30'
         when cast(age as int) < 40 then '30 to 40'
         when cast(age as int) < 50 then '40 to 50'
         else '50 and above'
    end;
    
  2. View the result

    select * from stu_result;
    