1. Environment Setup
- Prepare Hadoop, ZooKeeper, and HBase clusters.
- Add the HBase dependency jars to the Hadoop classpath so that `yarn jar` can resolve the HBase classes: export HADOOP_CLASSPATH=`$HBASE_HOME/bin/hbase mapredcp`
- Upload the following data to HDFS; fields are separated by \t:
1001 zhangsan 19
1002 lisi 29
1003 wangwu 19
1004 xiaoming 29
1005 xiaobai 19
1006 xiaoli 29
1007 xiapeng 19
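Assuming the file is saved locally as stu.txt, it can be uploaded to the path used by the submission command in 2.4:
hdfs dfs -mkdir -p /datas
hdfs dfs -put stu.txt /datas/stu.txt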
- Create the target table in the HBase shell: create "stu","info","info1"
- Maven dependencies:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <hbase.version>2.4.0</hbase.version>
    <hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
    <!-- HBase read/write -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <!-- TableMapper/TableReducer and TableMapReduceUtil live here -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-mapreduce</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>
2. HDFS to HBase
2.1 HdfsToHbaseDriver
package com.hpsk.mr.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HdfsToHbaseDriver implements Tool {
    // Configuration injected by ToolRunner
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job object
        Job job = Job.getInstance(this.configuration);
        // 2. Set the driver class
        job.setJarByClass(HdfsToHbaseDriver.class);
        // 3. Set the Mapper and its output key/value types
        job.setMapperClass(HdfsToHbaseMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Set the Reducer; initTableReducerJob wires up TableOutputFormat
        //    so the reducer writes directly to the HBase table args[1]
        TableMapReduceUtil.initTableReducerJob(args[1],
                HdfsToHbaseReducer.class,
                job);
        // 5. Set the input path (the HDFS file, args[0])
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // 6. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HdfsToHbaseDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
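The driver reads the ZooKeeper quorum and other HBase settings from the hbase-site.xml on the classpath. When submitting from a machine without that file, the settings can be supplied programmatically instead; a minimal sketch (the host names are assumptions, taken from the hive-site.xml shown in Section 4):

// requires org.apache.hadoop.hbase.HBaseConfiguration
Configuration configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");  // assumed hosts
configuration.set("hbase.zookeeper.property.clientPort", "2181");
int run = ToolRunner.run(configuration, new HdfsToHbaseDriver(), args);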
2.2 HdfsToHbaseMapper
package com.hpsk.mr.hdfs;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class HdfsToHbaseMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Pass each line through unchanged; parsing happens in the reducer
        context.write(key, value);
    }
}
2.3 HdfsToHbaseReducer
package com.hpsk.mr.hdfs;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class HdfsToHbaseReducer extends TableReducer<LongWritable, Text, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // 1. Split the line into fields: id, name, age
            String[] fields = value.toString().split("\t");
            // 2. Create a Put keyed by the id field (row key)
            Put put = new Put(Bytes.toBytes(fields[0]));
            // 3. Populate the Put: name -> info:name, age -> info1:age
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes("info1"), Bytes.toBytes("age"), Bytes.toBytes(fields[2]));
            // 4. Emit; TableOutputFormat writes the Put to HBase
            context.write(NullWritable.get(), put);
        }
    }
}
2.4 Submitting the job on YARN
yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hdfs.HdfsToHbaseDriver /datas/stu.txt stu
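Once the job finishes successfully, the imported rows can be inspected from the HBase shell:
scan "stu"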
3. HBase to HBase
3.1 Create the target table in HBase
create "stu1","info"
3.2 HbaseToHbaseDriver
package com.hpsk.mr.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HbaseToHbaseDriver implements Tool {
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job object
        Job job = Job.getInstance(this.configuration);
        // 2. Set the Mapper: scan the source table args[0] and emit (rowkey, Put)
        TableMapReduceUtil.initTableMapperJob(args[0],
                new Scan(),
                HbaseToHbaseMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);
        // 3. Set the Reducer: write the Puts into the target table args[1]
        TableMapReduceUtil.initTableReducerJob(args[1],
                HbaseToHbaseReducer.class,
                job);
        // 4. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HbaseToHbaseDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
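The bare new Scan() above ships every cell of every row to the mappers, even though the job only keeps info:name. Configuring the Scan in the driver pushes that filtering to the region servers; a sketch (the caching value is an assumption to tune per cluster):

// requires org.apache.hadoop.hbase.util.Bytes
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name")); // fetch only info:name
scan.setCaching(500);        // rows per RPC round trip; assumed value
scan.setCacheBlocks(false);  // keep full scans out of the block cache
TableMapReduceUtil.initTableMapperJob(args[0], scan, HbaseToHbaseMapper.class,
        ImmutableBytesWritable.class, Put.class, job);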
3.3 HbaseToHbaseMapper
package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class HbaseToHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Create a Put that reuses the source row key
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Keep only cells whose qualifier is "name"
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // Copy the cell into the Put as-is
                put.add(cell);
            }
        }
        // Emit only non-empty Puts: writing an empty Put would fail at the sink
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
3.4 HbaseToHbaseReducer
package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class HbaseToHbaseReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Forward each Put unchanged; TableOutputFormat writes it to the target table
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}
3.5 Submitting the job on YARN
yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hbase.HbaseToHbaseDriver stu stu1
4. Hive to HBase
4.1 Integrating Hive with HBase
- Add the following to hive-site.xml so Hive can reach the HBase cluster's ZooKeeper:
<property>
    <name>hive.zookeeper.quorum</name>
    <value>hadoop102,hadoop103,hadoop104</value>
</property>
<property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
</property>
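Depending on the Hive build, the hive-hbase-handler jar and the HBase client jars may also need to be on Hive's classpath. One common, environment-specific way (the path is an assumption):
export HIVE_AUX_JARS_PATH=$HBASE_HOME/lib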
4.2 Create a Hive table mapped to HBase
CREATE TABLE hive_hbase_stu_table(
id int,
name string,
age string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age")
TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");
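Since this is a Hive-managed table, the CREATE TABLE also creates the underlying hbase_stu_table in HBase if it does not exist yet, which can be confirmed from the HBase shell:
list
describe "hbase_stu_table"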
4.3 Create the stu staging table in Hive
CREATE TABLE stu(
id int,
name string,
age string
)
row format delimited fields terminated by '\t';
4.4 Load the local data into the stu table
load data local inpath '/opt/datas/stu.txt' into table stu;
Data: the stu.txt file prepared in Section 1.
4.5 Insert the stu table data into hive_hbase_stu_table
A table backed by the HBase storage handler cannot be populated with LOAD DATA; insert through a query instead:
insert into table hive_hbase_stu_table select * from stu;
4.6 View the HBase table data
scan "hbase_stu_table"
5. HBase to Hive
5.1 Create a Hive external table mapped to HBase
- DDL:
CREATE EXTERNAL TABLE relevance_hbase_stu(
id int,
name string,
age string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age")
TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");
- Query the data:
select * from relevance_hbase_stu;
5.2 Computing student age ranges with Hive
- Analyze the data and write the result into a result table:
create table stu_result as
select
    case when cast(age as int) < 20 then '20以下'
         when cast(age as int) < 30 then '20到30'
         when cast(age as int) < 40 then '30到40'
         when cast(age as int) < 50 then '40到50'
         else '50以上' end as age_range,
    count(1) as cnt
from relevance_hbase_stu
group by
    case when cast(age as int) < 20 then '20以下'
         when cast(age as int) < 30 then '20到30'
         when cast(age as int) < 40 then '30到40'
         when cast(age as int) < 50 then '40到50'
         else '50以上' end;
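Repeating the CASE expression in GROUP BY is easy to get out of sync with the SELECT list; an equivalent formulation computes the bucket once in a subquery:

create table stu_result as
select age_range, count(1) as cnt
from (
    select case when cast(age as int) < 20 then '20以下'
                when cast(age as int) < 30 then '20到30'
                when cast(age as int) < 40 then '30到40'
                when cast(age as int) < 50 then '40到50'
                else '50以上' end as age_range
    from relevance_hbase_stu
) t
group by age_range;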
- Check the result:
select * from stu_result;