MR读取HBase

最新推荐文章于 2022-04-15 16:03:30 发布

小胖超凶哦！

最新推荐文章于 2022-04-15 16:03:30 发布

阅读量1k

点赞数 1

分类专栏：初学大数据、HBase基础　文章标签：hbase

本文链接：https://blog.csdn.net/ZZJXP/article/details/124187643

版权

同时被 2 个专栏收录：初学大数据

158 篇文章 1 订阅

订阅专栏

HBase基础

13 篇文章 0 订阅

订阅专栏

[root@master ~]# hive

Logging initialized using configuration in jar:file:/usr/local/soft/hive-1.2.1/lib/hive-common-1.2.1.jar!/hive-log4j.properties
hive> create external table students_hbase(
    >     id string,
    >     name string,
    >     age string,
    >     gender string, 
    >     clazz string
    > ) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties ("hbase.columns.mapping" = "
    >     :key,
    >     info:name,
    >     info:age,
    >     info:gender,
    >     info:clazz
    > ")
    > tblproperties("hbase.table.name" = "default:stu");
OK
Time taken: 1.402 seconds
hive> show tables;
OK
students_hbase
Time taken: 0.077 seconds, Fetched: 1 row(s)
hive> desc students_hbase;
OK
id                  	string              	from deserializer   
name                	string              	from deserializer   
age                 	string              	from deserializer   
gender              	string              	from deserializer   
clazz               	string              	from deserializer   
Time taken: 0.093 seconds, Fetched: 5 row(s)
hive> select * from students_hbase limit 10;
OK
1500100001	施笑槐	22	女	文科六班
1500100002	吕金鹏	24	男	文科六班
1500100003	单乐蕊	22	女	理科六班
1500100004	葛德曜	24	男	理科三班
1500100005	宣谷芹	22	女	理科五班
1500100006	边昂雄	21	男	理科二班
1500100007	尚孤风	23	女	文科六班
1500100008	符半双	22	女	理科六班
1500100009	沈德昌	21	男	理科一班
1500100010	羿彦昌	23	男	理科六班
Time taken: 0.179 seconds, Fetched: 10 row(s)
hive> select clazz,gender,count(*) from students_hbase group by clazz,gender;
OK
文科一班	女	41
文科一班	男	31
文科三班	女	44
文科三班	男	50
文科二班	女	38
文科二班	男	49
文科五班	女	41
文科五班	男	43
文科六班	女	49
文科六班	男	55
文科四班	女	42
文科四班	男	39
理科一班	女	47
理科一班	男	31
理科三班	女	33
理科三班	男	35
理科二班	女	43
理科二班	男	36
理科五班	女	37
理科五班	男	33
理科六班	女	37
理科六班	男	55
理科四班	女	41
理科四班	男	50
Time taken: 35.317 seconds, Fetched: 24 row(s)

<!-- HBase server artifact. The Java code in this article imports classes from
     org.apache.hadoop.hbase.mapreduce (TableMapper, TableMapReduceUtil), which
     this dependency is expected to supply for HBase 1.x. -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.4.6</version>
</dependency>

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

//统计班级人数 最后写入HDFS
public class Demo05MRReadHBase {
    //Map端
    public static class ReadHBaseMapper extends TableMapper<Text, IntWritable>{
        @Override
        /**
         * key 对应rowkey
         * value 对应hbase的一条数据
         */
        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String clazz = Bytes.toString(value.getValue("info".getBytes(), "clazz".getBytes()));

            context.write(new Text(clazz),new IntWritable(1));
        }
    }

    //Reduce端
    public static class  MyReducer extends Reducer<Text,IntWritable,Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int cnt=0;
            for (IntWritable value : values) {
                cnt+=value.get();
            }
            context.write(key,new IntWritable(cnt));
        }
    }

    //Driver端
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
        conf.set("fs.defaultFS","hdfs://master:9000");

        Job job = Job.getInstance(conf);
        job.setJobName("Demo05MRReadHBase");
        job.setJarByClass(Demo05MRReadHBase.class);

        TableName stu = TableName.valueOf("stu");
        Scan scan = new Scan();

        //配置Map端
        TableMapReduceUtil.initTableMapperJob(
                stu
                ,scan
                ,ReadHBaseMapper.class
                ,Text.class
                ,IntWritable.class
                ,job
        );

        //配置Reduce端
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        //配置输入输出路径
        Path path = new Path("/data/mr/stu_cnt");
        FileSystem fs = FileSystem.get(conf);
        if(fs.exists(path)){
            fs.delete(path,true);
        }
        FileOutputFormat.setOutputPath(job,path);
        job.waitForCompletion(true);
    }
}

<build>
    <plugins>
        <!-- Java compiler: compile sources and emit bytecode for Java 1.8 -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>

        <!-- Assembly plugin: during the package phase, additionally builds a
             jar that bundles the project's dependencies
             (the *-jar-with-dependencies.jar artifact) -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.3.0</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

[root@master jars]# ls
hadoop-1.0-SNAPSHOT.jar  HIve-1.0-SNAPSHOT.jar
HBase-1.0-SNAPSHOT.jar   HiveUDF-1.0.jar
[root@master jars]# rz -E
rz waiting to receive.
[root@master jars]# ls
hadoop-1.0-SNAPSHOT.jar
HBase-1.0-SNAPSHOT.jar
HBase-1.0-SNAPSHOT-jar-with-dependencies.jar
HIve-1.0-SNAPSHOT.jar
HiveUDF-1.0.jar
[root@master jars]# hadoop jar HBase-1.0-SNAPSHOT-jar-with-dependencies.jar com.shujia.Demo05MRReadHBase
[root@master jars]# hdfs dfs -cat /data/mr/stu_cnt/*
文科一班	72
文科三班	94
文科二班	87
文科五班	84
文科六班	104
文科四班	81
理科一班	78
理科三班	68
理科二班	79
理科五班	70
理科六班	92
理科四班	91