小白学hadoop日记day17——hbase与mr和hive的整合

最新推荐文章于 2022-04-20 10:27:32 发布

兰翎翡竹

最新推荐文章于 2022-04-20 10:27:32 发布

阅读量176

点赞数

本文链接：https://blog.csdn.net/qq_42515611/article/details/118863448

版权

hbase 和 mr整合

场景：将 HDFS 上约 100G 的数据通过 MapReduce 导入到 HBase 中

import com.al.util.HbaseUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;

/**
 * Reads space-delimited text records from HDFS and aggregates them into an HBase table
 * through a MapReduce job.
 *
 * <p>Each input line is split on a single space. The first two fields are joined with
 * {@code _} to form the HBase row key and the third field is treated as an integer count.
 * Counts that share a row key are summed and stored, as a decimal string, in column
 * {@code data:accesss} of the {@code report} table.
 *
 * <p>Lines with fewer than three fields and counts that are not valid integers are skipped
 * and tallied in job counters instead of failing the whole job.
 */
public class HbaseOnMR {
    /** Name of the HBase table the job writes to. */
    private static final String OUTPUT_TABLE = "report";

    /** Column family that receives the aggregated count. */
    private static final byte[] FAMILY = Bytes.toBytes("data");

    /**
     * Column qualifier that receives the aggregated count. The spelling is kept exactly as
     * it was, because renaming the qualifier would change where the data is stored.
     */
    private static final byte[] QUALIFIER = Bytes.toBytes("accesss");

    /** Counter group used to report records that were skipped. */
    private static final String COUNTER_GROUP = "HbaseOnMR";

    /**
     * Map phase: turns one input line into a ({@code field0_field1}, {@code field2}) pair.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reused across calls to avoid allocating two Text objects per record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits the composite key and the raw count field of one line.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            // A short line would otherwise raise ArrayIndexOutOfBoundsException and fail the task.
            if (fields.length < 3) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }
            outKey.set(fields[0] + "_" + fields[1]);
            outValue.set(fields[2]);
            context.write(outKey, outValue);
        }
    }

    /**
     * Reduce phase: sums all counts of one key and writes the total to HBase as a {@link Put}.
     */
    public static class MyReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
        /**
         * Sums the counts for {@code key} and emits one {@link Put} holding the total.
         *
         * @param key     composite row key produced by the mapper
         * @param values  count fields emitted for this key
         * @param context task context used to emit the {@link Put} and update counters
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // long accumulator: an int sum can overflow on a large import.
            long total = 0L;
            boolean sawValidCount = false;
            for (Text value : values) {
                try {
                    total += Long.parseLong(value.toString());
                    sawValidCount = true;
                } catch (NumberFormatException ignored) {
                    // One bad value should not abort the whole job; report it through a counter.
                    context.getCounter(COUNTER_GROUP, "NON_NUMERIC_COUNTS").increment(1);
                }
            }
            if (!sawValidCount) {
                // Nothing usable for this key, so do not write a misleading zero.
                return;
            }

            byte[] rowKey = Bytes.toBytes(key.toString());
            Put put = new Put(rowKey);
            put.addImmutable(FAMILY, QUALIFIER, Bytes.toBytes(String.valueOf(total)));
            context.write(new ImmutableBytesWritable(rowKey), put);
        }
    }

    /**
     * Driver: creates the output table if it is missing, then configures and runs the job.
     *
     * @param args {@code args[0]} is the HDFS input path
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 1) {
            System.err.println("Usage: HbaseOnMR <hdfs-input-path>");
            System.exit(2);
        }

        TableName tableName = TableName.valueOf(OUTPUT_TABLE);

        // Create the output table with its single column family when it does not exist yet.
        // Admin is closed as soon as the check is done.
        try (Admin admin = HbaseUtil.getAdmin()) {
            if (!admin.tableExists(tableName)) {
                HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
                tableDescriptor.addFamily(new HColumnDescriptor(FAMILY));
                admin.createTable(tableDescriptor);
            }
        }

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop01:8020");
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        Job job = Job.getInstance(conf, "hdfs2base");
        job.setJarByClass(HbaseOnMR.class);

        // Map side
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Reduce side: configures the job to write the reducer output into the HBase table.
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, MyReducer.class, job);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

hive整合hbase
1. HBase的最主要的目的是做数据存储
2. Hive的最主要作用是做数据分析
3. 整合的目的是为了方便的在项目中存储+分析
4. hbase中的表数据在hive中能看到，hive中的表数据在hbase中也能看到

hive中创建hbase表

1.在hive中创建hbase能看到的表：
-- Hive table stored in the HBase table 'hht' through the HBase storage handler.
-- Entries in hbase.columns.mapping pair with the Hive columns by position, not by name:
--   name -> :key (row key), sex -> data:sex, age -> data:age
create table if not exists hive2hbase(
    name string,
    sex string,
    age string)
    stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    with serdeproperties(
  "hbase.columns.mapping"=":key,data:sex,data:age")
  tblproperties("hbase.table.name"="hht");
#hbase中put数据  
put 'hht','goudan','data:age','18'
put 'hht','goudan','data:sex','1'
put 'hht','cuihua','data:age','13'
put 'hht','cuihua','data:sex','2'
put 'hht','shanjige','data:sex','3'

hive (default)>select * from hive2hbase;

hive中创建表映射hbase的表

-- When the HBase table already exists, map it with an EXTERNAL Hive table.
-- Entries in hbase.columns.mapping pair with the Hive columns by position, not by name:
--   name -> :key (row key), sex -> data:sex, age -> data:age
create external table if not exists hbase2hive(
    name string,
    sex string,
    age string)
    stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    with serdeproperties(
  "hbase.columns.mapping"=":key,data:sex,data:age")
  tblproperties("hbase.table.name"="tb");


 hive (default)>select * from hbase2hive;

注意：
1、映射hbase的列时，:key要么显式写出，要么完全省略（省略时默认将hive表的第一列作为:key）；否则会出现列数不匹配的错误。
2、hbase中表存在的时候，在hive中创建表时应该使用external关键字。
3、hive关联hbase（hbase中的表由hive自动创建）：
删除hive中的表时，hbase中对应的表会自动删除；
删除hbase中的表后，hive中对应的表仍然存在，但已无法保存数据，会报错：org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed 6 actions: Table 'hive2hbase1' was not found, got: hbase:namespace.: 6 times,
4、hbase关联hive（hive外部表映射已存在的hbase表）：如果删除了hbase中对应的表，那么hive中就查询不出数据；这时如果在hbase中重新创建一个与原来一样的表，hive仍然可以照常访问hbase。如果删除了hive中的表，对hbase中的表没有影响。
5、hbase中的列和hive中的列个数和数据类型应该尽量相同，hive表和hbase表的字段不是按照名字匹配，而是按照顺序来匹配的。
6、hive、hbase和mysql等可以使用第三方工具来进行整合。

兰翎翡竹

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
小白学hadoop日记day17——hbase与mr和hive的整合

hbase 和 mr整合hdfs 100G ---导入-> hbase中import com.al.util.HbaseUtil;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.HColumnDescriptor;import org.apache.hadoop.hbase.HTableDescriptor;impo
复制链接

扫一扫