HBase Java API
Link: HBase official documentation (Chinese edition)
Getting started in IDEA
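Before writing the job itself, it helps to confirm that the client can reach HBase from IDEA at all. A minimal connectivity sketch, assuming HBase's ZooKeeper runs on localhost (the quorum address and the class name HBaseConnectTest are my own placeholders, not part of the original project):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HBaseConnectTest {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // assumption: adjust the quorum to your own cluster
        config.set("hbase.zookeeper.quorum", "localhost");
        try (Connection conn = ConnectionFactory.createConnection(config);
             Admin admin = conn.getAdmin()) {
            // print the existing tables; if this works, the client setup is fine
            for (TableName name : admin.listTableNames()) {
                System.out.println(name.getNameAsString());
            }
        }
    }
}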
Back up the member table into member_bak
package com.kgc.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
/**
* @Date: 2020/8/24 9:48
 * @Description: copy data from one HBase table to another with MapReduce
 * back up the member table into member_bak
* <p>
* create 'member','info','address'
* put 'member','tom','info:age','18'
* put 'member','tom','info:address','BeiJing'
* scan 'member'
* ROW COLUMN+CELL
* tom column=info:address, timestamp=1598234615578, value=BeiJing
* tom column=info:age, timestamp=1598234602279, value=18
* 1 row(s) in 0.0210 seconds
* put 'member','jason','info:age','25'
* <p>
 * Create an empty table below; the age values from the table above will be copied into it:
* create 'member_bak','info','address'
 * Package it with Maven from the project root:
 * mvn package -DskipTests
*
* $HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
* export $HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
 * Neither of the two above is correct; the one below finally works:
* export HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
* com.kgc.hbase.HBaseCopyApp01
* hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HBaseCopyApp01 member member_bak
 * member_bak had no data before the copy; after running the job:
 * hbase(main):001:0> scan 'member_bak'
* ROW COLUMN+CELL
* jason column=info:age, timestamp=1598255370591, value=25
* tom column=info:age, timestamp=1598255346426, value=18
* 2 row(s) in 0.3440 seconds
*/
public class HBaseCopyApp01 {
public static class MyMapper extends TableMapper<Text, Put> {
        // the rowkey, as a string
        Text mapOutPutKey = new Text();
        // Alt+Insert in IDEA -> Override Methods -> map
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // ImmutableBytesWritable is the data type of the rowkey
            // key is the HBase rowkey of the current row
            // HBase data model: rowkey -> column family (column qualifier) -> value
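            // e.g. for the sample data above: rowkey "tom", family "info",
            // qualifier "age", value "18"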
            // use the rowkey as the map output key
            mapOutPutKey.set(Bytes.toString(key.get()));
            // create a Put keyed by the same rowkey
Put p = new Put(key.get());
            // back up only info:age
            // iterate over the cells of this Result
            for (Cell cell : value.listCells()) {
                // keep only the info column family
                if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                    // and within it only the age qualifier
                    if ("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
p.add(cell);
}
}
}
context.write(mapOutPutKey, p);
}
}
public static class MyTableReducer extends TableReducer<Text, Put, ImmutableBytesWritable> {
@Override
protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for (Put put : values) {
context.write(null, put);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = HBaseConfiguration.create();
        Job job = Job.getInstance(config, "HBaseCopyApp01");
job.setJarByClass(HBaseCopyApp01.class); // class that contains mapper and reducer
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
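        // Optional narrowing (my addition, not in the original code): since the
        // mapper only keeps info:age, the scan could already filter server-side:
        // scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"));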
TableMapReduceUtil.initTableMapperJob(
args[0], // input table
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper class
Text.class, // mapper output key
Put.class, // mapper output value
job);
TableMapReduceUtil.initTableReducerJob(
args[1], // output table
MyTableReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
}
}
}
Operations in the VM
The code above copies the wanted content from one table into another,
so two tables are needed: one holding the source data and an empty one to receive the copy.
create 'member','info','address'
put 'member','tom','info:age','18'
put 'member','tom','info:address','BeiJing'
scan 'member'
put 'member','jason','info:age','25'
create 'member_bak','info','address'
Once the code is done, build the jar from the command line: mvn package -DskipTests
Upload the built jar to /home/hadoop/app/tmp/libs
Common errors
# Command to run when the job reports the classpath error (hbase_home is the environment variable you configured; use the same upper/lower case here as in your config)
# export this variable into the environment
export HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HBaseCopyApp01 member member_bak
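After the job has run, the result can also be checked from Java instead of the hbase shell. A minimal read sketch, assuming the HBase client config is on the classpath (the class name MemberBakCheck is my own; rowkey and expected value come from the sample data above):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class MemberBakCheck {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("member_bak"))) {
            // read back the row copied from 'member'; expect info:age = 18
            Result result = table.get(new Get(Bytes.toBytes("tom")));
            byte[] age = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age"));
            System.out.println("tom info:age = " + Bytes.toString(age));
        }
    }
}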
Importing data into HBase
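This job reads a comma-separated file from HDFS and writes each line as a row into an HBase table. The driver takes the input path and the target table as arguments, so both must exist beforehand; assuming the paths used in the final command of this section, the setup would be:

hdfs dfs -mkdir -p /data
hdfs dfs -put employee.txt /data/
# in the hbase shell; the mapper below writes to the info column family
create 'employee','info'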
package com.kgc.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
/**
 * @Date: 2020/8/24 15:29
 * @Description: import data from a file on HDFS into an HBase table with MapReduce
 */
public class HDFSToHBaseApp {
public static class MyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        // reusable rowkey holder for the map output
ImmutableBytesWritable rowKey = new ImmutableBytesWritable();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // split the line on commas
            String[] splits = value.toString().split(",");
            // the rowkey is the first field after the split
Put put = new Put(Bytes.toBytes(splits[0]));
            /*
             * sample input lines:
             * zhangsan,30,company1
             * lisi,40,company2
             * wangwu,50,company3
             */
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"),Bytes.toBytes(splits[1]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"),Bytes.toBytes(splits[2]));
            // set the rowkey as the map output key
rowKey.set(Bytes.toBytes(splits[0]));
context.write(rowKey,put);
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = HBaseConfiguration.create();
        Job job = Job.getInstance(config, "HDFSToHBaseApp");
        job.setJarByClass(HDFSToHBaseApp.class); // class that contains the mapper
        // mapper settings
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
TableMapReduceUtil.initTableReducerJob(
args[1], // output table
null, // reducer class
job);
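        // Note (my reading of TableMapReduceUtil, not from the original notes): with a
        // null reducer class, Hadoop falls back to the identity Reducer, so every Put
        // emitted by the mapper is passed straight through to TableOutputFormat.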
job.setNumReduceTasks(1); // at least one, adjust as required
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
}
}
}
Then rebuild the jar, upload it to the libs directory again, and delete the old one:
mvn package -DskipTests
hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HDFSToHBaseApp /data/employee.txt employee
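To sanity-check the import without the shell, a minimal client-side scan that prints every cell of the employee table (the class name EmployeeScanCheck is my own; the expected rows follow the sample lines in the mapper comment):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class EmployeeScanCheck {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("employee"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result row : scanner) {
                // print rowkey, column and value, e.g. "zhangsan info:age = 30"
                for (Cell cell : row.listCells()) {
                    System.out.println(Bytes.toString(CellUtil.cloneRow(cell))
                            + " " + Bytes.toString(CellUtil.cloneFamily(cell))
                            + ":" + Bytes.toString(CellUtil.cloneQualifier(cell))
                            + " = " + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        }
    }
}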