南修子学HBase-使用MapReduce将数据写入HBase

本文链接：https://blog.csdn.net/huang_rx/article/details/115344774

这篇博客详细介绍了如何通过编写MapReduce程序，利用POM配置、Map类、Reduce类以及Driver类，将TXT文件内容批量写入到HBase数据库中。博主依次讲解了每个步骤，包括设置POM文件，定义Map和Reduce操作，上传文件，创建HBase表，运行MR任务，以及最后验证数据导入结果的过程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

pom文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the hbase_demo example project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates; the groupId matches the root of the Java package used by the example classes. -->
    <groupId>org.nanxiuzi</groupId>
    <artifactId>hbase_demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Compile sources as Java 8 and emit Java 8 bytecode. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <!-- All three HBase artifacts are pinned to the same release (2.1.0). -->
    <dependencies>
        <!-- HBase client API (the example classes use Put from org.apache.hadoop.hbase.client). -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.1.0</version>
        </dependency>
        <!-- NOTE(review): none of the example classes import anything that obviously comes from hbase-server; confirm whether this dependency is still required. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>2.1.0</version>
        </dependency>
        <!-- Presumably provides org.apache.hadoop.hbase.mapreduce (TableReducer, TableMapReduceUtil) used by the reducer and driver; verify against the HBase 2.x module layout. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-mapreduce</artifactId>
            <version>2.1.0</version>
        </dependency>
    </dependencies>

</project>

Map类

package org.nanxiuzi.hbase_demo.mr.write;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
/**
 * Pass-through mapper: every input record is forwarded to the reduce phase unchanged.
 *
 * <p>Input key ({@link LongWritable}): the offset of the line inside the input file.
 * It marks the line's position in the file and is <em>not</em> a line number.
 * Input value ({@link Text}): one line of text from the input file.
 *
 * <p>The output key/value types are the same as the input types.
 */
public class studentMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    /**
     * Emits the incoming (offset, line) pair as-is.
     *
     * @param key     offset of the current line in the input file
     * @param value   the current line of text
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // No parsing happens in the map phase; the line is forwarded verbatim.
        context.write(key, value);
    }
}

Reduce类

package org.nanxiuzi.hbase_demo.mr.write;


import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;


/**
 * Reducer that converts tab-separated student lines into HBase {@link Put} mutations.
 *
 * <p>Each input value is expected to look like
 * {@code <rowkey>\t<name>\t<age>\t<addr>}, e.g. {@code 1001\tlixiang\t23\tbeijing}.
 * The first field becomes the row key; the remaining three fields are stored in the
 * {@code info} column family under the qualifiers {@code name}, {@code age} and {@code addr}.
 *
 * <p>Lines that have fewer than four fields or an empty row key (for example a blank
 * trailing line in the input file) are skipped and counted in the job counter
 * {@code studentReduce / MALFORMED_LINES} instead of failing the whole job.
 */
public class studentReduce extends TableReducer<LongWritable, Text, NullWritable> {

    /** Number of tab-separated fields a valid line must contain. */
    private static final int FIELD_COUNT = 4;

    // Column family and qualifiers are constant, so encode them once instead of per record.
    private static final byte[] FAMILY = Bytes.toBytes("info");
    private static final byte[] QUALIFIER_NAME = Bytes.toBytes("name");
    private static final byte[] QUALIFIER_AGE = Bytes.toBytes("age");
    private static final byte[] QUALIFIER_ADDR = Bytes.toBytes("addr");

    // Example of how a parameter supplied from outside could be read from the job
    // configuration (left disabled; enabling it also requires importing
    // org.apache.hadoop.conf.Configuration):
    //
    //   String myconfig1 = null;
    //   @Override
    //   protected void setup(Context context) throws IOException, InterruptedException {
    //       Configuration configuration = context.getConfiguration();
    //       myconfig1 = configuration.get("myconfig1");
    //   }

    /**
     * Writes one {@link Put} per well-formed input line.
     *
     * @param key     key emitted by the mapper (not used when building the row)
     * @param values  the text lines grouped under {@code key}
     * @param context task context used to emit the mutations and update counters
     * @throws IOException          if writing a mutation fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text line : values) {
            // Field layout: rowkey, name, age, addr
            String[] fields = line.toString().split("\t");

            // Guard against blank/short lines: indexing fields[1..3] would otherwise throw
            // ArrayIndexOutOfBoundsException, and the row key must not be empty.
            if (fields.length < FIELD_COUNT || fields[0].isEmpty()) {
                context.getCounter("studentReduce", "MALFORMED_LINES").increment(1);
                continue;
            }

            Put put = new Put(Bytes.toBytes(fields[0]));
            put.addColumn(FAMILY, QUALIFIER_NAME, Bytes.toBytes(fields[1]));
            put.addColumn(FAMILY, QUALIFIER_AGE, Bytes.toBytes(fields[2]));
            put.addColumn(FAMILY, QUALIFIER_ADDR, Bytes.toBytes(fields[3]));

            // The output key is not needed here, so a NullWritable placeholder is emitted.
            context.write(NullWritable.get(), put);
        }
    }
}

Driver类

package org.nanxiuzi.hbase_demo.mr.write;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver that wires {@code studentMapper} and {@code studentReduce} into a MapReduce job
 * which reads a text file and writes its rows into an HBase table.
 *
 * <p>Usage: {@code studentDriver <inputPath> <tableName>}
 *
 * <p>Process exit codes: {@code 0} job succeeded, {@code 1} job failed or an exception
 * was thrown, {@code 2} wrong command-line arguments.
 */
public class studentDriver implements Tool {
    /** Configuration injected through {@link #setConf(Configuration)} (by ToolRunner in {@code main}). */
    private Configuration configuration = null;

    /**
     * Configures and submits the job, then blocks until it finishes.
     *
     * @param args {@code args[0]} is the input file path, {@code args[1]} the target HBase table name
     * @return {@code 0} on success, {@code 1} if the job failed, {@code 2} if the arguments are invalid
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Validate arguments up front instead of failing with ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: studentDriver <inputPath> <tableName>");
            return 2;
        }
        String inputPath = args[0];
        String tableName = args[1];

        // 1. Create the job from the injected configuration.
        Job job = Job.getInstance(configuration);
        // 2. Identify the jar to ship by the driver class.
        job.setJarByClass(studentDriver.class);
        // 3. Mapper and the key/value types it emits.
        job.setMapperClass(studentMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Reducer and output table. No separate "final output key/value type" step is
        //    performed by this driver; the table name is handed to initTableReducerJob.
        TableMapReduceUtil.initTableReducerJob(
                tableName,
                studentReduce.class,
                job);
        // 5. Input file(s) for the job.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        // 6. Submit and wait for completion, printing progress.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    }

    /**
     * Stores the configuration used to create the job.
     *
     * @param conf the configuration to use
     */
    @Override
    public void setConf(Configuration conf) {
        configuration = conf;
    }

    /**
     * @return the configuration previously set via {@link #setConf(Configuration)}, or {@code null} if none was set
     */
    @Override
    public Configuration getConf() {
        return configuration;
    }

    /**
     * Command-line entry point; runs the driver through {@link ToolRunner} and exits with its status.
     *
     * @param args command-line arguments, see the class description
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            int status = ToolRunner.run(conf, new studentDriver(), args);
            System.exit(status);
        } catch (Exception e) {
            e.printStackTrace();
            // Report failure to the caller; previously the process fell through and exited with 0.
            System.exit(1);
        }
    }
}

Txt文件内容

1001	lixiang	23	beijing
1002	zhaoyun	23	shanghai
1003	zhangqinag	23	xinjiang
1004	liugui	23	henan
1005	huansdd	23	haerbin
1006	fengtian	23	nanning

上传文件

hdfs dfs -put hbase.txt /user/nanxiuzi

创建表

 create 'filestudent','info'

运行mr任务

yarn jar /home/hrx/hbase_demo-1.0-SNAPSHOT.jar org.nanxiuzi.hbase_demo.mr.write.studentDriver  /user/nanxiuzi/hbase.txt filestudent

查看结果

hbase(main):011:0> scan 'filestudent'
ROW                     COLUMN+CELL                                                        
 1001                   column=info:addr, timestamp=1617157683187, value=beijing           
 1001                   column=info:age, timestamp=1617157683187, value=23                 
 1001                   column=info:name, timestamp=1617157683187, value=lixiang           
 1002                   column=info:addr, timestamp=1617157687929, value=shanghai          
 1002                   column=info:age, timestamp=1617157687929, value=23                 
 1002                   column=info:name, timestamp=1617157687929, value=zhaoyun           
 1003                   column=info:addr, timestamp=1617157696338, value=xinjiang          
 1003                   column=info:age, timestamp=1617157696338, value=23                 
 1003                   column=info:name, timestamp=1617157696338, value=zhangqinag        
 1005                   column=info:addr, timestamp=1617157692253, value=haerbin           
 1005                   column=info:age, timestamp=1617157692253, value=23                 
 1005                   column=info:name, timestamp=1617157692253, value=huansdd           
4 row(s)
Took 0.4567 seconds