MapReduce:Hdfs2HBase

Import the HBase MapReduce dependency:

<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-mapreduce -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-mapreduce</artifactId>
    <version>${hbase.version}</version>
</dependency>
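The snippet above references a ${hbase.version} property, which has to be defined in the pom's <properties> section. A minimal sketch; the version number below is only an example and should match the HBase version on your cluster:

<properties>
    <!-- Example only: use the HBase version actually deployed on your cluster -->
    <hbase.version>2.1.0</hbase.version>
</properties>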

Mapper

package day7hbase.Hdfs2Hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @date 2019/7/24
 * @author Fantome
 */
public class ReadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    /**
     * Reads a line from HDFS, splits it, and packs the fields into a Put,
     * which is then passed to the reducer.
     * Since the input comes from HDFS, this extends Mapper rather than TableMapper.
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        // Input format: 1	lisa	0	17
        String[] split = value.toString().split("\t");
        String rowkey = split[0];
        String name = split[1];
        String sex = split[2];
        String age = split[3];

        // Output key: the row key wrapped in an ImmutableBytesWritable
        ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));

        // Output value: a Put carrying the three columns of the "info" family
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("sex"), Bytes.toBytes(sex));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));

        context.write(immutableBytesWritable, put);
    }
}

Reducer

package day7hbase.Hdfs2Hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;
/**
 * @date 2019/7/24
 * @author Fantome
 */
public class ReadReduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    /**
     * Simply forwards each Put it receives; no extra processing is done.
     * Because the output goes to HBase, this extends TableReducer
     * (which fixes the output value type to Mutation).
     * @param key
     * @param values
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(ImmutableBytesWritable key,
                          Iterable<Put> values,
                          Context context) throws IOException, InterruptedException {
        for (Put p:values){
            context.write(NullWritable.get(), p);
        }
    }
}

Driver

package day7hbase.Hdfs2Hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class ReadDrive extends Configured implements Tool {
    /**
     * The HBase configuration has to be handed to the driver, so the job is
     * assembled in run() and launched through ToolRunner from main().
     * @param args
     * @return
     * @throws Exception
     */
    @Override
    public int run(String[] args) throws Exception {
        // Configuration passed in by ToolRunner (the HBase conf created in main)
        Configuration conf = this.getConf();

        // Create the job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(ReadDrive.class);
        // Input path on HDFS
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // Mapper
        job.setMapperClass(ReadMap.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        // Reducer
        job.setReducerClass(ReadReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);
        // Target HBase table to import into
        TableMapReduceUtil.initTableReducerJob(
                "fruit_mr",
                ReadReduce.class,
                job);
        // At least one reduce task
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }

        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Create the HBase configuration
        Configuration conf = HBaseConfiguration.create();
        // Run this driver (ReadDrive) through ToolRunner
        int result = ToolRunner.run(conf, new ReadDrive(), args);
        System.exit(result);
    }
}
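
HBaseConfiguration.create() picks up hbase-site.xml from the classpath, which is usually enough when the job is submitted from a cluster node. If the driver is launched from a machine without that file, the ZooKeeper quorum can be set explicitly in main() before calling ToolRunner; a minimal sketch, with placeholder hostnames:

        // Minimal sketch: point the job at HBase's ZooKeeper ensemble explicitly.
        // The hostnames are placeholders; normally these values come from hbase-site.xml.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node1,node2,node3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        int result = ToolRunner.run(conf, new ReadDrive(), args);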

Run command:

yarn jar /opt/test/sparkStudy-1.0-SNAPSHOT.jar day7hbase.Hdfs2Hbase.ReadDrive /Hdfs2HbaseData.txt
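
Before running, the target table fruit_mr with column family info has to exist in HBase (the names come from the driver and mapper above). It can be created from the hbase shell:

create 'fruit_mr', 'info'

The input file on HDFS is expected to be tab-separated, one record per line, matching the format noted in the mapper, e.g.:

1	lisa	0	17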