hbase使用MapReduce操作4（实现将 HDFS 中的数据写入到 HBase 表中）

最新推荐文章于 2021-02-26 00:07:31 发布

dci75702

最新推荐文章于 2021-02-26 00:07:31 发布

阅读量364

点赞数

文章标签：大数据数据库 java

原文链接：http://www.cnblogs.com/pursue339/p/10658127.html

版权

实现将 HDFS 中的数据写入到 HBase 表中

Runner类

 1 package com.yjsj.hbase_mr2;
 2 
 3 import com.yjsj.hbase_mr2.ReadFruitFromHDFSMapper;
 4 import com.yjsj.hbase_mr2.WriteFruitMRFromTxtReducer;
 5 import org.apache.hadoop.conf.Configuration;
 6 import org.apache.hadoop.conf.Configured;
 7 import org.apache.hadoop.fs.Path;
 8 import org.apache.hadoop.hbase.HBaseConfiguration;
 9 import org.apache.hadoop.hbase.client.Put;
10 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
11 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
12 import org.apache.hadoop.mapreduce.Job;
13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14 import org.apache.hadoop.util.Tool;
15 import org.apache.hadoop.util.ToolRunner;
16 
17 import java.io.IOException;
18 
19 class Txt2FruitRunner extends Configured implements Tool {
20     public int run(String[] args) throws Exception {
21 //得到 Configuration
22         Configuration conf = this.getConf();
23 //创建 Job 任务
24         Job job = Job.getInstance(conf, this.getClass().getSimpleName());
25         job.setJarByClass(Txt2FruitRunner.class);
26         Path inPath = new Path("hdfs://master:9000/input_fruit/fruit.tsv");
27 
28         FileInputFormat.addInputPath(job, inPath);
29         //设置 Mapper
30         job.setMapperClass(ReadFruitFromHDFSMapper.class);
31         job.setMapOutputKeyClass(ImmutableBytesWritable.class);
32         job.setMapOutputValueClass(Put.class);
33         //设置 Reducer
34         TableMapReduceUtil.initTableReducerJob("fruit_hdfs", WriteFruitMRFromTxtReducer.class, job);
35         //设置 Reduce 数量，最少 1 个
36         job.setNumReduceTasks(1);
37         boolean isSuccess = job.waitForCompletion(true);
38         if (!isSuccess) {
39             throw new IOException("Job running with error");
40         }
41         return isSuccess ? 0 : 1;
42     }
43 
44     public static void main(String[] args) throws Exception {
45         Configuration conf = HBaseConfiguration.create();
46         conf = HBaseConfiguration.create();
47         conf.set("hbase.zookeeper.quorum", "master,node1,node2");
48         conf.set("hbase.zookeeper.property.clientPort", "2181");
49         conf.set("hbase.master", "master:60000");
50         int status = ToolRunner.run(conf, new Txt2FruitRunner(), args);
51         System.exit(status);
52     }
53 }

Mapper类

 1 package com.yjsj.hbase_mr2;
 2 
 3 import java.io.IOException;
 4 
 5 import org.apache.hadoop.hbase.client.Put;
 6 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 7 import org.apache.hadoop.hbase.util.Bytes;
 8 import org.apache.hadoop.io.LongWritable;
 9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Mapper;
11 
12 public class ReadFruitFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
13     @Override
14     protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
15         //从 HDFS 中读取的数据
16         String lineValue = value.toString();
17         //读取出来的每行数据使用\t 进行分割，存于 String 数组
18         String[] values = lineValue.split("\t");
19         //根据数据中值的含义取值
20         String rowKey = values[0];
21         String name = values[1];
22         String color = values[2];
23         //初始化 rowKey
24         ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowKey));
25         //初始化 put 对象
26         Put put = new Put(Bytes.toBytes(rowKey));
27         //参数分别:列族、列、值
28         put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
29         put.add(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));
30         context.write(rowKeyWritable, put);
31     }
32 }

Reduce类

package com.yjsj.hbase_mr2;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class WriteFruitMRFromTxtReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        //读出来的每一行数据写入到 fruit_hdfs 表中
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}