MapReduce将HDFS文本数据导入到HBase中

该博客介绍了如何在Java程序中使用Hadoop和HBase进行大数据处理。通过添加Hadoop和HBase的依赖,设置配置,实现了从HDFS读取文件并映射到HBase表的过程。MapReduce任务将文本数据映射为键值对,并在Reducer阶段将结果写入HBase表中,实现数据的存储。
摘要由CSDN通过智能技术生成
//pom文件
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>

//java代码
package hbaseuploaddata;
import java.io.IOException;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.Reducer;

public class WeatherData {
    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");
        Job job = Job.getInstance(configuration,"Hbase");
        job.setJarByClass(WeatherData.class);

        job.setMapperClass(WordCountToBaseMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        //这个是实现在Hbase中建好的表,本来想着写个程序让自动创建表的,我就歇着,就不写,还不如去写一条命令直接创建呢,干嘛费那个劲; create 'WC:wc','i'
        TableMapReduceUtil.initTableReducerJob("weather_data:data", WordCountToBaseReduce.class, job);
        //这是上传到hdfs上的文档
        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop102:8020/haha.txt"));
        boolean completion = job.waitForCompletion(true);
        System.out.println(completion);
    }

    //定义reducer对接输出到hbase
    //reduce的输入类型KEYIN, VALUEIN
    //reduce输出的key的类型KEYOUT,写入hbase中reduce的输出key并不重要,重要的是value,value的数据会被写入hbase表,key的数据不重要
    //只需要保证reduce的输出value是put类型就可以了
    //create 'bd20:wc','i'
    public static class WordCountToBaseMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        Text k = new Text();
        IntWritable v = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)	throws IOException, InterruptedException {

            // 1 获取一行
            String line = value.toString();

            // 2 切割
            String[] words = line.split(" ");

            // 3 输出
            for (String word : words) {

                k.set(word);
                context.write(k, v);
            }
        }

    }

    public static class WordCountToBaseReduce extends TableReducer<Text, IntWritable, NullWritable> {
        public static NullWritable OUT_PUT_KEY = NullWritable.get();
        public Put outputValue;
        public int sum;

        @Override
        protected void reduce(Text key, Iterable<IntWritable> value,
                              Reducer<Text, IntWritable, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            sum = 0;
            for (IntWritable intWritable : value) {
                sum += intWritable.get();
            }
            outputValue = new Put(Bytes.toBytes(key.toString()));
            outputValue.addColumn(Bytes.toBytes("info"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
            context.write(OUT_PUT_KEY, outputValue);
        }
    }

}
  • 文件
    在这里插入图片描述
  • hbase结果
    在这里插入图片描述
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值