利用MapReduce将文件内容写入Hbase表

最新推荐文章于 2021-03-24 09:00:05 发布

修水管的

最新推荐文章于 2021-03-24 09:00:05 发布

阅读量4.3k

点赞数 3

分类专栏： Hbase 文章标签： Hbase mapreduce

本文链接：https://blog.csdn.net/xiushuiguande/article/details/79502326

版权

Hbase 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

HBase – Hadoop Database，是一个高可靠性、高性能、面向列、可伸缩的分布式存储系统，利用HBase技术可在廉价PC Server上搭建起大规模结构化存储集群。
HBase是Google Bigtable的开源实现，类似Google Bigtable利用GFS作为其文件存储系统，HBase利用Hadoop HDFS作为其文件存储系统；Google运行MapReduce来处理Bigtable中的海量数据，HBase同样利用Hadoop MapReduce来处理HBase中的海量数据；Google Bigtable利用Chubby作为协同服务，HBase则利用ZooKeeper作为对应的协同服务。

今天闲来无聊，随便写一个简单的mapreduce将一个数据文件写入Hbase表中。
首先创建Maven工程（过程不再赘述），记住要把HBase的配置文件hbase-site.xml放到工程的classpath下（例如src/main/resources目录），内容如下：

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>master,slave1,slave2</value>
        <description>Comma-separated list of hosts in the ZooKeeper ensemble that HBase clients connect to.</description>
    </property>

</configuration>

如果不引入hbase-site.xml，程序在运行时就找不到ZooKeeper地址，也就连接不到HBase，更谈不上写入数据了。
pom文件中需要的依赖也一并列在下面，以免遗漏：

    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>

依赖的版本号请以自己集群实际使用的Hadoop和HBase版本为准，不一定和我这里的一样。

//计算wordcount，把结果写入hbase
/**
 * Word-count MapReduce job that reads a text file and writes one HBase row per
 * distinct word.
 *
 * <p>The mapper emits {@code (word, 1)} for every whitespace-separated token; the
 * reducer sums the ones and issues a {@link Put} whose row key is the word and
 * whose cell {@code i:count} holds the total as a decimal string.
 *
 * <p>The target table must already exist with column family {@code i}, e.g. in the
 * HBase shell: {@code create 'WC:wc','i'}.
 */
public class WordCountHbase {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://master:9000/user-logs-large.txt";

    /** Output table used when no table name is passed on the command line. */
    private static final String DEFAULT_TABLE_NAME = "WC:wc";

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} = input path (defaults to
     *             {@value #DEFAULT_INPUT_PATH}), {@code args[1]} = HBase table name
     *             (defaults to {@value #DEFAULT_TABLE_NAME})
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String tableName = (args != null && args.length > 1) ? args[1] : DEFAULT_TABLE_NAME;

        // HBaseConfiguration.create() layers the HBase settings (hbase-site.xml on the
        // classpath) on top of the Hadoop configuration.
        Configuration configuration = HBaseConfiguration.create();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(WordCountHbase.class);

        job.setMapperClass(WordCountToBaseMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Wires the reducer and the table output for the given (pre-existing) table.
        TableMapReduceUtil.initTableReducerJob(tableName, WordCountToBaseReduce.class, job);

        FileInputFormat.addInputPath(job, new Path(inputPath));

        boolean completion = job.waitForCompletion(true);
        System.out.println(completion);
        if (!completion) {
            // Surface the failure to the calling shell/scheduler instead of exiting 0.
            System.exit(1);
        }
    }

    /**
     * Splits each input line on runs of whitespace and emits {@code (word, 1)} for
     * every non-empty token.
     */
    public static class WordCountToBaseMap extends Mapper<Object, Text, Text, IntWritable> {
        /** Shared count of one emitted for every token. */
        public static final IntWritable ONE = new IntWritable(1);

        // Compiled once; String.split("\\s+") would recompile the regex for every record.
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        // Reused across calls to avoid allocating a Text per token.
        private final Text outputKey = new Text();

        @Override
        protected void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            for (String word : WHITESPACE.split(value.toString())) {
                // A line that starts with whitespace yields a leading empty token; skip it.
                if (!word.isEmpty()) {
                    outputKey.set(word);
                    context.write(outputKey, ONE);
                }
            }
        }

    }

    /**
     * Sums the counts of one word and writes them to HBase.
     *
     * <p>The output key is irrelevant for the table output (a {@link NullWritable} is
     * used); only the output value, which must be a {@link Put}, is written to the table.
     */
    public static class WordCountToBaseReduce extends TableReducer<Text, IntWritable, NullWritable> {
        /** Placeholder output key; the table output only looks at the value. */
        public static final NullWritable OUT_PUT_KEY = NullWritable.get();

        private static final byte[] FAMILY = Bytes.toBytes("i");
        private static final byte[] QUALIFIER = Bytes.toBytes("count");

        @Override
        protected void reduce(Text key, Iterable<IntWritable> value,
                Reducer<Text, IntWritable, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            // long, not int: a very frequent word in a large input must not overflow.
            long sum = 0L;
            for (IntWritable count : value) {
                sum += count.get();
            }

            // Row key = the word; cell i:count = the total as a decimal string.
            Put outputValue = new Put(Bytes.toBytes(key.toString()));
            outputValue.addColumn(FAMILY, QUALIFIER, Bytes.toBytes(String.valueOf(sum)));
            context.write(OUT_PUT_KEY, outputValue);
        }
    }

}