HBase BulkLoad example

This project runs on Windows and submits its jobs to a remote Linux cluster.

The code logic is straightforward: a plain word-count job, followed by a second job that converts the counts into HFiles, which are then bulk-loaded into HBase with LoadIncrementalHFiles.
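Before the bulk load can succeed, the target table has to exist in HBase already; the program below writes into a table named word_count with a single column family cf. The original post does not show how that table was created, so here is a minimal sketch using the same-generation HBase client API (HBaseAdmin, HTableDescriptor); the class name is hypothetical, the ZooKeeper settings mirror the ones used in the main program, and creating the table from the HBase shell would work just as well.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// Hypothetical helper, not part of the original project: pre-creates the
// "word_count" table with column family "cf" so the bulk load has a target table.
public class CreateWordCountTable {

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Same ZooKeeper quorum and client port as the bulk-load program
        conf.set("hbase.zookeeper.quorum", "mini1,mini2,mini3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            if (!admin.tableExists("word_count")) {
                HTableDescriptor table = new HTableDescriptor("word_count");
                table.addFamily(new HColumnDescriptor("cf"));
                admin.createTable(table);
            }
        } finally {
            admin.close();
        }
    }
}

With the table in place, the full two-job program follows.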

package demo8_hbasebulkload;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class GeneratePutHFileAndBulkLoadToHBase {

    // First job's mapper: a standard word-count map that splits each line on spaces.
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private Text wordText = new Text();
        private IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] wordArray = line.split(" ");
            for (String word : wordArray) {
                wordText.set(word);
                context.write(wordText, one);
            }
        }
    }

    

    // First job's reducer: sums the counts for each word.
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> valueList, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : valueList) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    

    // Second job's mapper: parses each "word<TAB>count" line from the first job's
    // output and emits it as a row key plus a Put for HFileOutputFormat.
    public static class ConvertWordCountOutToHFileMapper
            extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String wordCountStr = value.toString();
            String[] wordCountArray = wordCountStr.split("\t");
            String word = wordCountArray[0];
            int count = Integer.valueOf(wordCountArray[1]);

            // Build the HBase row key from the word
            byte[] rowKey = Bytes.toBytes(word);
            ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(rowKey);
            byte[] family = Bytes.toBytes("cf");
            byte[] qualifier = Bytes.toBytes("count");
            byte[] hbaseValue = Bytes.toBytes(count);
            // Put can carry several columns of one column family; for a single column,
            // a KeyValue could be emitted instead:
            // KeyValue keyValue = new KeyValue(rowKey, family, qualifier, hbaseValue);
            Put put = new Put(rowKey);
            put.add(family, qualifier, hbaseValue);
            context.write(rowKeyWritable, put);
        }
    }

    

    public static void main(String[] args) throws Exception {
        // Hard-coded HDFS paths: word-count input, word-count output, HFile output
        args = new String[3];
        args[0] = "/wc/bulkloadinput";
        args[1] = "/wc/bulkloadoutput";
        args[2] = "/wc/bulkloadhbaseout";

        Configuration hadoopConfiguration = new Configuration();
        // The three settings below are required when submitting to the cluster from a
        // local (Windows) machine; they can be omitted for a purely local run.
        hadoopConfiguration.set("mapreduce.framework.name", "yarn");
        hadoopConfiguration.set("yarn.resourcemanager.hostname", "shizhan");
        hadoopConfiguration.set("fs.defaultFS", "hdfs://shizhan:9000/");
        String[] dfsArgs = new GenericOptionsParser(hadoopConfiguration, args).getRemainingArgs();

        // The first job is a plain MapReduce word count that writes to the given output directory.
        Job job = new Job(hadoopConfiguration, "wordCountJob");
        // When submitting to the cluster from a local machine, the job jar must be built
        // and referenced explicitly.
        job.setJar("d:/hadoopSecond.jar");
        job.setJarByClass(GeneratePutHFileAndBulkLoadToHBase.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(dfsArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(dfsArgs[1]));
        // Submit the first job and wait for it to finish
        int wordCountJobResult = job.waitForCompletion(true) ? 0 : 1;

        

        // The second job takes the first job's output as input. Only a mapper is needed:
        // it parses each word-count line and converts it into the row-key/Put form that
        // HFileOutputFormat expects.
        Job convertWordCountJobOutputToHFileJob = new Job(hadoopConfiguration, "wordCount_bulkload");
        convertWordCountJobOutputToHFileJob.setJar("d:/hadoopSecond.jar");
        convertWordCountJobOutputToHFileJob.setJarByClass(GeneratePutHFileAndBulkLoadToHBase.class);
        convertWordCountJobOutputToHFileJob.setMapperClass(ConvertWordCountOutToHFileMapper.class);
        // No reducer class needs to be set here: configureIncrementalLoad() chooses
        // KeyValueSortReducer or PutSortReducer based on the map output value class.
        // convertWordCountJobOutputToHFileJob.setReducerClass(KeyValueSortReducer.class);
        convertWordCountJobOutputToHFileJob.setMapOutputKeyClass(ImmutableBytesWritable.class);
        convertWordCountJobOutputToHFileJob.setMapOutputValueClass(Put.class);

        // The first job's output is the second job's input
        FileInputFormat.addInputPath(convertWordCountJobOutputToHFileJob, new Path(dfsArgs[1]));
        FileOutputFormat.setOutputPath(convertWordCountJobOutputToHFileJob, new Path(dfsArgs[2]));

        // Create the HBase configuration
        Configuration hbaseConfiguration = HBaseConfiguration.create();
        hbaseConfiguration.set("hbase.zookeeper.quorum", "mini1,mini2,mini3");
        hbaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181");
        // Open the target table and configure the job for incremental (HFile) load against it
        HTable wordCountTable = new HTable(hbaseConfiguration, "word_count");
        HFileOutputFormat.configureIncrementalLoad(convertWordCountJobOutputToHFileJob, wordCountTable);

        // Submit the second job and wait for it to finish
        int convertWordCountJobOutputToHFileJobResult =
                convertWordCountJobOutputToHFileJob.waitForCompletion(true) ? 0 : 1;
        // After the second job finishes, bulk-load the generated HFiles into HBase.
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbaseConfiguration);
        // The first argument is the second job's output directory (where the HFiles were
        // written); the second is the target table.
        loader.doBulkLoad(new Path(dfsArgs[2]), wordCountTable);
        // Exit with the second job's status
        System.exit(convertWordCountJobOutputToHFileJobResult);
    }
}
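After the bulk load finishes, each distinct word should appear as a row in word_count, with its count stored under cf:count as the 4-byte value produced by Bytes.toBytes(int). Below is a minimal read-back sketch, again assuming the same table name, column family, and ZooKeeper settings; the word "hello" is just an example row key.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical check, not part of the original project: reads one word's count
// back from the bulk-loaded table.
public class CheckWordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "mini1,mini2,mini3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        HTable table = new HTable(conf, "word_count");
        try {
            // The row key is the word itself; the count sits in cf:count as a 4-byte int
            Result result = table.get(new Get(Bytes.toBytes("hello")));
            byte[] countBytes = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("count"));
            if (countBytes != null) {
                System.out.println("hello -> " + Bytes.toInt(countBytes));
            } else {
                System.out.println("row 'hello' not found");
            }
        } finally {
            table.close();
        }
    }
}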