package com.hbase;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool {
    // Mapper: split each input line on spaces and emit (word, 1).
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strs = value.toString().split(" ");
            for (String str : strs) {
                context.write(new Text(str), new IntWritable(1));
            }
        }
    }
    // Reducer: sum the counts for each word and write a Put to HBase,
    // so the output types are (NullWritable, Put) rather than (Text, IntWritable).
    public static class Reduce extends Reducer<Text, IntWritable, NullWritable, Put> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            Iterator<IntWritable> it = values.iterator();
            while (it.hasNext()) {
                sum = sum + it.next().get();
            }
            // The word (key) becomes the row key.
            Put put = new Put(Bytes.toBytes(key.toString()));
            // Column family "f1", qualifier "c1", value = sum.
            // (Newer HBase versions replace put.add(...) with put.addColumn(...).)
            put.add(Bytes.toBytes("f1"), Bytes.toBytes("c1"), Bytes.toBytes(sum));
            context.write(NullWritable.get(), put);
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();
        // "hbase.mapred.outputtable" is the value of TableOutputFormat.OUTPUT_TABLE;
        // it tells TableOutputFormat which HBase table to write to.
        String OUTPUT_TABLE = "hbase.mapred.outputtable";
        conf.set(OUTPUT_TABLE, "wordcount");
        // Referencing the constant directly caused a compile-time error in this setup,
        // which is why the property name is spelled out above:
        //conf.set(TableOutputFormat.OUTPUT_TABLE, "wordcount");
        // Create the "wordcount" table in HBase with column family "f1".
        HbaseApi.create("wordcount", "f1");
        // new Job(conf) is deprecated on newer Hadoop; Job.getInstance(conf) is the replacement.
        Job job = new Job(conf);
        job.setJarByClass(WordCount.class);
        job.setJobName(WordCount.class.getSimpleName());
        String path = "hdfs://grid131:9000/stu.txt";
        FileInputFormat.addInputPath(job, new Path(path));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        job.setMapperClass(Map.class);
        // Reduce cannot be reused as a combiner here: its output types (NullWritable, Put)
        // do not match the map output types (Text, IntWritable).
        //job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        int exit = ToolRunner.run(new WordCount(), args);
        System.exit(exit);
    }
}
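
The run() method calls HbaseApi.create("wordcount", "f1"), a helper class that is not part of this listing. Below is a minimal sketch of what such a helper might look like; the class name and method signature come only from the call site above, and the admin calls assume the pre-2.0 HBase client API (HBaseAdmin, HTableDescriptor, HColumnDescriptor), matching the put.add(...) style used in the reducer.

package com.hbase;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class HbaseApi {
    // Drop the table if it already exists, then recreate it with one column family.
    public static void create(String tableName, String family) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            if (admin.tableExists(tableName)) {
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
            }
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
            desc.addFamily(new HColumnDescriptor(family));
            admin.createTable(desc);
        } finally {
            admin.close();
        }
    }
}

Once the job succeeds, the result can be checked from the HBase shell with scan 'wordcount'. Note that the counts were written with Bytes.toBytes(sum), so each cell value appears as a 4-byte binary integer rather than a readable number.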
Reposted from: https://my.oschina.net/sniperLi/blog/350469