WordCount 到 Hdfs

import java.io.IOException;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
 
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
 
public class WordCount {
     //计数变量
     private static final IntWritable ONE = new IntWritable( 1 );
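     /**
      * Map phase of the word count.
      *
      * @author 汤高
      *  In Mapper<LongWritable, Text, Text, IntWritable>, LongWritable and IntWritable are the
      *  Hadoop data types for long and int values.
      *
      *  LongWritable, Text are the input types (for this word count the input key is the offset
      *  at which a line starts and the input value is the text of that line).
      *  Text, IntWritable are the output types: a word and its count.
      *  Note: the input types of map (LongWritable key, Text value) differ from its output types,
      *      so the map output types have to be set explicitly on the job.
      */
     // Map phase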
     public static class WordCountMapper extends Mapper<longwritable, intwritable= "" > {
         /***
          *
          */
         @Override
         protected void map(LongWritable key, Text value, Mapper<longwritable, intwritable= "" >.Context context)
                 throws IOException, InterruptedException {
             //默认的map的value是每一行,我这里自定义的是以空格分割
             String[] vs = value.toString().split( "\\s" );
             for (String v : vs) {
                 //写出去
                 context.write( new Text(v), ONE);
             }
 
         }
     }
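     // Reduce phase
     /***
      * @author 汤高
      * Text, IntWritable are the input types; they come from the map phase, i.e. the map output
      * is the input of the reduce phase.
      * Text, IntWritable are also the output types: a word and its total count.
      */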
     public static class WordCountReducer extends Reducer<text, intwritable= "" >{
         @Override
         protected void reduce(Text key, Iterable<intwritable> values,
                 Reducer<text, intwritable= "" >.Context context) throws IOException, InterruptedException {
             int count= 0 ;
             for (IntWritable v:values){
                 count+=v.get(); //单词个数加一
             }
 
             context.write(key, new IntWritable(count));
         }
 
     }
 
     public static void main(String[] args) {
 
         Configuration conf= new Configuration();
         try {
 
 
             //得到一个Job 并设置名字
             Job job=Job.getInstance(conf, "wordcount1" );
             //设置Jar 使本程序在Hadoop中运行
             job.setJarByClass(WordCount. class );
             //设置Map处理类
             job.setMapperClass(WordCountMapper. class );
             //设置map的输出类型,因为不一致,所以要设置
             job.setMapOutputKeyClass(Text. class );
             job.setMapOutputValueClass(IntWritable. class );
             //设置Reduce处理类
             job.setReducerClass(WordCountReducer. class );
             //设置输入和输出目录
             FileInputFormat.addInputPath(job, new Path( "hdfs://192.168.52.140:9000/in_2/" ));
             FileOutputFormat.setOutputPath(job, new Path( "hdfs://192.168.52.140:9000/myhbase" +System.currentTimeMillis()));
             //启动运行
             System.exit(job.waitForCompletion( true ) ? 0 : 1 );
         } catch (IOException e) {
             e.printStackTrace();
         } catch (ClassNotFoundException e) {
             e.printStackTrace();
         } catch (InterruptedException e) {
             e.printStackTrace();
         }
     }
 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值