Hadoop经典案例(一)WordCount的简易实现

1:添加依赖包

   <!-- Hadoop 2.6.5 artifacts used by the WordCount example below; all four are pinned to the same version. -->
   <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.5</version>
    </dependency>

2:添加日志配置文件 log4j.properties

# Root logger: level "info", routed to the stdout and logFile appenders configured below.
log4j.rootLogger=info,stdout,logFile
# Console output
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d{ABSOLUTE}] %5p %c{1}:%L - %m%n
# System log file output (rolled by the date pattern below)
log4j.appender.logFile=org.apache.log4j.DailyRollingFileAppender
log4j.appender.logFile.File=logs/mad_ccg.log
log4j.appender.logFile.DatePattern='.'yyyy-MM-dd
log4j.appender.logFile.layout=org.apache.log4j.PatternLayout
log4j.appender.logFile.layout.ConversionPattern=[%d{ABSOLUTE}] %5p %c{1}:%L - %m%n
# Log level for this project's own classes
# NOTE(review): the classes in this article declare no package, so com.ctc.email
# looks copied from another project - confirm the intended package name.
log4j.logger.com.ctc.email=DEBUG

3: 创建Mapper

/**
 * Map stage of the WordCount job: emits {@code (token, 1)} for every
 * whitespace-separated token found on an input line.
 *
 * <p>The input key is not used; the input value is one line of text.
 */
public class WordCountMapper extends Mapper<LongWritable, Text,Text, IntWritable> {

    /** Constant count attached to every emitted token. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Output key reused across calls instead of allocating one {@code Text} per token
     * (the same reuse pattern as the WordCount example in the Hadoop MapReduce tutorial).
     */
    private final Text word = new Text();

    /**
     * Splits one input line into tokens and writes {@code (token, 1)} for each.
     *
     * @param key     input key supplied by the input format; ignored
     * @param value   one line of input text
     * @param context sink for the emitted {@code (token, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim first so that split() can never yield a leading empty token.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // A blank line contains no words; emitting "" would create a bogus key.
            return;
        }
        // Split on any run of whitespace (single space, several spaces, tabs),
        // not only on an exact two-space separator.
        for (String token : line.split("\\s+")) {
            word.set(token);
            context.write(word, ONE);
        }
    }
}

4:创建Reducer

/**
 * Reduce stage of the WordCount job: adds up every count received for a key
 * and writes a single {@code (key, total)} pair.
 */
public class WordCountReduce extends Reducer<Text, IntWritable,Text,IntWritable> {
    /**
     * Sums the counts grouped under {@code letter} and emits the total.
     *
     * @param letter     the key whose counts are being aggregated
     * @param showCounts the individual counts emitted for this key
     * @param context    sink for the aggregated {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text letter, Iterable<IntWritable> showCounts, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : showCounts) {
            sum += count.get();
        }
        IntWritable total = new IntWritable(sum);
        context.write(letter, total);
    }
}

5:创建驱动类

/**
 * Driver for the WordCount job: wires the mapper and reducer into a {@code Job},
 * points it at fixed input/output directories, removes any existing output
 * directory, runs the job and prints whether it succeeded.
 */
public class WordCountDriver2 {
    /**
     * Configures and runs the WordCount job.
     *
     * @param args command-line arguments; not used
     * @throws Exception if job setup, file-system access or job execution fails
     */
    public static void main(String[] args) throws Exception {
        // With nothing set explicitly, the default configuration is used.
        Configuration conf = new Configuration();
        // Hand conf to the job so the job and the FileSystem lookup below
        // share one configuration (the no-arg getInstance() ignored conf).
        Job mrJob = Job.getInstance(conf);
        // Tells the framework which class (and therefore which jar) is the driver.
        mrJob.setJarByClass(WordCountDriver2.class);
        mrJob.setMapperClass(WordCountMapper.class);
        mrJob.setReducerClass(WordCountReduce.class);
        // When the map output key/value types equal the reduce output types,
        // the map output types do not need to be set separately.
        mrJob.setOutputKeyClass(Text.class);
        mrJob.setOutputValueClass(IntWritable.class);
        // With the default text input/output formats, the format classes
        // do not need to be set explicitly.
        FileInputFormat.setInputPaths(mrJob, new Path("F:\\wordcount\\input"));
        Path outPath = new Path("F:\\wordcount\\output");
        FileOutputFormat.setOutputPath(mrJob, outPath);
        // Remove output left over from a previous run before submitting the job.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            // Recursive delete; the single-argument delete(Path) is deprecated.
            fs.delete(outPath, true);
        }
        boolean ifsuccess = mrJob.waitForCompletion(true);
        System.out.println(ifsuccess);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值