Hadoop Assignment
Task: word frequency count (WordCount)
I. Preparation
1. Required software: a Java IDE (Eclipse or IDEA).
2. Prerequisite: Hadoop is installed on the virtual machine, with the MapReduce and HDFS components set up and working.
3. Create a text file hbb.txt to serve as the input; example contents are shown below.
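For illustration (any plain-text contents will do; these lines are only a hypothetical sample), hbb.txt might contain:
hello hadoop
hello world
hadoop hadoop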
II. Building the jar
1. Open Eclipse and write the following Java code:
package hdfs;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.InverseMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class word {

    // Mapper: split each input line into tokens and emit (word, 1).
    public static class TokenizerMapper extends
            Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer token = new StringTokenizer(value.toString());
            while (token.hasMoreTokens()) {
                context.write(new Text(token.nextToken()), ONE);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts for each word.
    public static class IntSumReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // Comparator that inverts IntWritable's natural order, so the sort job
    // emits words in descending order of frequency.
    private static class IntWritableDecreasingComparator extends
            IntWritable.Comparator {
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }

        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return -super.compare(b1, s1, l1, b2, s2, l2);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Intermediate directory holding the raw (word, count) pairs.
        Path tempDir = new Path("hdfs://hadoop0:9000/output2/word1");
        try {
            // Job 1: count word frequencies.
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(word.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setNumReduceTasks(2);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Write the intermediate result as a SequenceFile so the sort
            // job can read it back directly as (Text, IntWritable) pairs.
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            FileInputFormat.addInputPath(job, new Path("hdfs://hadoop0:9000/input2"));
            FileOutputFormat.setOutputPath(job, tempDir);

            if (job.waitForCompletion(true)) {
                // Job 2: swap (word, count) to (count, word) with InverseMapper,
                // then let the shuffle sort by count, descending.
                Job sortJob = Job.getInstance(conf, "sort");
                sortJob.setJarByClass(word.class);
                FileInputFormat.addInputPath(sortJob, tempDir);
                sortJob.setInputFormatClass(SequenceFileInputFormat.class);
                sortJob.setMapperClass(InverseMapper.class);
                // A single reducer yields one globally sorted output file.
                sortJob.setNumReduceTasks(1);
                FileOutputFormat.setOutputPath(sortJob,
                        new Path("hdfs://hadoop0:9000/output2/word2"));
                sortJob.setOutputKeyClass(IntWritable.class);
                sortJob.setOutputValueClass(Text.class);
                sortJob.setOutputFormatClass(TextOutputFormat.class);
                sortJob.setSortComparatorClass(IntWritableDecreasingComparator.class);
                if (sortJob.waitForCompletion(true)) {
                    System.out.println("ok");
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Clean up the intermediate directory when the JVM exits.
            FileSystem fs = tempDir.getFileSystem(conf);
            fs.deleteOnExit(tempDir);
        }
    }
}
Build the code in Eclipse, then export it as a jar (word.jar). Both compiling and exporting require the Hadoop library jars (e.g. hadoop-common and hadoop-mapreduce-client-core) on the project's build path.
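The program chains two jobs: the first counts words and writes (word, count) pairs to a temporary SequenceFile directory; the second uses InverseMapper to swap each pair to (count, word) so the shuffle sorts by count, with the custom comparator making the order descending. Before packaging, you can sanity-check this counting-and-sorting logic with a small local Java program (a sketch using only the JDK, no Hadoop; the sample text mirrors the hypothetical hbb.txt above):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

public class LocalWordCountCheck {
    public static void main(String[] args) {
        // Hypothetical input standing in for hbb.txt.
        String text = "hello hadoop hello world hadoop hadoop";
        // Count tokens, mirroring TokenizerMapper + IntSumReducer.
        Map<String, Integer> counts = new HashMap<>();
        StringTokenizer token = new StringTokenizer(text);
        while (token.hasMoreTokens()) {
            counts.merge(token.nextToken(), 1, Integer::sum);
        }
        // Sort by count descending, mirroring the second (sort) job.
        List<Map.Entry<String, Integer>> sorted = new ArrayList<>(counts.entrySet());
        sorted.sort((a, b) -> b.getValue() - a.getValue());
        for (Map.Entry<String, Integer> e : sorted) {
            // Same "count<TAB>word" layout that TextOutputFormat produces.
            System.out.println(e.getValue() + "\t" + e.getKey());
        }
        // Expected output: 3 hadoop, 2 hello, 1 world.
    }
}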
III. Running on Hadoop
Start the virtual machine and connect to it with Xshell (Xftp is used for file transfer).
1. Start Hadoop:
start-dfs.sh
start-yarn.sh
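You can confirm the daemons came up with jps, which should list NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager:
jps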
2. Create the input directory. The input path is hardcoded in the code as hdfs://hadoop0:9000/input2, so the directory name must match:
hdfs dfs -mkdir /input2
3. Upload hbb.txt to the virtual machine with Xftp, then copy it into the HDFS input directory:
hadoop fs -put /usr/hadoop/hbb.txt /input2
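You can verify the upload with:
hadoop fs -ls /input2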
4. Run the jar. The input and output paths are hardcoded in the program, so no path arguments are needed; if the exported manifest does not set a main class, name it explicitly (package hdfs, class word):
hadoop jar word.jar hdfs.word
IV. Checking the results
The second (sort) job writes the final output to /output2/word2; with a single reducer it is one file:
hadoop fs -cat /output2/word2/part-r-00000
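Each output line is a count, a tab, and the word, in descending order of frequency. For the hypothetical hbb.txt above, the output would be:
3	hadoop
2	hello
1	world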