# Hadoop WordCount / Common Friends Code Implementation
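The first mapper below expects one input line per user in the form `user:friend1,friend2,...`. The original input file is not included in the post; the lines below are a plausible sample reconstructed from the first-reducer output shown in the code comments, so treat the exact contents as an assumption rather than the author's data:

```
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K
```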

```java
package com.Practice.SameFriend;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Arrays;

/**
 * Common friends, merged (two-job) version.
 * Approach: Step 1: for every friend, collect all users who list that friend.
 *           Step 2: sort the users from step 1, pair them up two by two, and output the common friends of each user pair.
 */
public class SameFriendMerge1 {

    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // First job: invert the friend lists (friend -> all users who have this friend)
        Job job = Job.getInstance(conf);
        job.setJar("wordcountJar/wordcount.jar");

        job.setMapperClass(SFMerge1Mapper1.class);
        job.setReducerClass(SFMerge1Reducer1.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path inputPath = new Path("input/sameFriend");
        Path outputPath = new Path("output/sameFriend");

        // Delete the output path if it already exists; otherwise the job would fail
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job,inputPath);
        FileOutputFormat.setOutputPath(job,outputPath);

        // Second job: pair up the users of each friend and emit the shared friend for every pair
        Job job1 = Job.getInstance(conf);
        job1.setJar("wordcountJar/wordcount.jar");

        job1.setMapperClass(SFMerge1Mapper2.class);
        job1.setReducerClass(SFMerge1Reducer2.class);

        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);

        // The second job reads the output of the first job
        Path inputPath1 = new Path("output/sameFriend");
        Path outputPath1 = new Path("output/sameFriend1");

        if (fs.exists(outputPath1)) {
            fs.delete(outputPath1, true);
        }

        FileInputFormat.setInputPaths(job1, inputPath1);
        FileOutputFormat.setOutputPath(job1, outputPath1);

        // Wrap both jobs as ControlledJobs so they can be chained
        ControlledJob ctlJob1 = new ControlledJob(job.getConfiguration());
        ControlledJob ctlJob2 = new ControlledJob(job1.getConfiguration());

        ctlJob1.setJob(job);
        ctlJob2.setJob(job1);

        // Declare the dependency: the second job starts only after the first one finishes
        ctlJob2.addDependingJob(ctlJob1);

        JobControl jobControl = new JobControl("SameFriends");
        jobControl.addJob(ctlJob1);
        jobControl.addJob(ctlJob2);

        // JobControl implements Runnable, so run it in its own thread
        Thread jobThread = new Thread(jobControl);
        jobThread.start();

        // Poll periodically until every job managed by the JobControl has finished
        while (!jobControl.allFinished()) {
            Thread.sleep(500);
        }

        jobControl.stop();
    }

    public static class SFMerge1Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
        private Text outKey = new Text();
        private Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Input line format: user:friend1,friend2,...
            String[] split = value.toString().split(":");
            outValue.set(split[0]);
            String[] friends = split[1].split(",");
            // Emit (friend, user) so the reducer can collect every user that has this friend
            for (String friend : friends) {
                outKey.set(friend);
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Output of the first reducer (friend -> users who list that friend):
         A  F,I,O,K,G,D,C,H,B
         B  E,J,F,A
         C  B,E,K,A,H,G,F
         D  H,C,G,F,E,A,K,L
         E  A,B,L,G,M,F,D,H
         F  C,M,L,A,D,G
         G  M
         H  O
         I  O,C
         J  O
         K  O,B
         L  D,E
         M  E,F
         O  A,H,I,J,F
     */
    public static class SFMerge1Reducer1 extends Reducer<Text, Text, Text, Text> {
        private Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Join all users that share this friend into a comma-separated list
            StringBuilder sb = new StringBuilder();
            for (Text user : values) {
                if (sb.length() != 0) {
                    sb.append(",");
                }
                sb.append(user);
            }
            outValue.set(sb.toString());
            context.write(key, outValue);
        }
    }

    public static class SFMerge1Mapper2 extends Mapper<LongWritable, Text, Text, Text> {
        private Text outValue = new Text();
        private Text outKey = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Input line format (output of job 1): friend \t user1,user2,...
            String[] splits = value.toString().split("\t");
            String[] users = splits[1].split(",");
            outValue.set(splits[0]);
            // Sort so each pair is always emitted as "smaller-larger" and never duplicated in reverse order
            Arrays.sort(users);
            for (int i = 0; i < users.length - 1; i++) {
                for (int j = i + 1; j < users.length; j++) {
                    outKey.set(users[i] + "-" + users[j]);
                    context.write(outKey, outValue);
                }
            }
        }
    }

    /**
     * Output of the second reducer (user pair -> common friends):
         A-B    E,C
         A-C    D,F
         A-D    E,F
         A-E    B,C,D
         A-F    C,E,O,D,B
         A-G    E,F,C,D
         A-H    C,D,E,O
         A-I    O
         A-J    O,B
         A-K    C,D
         A-L    F,D,E
         A-M    F,E
         B-C    A
         B-D    A,E
         B-E    C
         B-F    C,A,E
         B-G    E,C,A
         B-H    E,C,A
         B-I    A
         B-K    A,C
         B-L    E
         B-M    E
         B-O    A,K
         C-D    A,F
         C-E    D
         C-F    A,D
         C-G    A,D,F
         C-H    D,A
         C-I    A
         C-K    A,D
         C-L    D,F
         C-M    F
         C-O    I,A
         D-E    L
         D-F    A,E
         D-G    E,A,F
         D-H    A,E
         D-I    A
         D-K    A
         D-L    E,F
         D-M    F,E
         D-O    A
         E-F    D,M,C,B
         E-G    C,D
         E-H    C,D
         E-J    B
         E-K    C,D
         E-L    D
         F-G    D,C,A,E
         F-H    A,D,O,E,C
         F-I    O,A
         F-J    B,O
         F-K    D,C,A
         F-L    E,D
         F-M    E
         F-O    A
         G-H    D,C,E,A
         G-I    A
         G-K    D,A,C
         G-L    D,F,E
         G-M    E,F
         G-O    A
         H-I    O,A
         H-J    O
         H-K    A,C,D
         H-L    D,E
         H-M    E
         H-O    A
         I-J    O
         I-K    A
         I-O    A
         K-L    D
         K-O    A
         L-M    E,F

     */
    public static class SFMerge1Reducer2 extends Reducer<Text, Text, Text, Text> {
        private Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // The values are the friends shared by the user pair in the key; join them into one list
            StringBuilder sb = new StringBuilder();
            for (Text friend : values) {
                if (sb.length() != 0) {
                    sb.append(",");
                }
                sb.append(friend);
            }
            outValue.set(sb.toString());
            context.write(key, outValue);
        }
    }
}
```
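The driver hard-codes the jar path `wordcountJar/wordcount.jar` and the relative HDFS paths `input/sameFriend` and `output/sameFriend`. Assuming the class above has been packaged into that jar, a submission might look like the following sketch (this command line is not from the original post):

```sh
hadoop jar wordcountJar/wordcount.jar com.Practice.SameFriend.SameFriendMerge1
```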
### Answer 1:

The Hadoop WordCount code is as follows:

```java
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```

This is a standard Hadoop WordCount program consisting of a Mapper class and a Reducer class. The Mapper splits the input text into words and maps each word to a key-value pair whose key is the word and whose value is 1. The Reducer merges identical words and counts how many times each word appears. The final result is written to the output file with the word as the key and its count as the value.

### Answer 2:

Hadoop is an open-source distributed computing framework that can process very large data sets. WordCount is a classic Hadoop example that helps beginners understand basic Hadoop MapReduce programming. The implementation proceeds as follows.

1. Environment preparation

Install and configure Hadoop first; the official Hadoop documentation covers installation and configuration. After installation, create a local text file to serve as the input of the WordCount program.

2. Write the Map function

The Map function is the heart of the WordCount program: it splits the input and counts each word it produces. A typical implementation:

```java
public static class Map extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}
```

The Map function first defines an IntWritable variable `one` that records a single occurrence of a word. It then tokenizes the value with a StringTokenizer, iterates over the tokens, and emits each word together with the count 1.

3. Write the Reduce function

The Reduce function is the other core of WordCount: it merges the words emitted by the Map phase and outputs the total count per word. A typical implementation:

```java
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
```

The Reduce function iterates over all values associated with a key and adds them up. The total is stored in the IntWritable variable `result` and written to the output.

4. Configure the job

When configuring the job we specify the input and output paths and set the Mapper and Reducer classes, among other things. A typical job configuration:

```java
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
```

The job configuration sets the job name, the jar, the Map and Reduce classes, and the input and output types. Here the Reduce class is also registered as a Combiner, so partial aggregation happens locally on each map-side node before the shuffle. Finally, FileInputFormat and FileOutputFormat specify the input and output paths.

5. Run the program

After the steps above we can execute the WordCount program. Package it into a jar first, then run:

```sh
hadoop jar WordCount.jar WordCount /path/to/input /path/to/output
```

Hadoop automatically splits the input file into blocks and assigns them to different nodes. Each node runs the Map function and produces intermediate results; these are sent to the Reduce nodes and merged there to obtain the final result, which Hadoop writes to files under the specified output path.

That is the basic implementation of Hadoop WordCount. Working through it gives a good feel for the basic MapReduce programming flow; if you want to dig deeper, try adding log output or calling third-party libraries.

### Answer 3:

Hadoop is a high-performance, scalable, distributed computing framework that is well suited to large data sets. WordCount is the simplest Hadoop example: it shows how to use Hadoop to scan the words in a file and count how many times each word appears. A simple Hadoop WordCount program:

Mapper class:

```java
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }
}
```

The Mapper class extends Hadoop's Mapper. It receives the input data, turns it into words, and for each word emits the word as the key and 1 as the value, which is passed on to the Reducer.

Reducer class:

```java
public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
```

The Reducer class extends Hadoop's Reducer. It receives each word together with the 1s emitted by the Mapper, adds up all the 1s for the same word to obtain the number of times that word appears in the file, and writes the word and its count to the output.

Driver class:

```java
public class WordcountDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordcountDriver.class);
        job.setMapperClass(WordcountMapper.class);
        job.setCombinerClass(WordcountReducer.class);
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```

This is an overview of the Hadoop WordCount code. It shows how to write a Hadoop MapReduce job in Java and run it on the Hadoop distributed computing framework. To implement such a job you need to understand Hadoop's basic concepts and principles, including how the Mapper and Reducer classes are used and what the job driver and the computing framework do, as well as the Java language and the basic structure of a Hadoop job.
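To make the WordCount data flow described above concrete, here is a tiny illustrative run; the file contents are made up for illustration only:

```
# input file
hello world
hello hadoop

# final output (word <TAB> count)
hadoop  1
hello   2
world   1
```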
