MapReduce实战练习三:倒排索引

需求:

1、在不同的文件中有着各种单词,每行单词之间以空格间隔

2、统计所有文件,以每行为   

单词(空格)文件1名-->单词出现的次数(空格)文件2名-->单词出现的次数(空格)文件3名-->单词出现的次数        的格式产生输出

思路:

1、先以 单词--文件名(空格)单词出现次数 的格式输出。

2、然后再将相同的单词进行整合,按要求格式输出。

(本地)

测试数据:

a.txt:

tom jerry bpf
good nice bpf

b.txt:

hello calvin
nice job bro

c.txt:

bpf you are right
so cool calvin


第一步代码:

package com.bpf.mr.inverindex;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Step one of the inverted-index build.
 *
 * <p>Reads every input file line by line and emits one record per word
 * occurrence, keyed by {@code word--filename} with a count of 1. The reducer
 * sums those counts, so the step-one output lines look like:
 * {@code word--filename<TAB>count}.
 */
public class InverIndexStepone {

    /** Emits ("word--filename", 1) for every word occurrence in the split. */
    static class InverIndexSteponeMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final Text outKey = new Text();
        private final IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines; split would otherwise yield "" tokens
            }
            // \\s+ tolerates tabs and runs of spaces; a plain " " split would
            // emit empty words for consecutive separators
            String[] words = line.split("\\s+");
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            String fileName = inputSplit.getPath().getName();
            for (String word : words) {
                outKey.set(word + "--" + fileName);
                context.write(outKey, one);
            }
        }
    }

    /** Sums the 1s for each "word--filename" key. */
    static class InverIndexSteponeReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // reused across groups to avoid one allocation per output record
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            result.set(count);
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the step-one job.
     *
     * @param args optional: args[0] = input dir, args[1] = output dir;
     *             falls back to the original hard-coded local paths
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(InverIndexStepone.class);

        job.setMapperClass(InverIndexSteponeMapper.class);
        // summing is associative and commutative, so the reducer doubles
        // as a combiner and cuts shuffle volume
        job.setCombinerClass(InverIndexSteponeReducer.class);
        job.setReducerClass(InverIndexSteponeReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        String inputDir = args.length > 0 ? args[0] : "D:\\测试数据\\输入";
        String outputDir = args.length > 1 ? args[1] : "D:\\测试数据\\输出";
        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
输出结果:




第二步代码:

package com.bpf.mr.inverindex;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Step two of the inverted-index build.
 *
 * <p>Consumes the step-one output lines ({@code word--filename<TAB>count}),
 * re-keys them by word, and concatenates the per-file counts so each output
 * line reads: {@code word<TAB>file1-->n1 file2-->n2 ...}.
 */
public class InverIndexSteptwo {

    /** Splits "word--filename\tcount" into key=word, value="filename-->count". */
    static class InverIndexSteptwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // limit 2: only the first "--" separates word from filename, so a
            // word that itself contains "--" does not corrupt the record
            String[] wordAndRest = line.split("--", 2);
            if (wordAndRest.length < 2) {
                return; // guard against blank or malformed lines instead of AIOOBE
            }
            String[] fileAndCount = wordAndRest[1].split("\t");
            if (fileAndCount.length < 2) {
                return;
            }
            outKey.set(wordAndRest[0]);
            outValue.set(fileAndCount[0] + "-->" + fileAndCount[1]);
            context.write(outKey, outValue);
        }
    }

    /** Joins all "filename-->count" entries for a word with single spaces. */
    static class InverIndexSteptwoReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder avoids the O(n^2) cost of += on String in a loop,
            // and the separator logic avoids a trailing space in the output
            StringBuilder joined = new StringBuilder();
            for (Text value : values) {
                if (joined.length() > 0) {
                    joined.append(' ');
                }
                joined.append(value.toString());
            }
            outValue.set(joined.toString());
            context.write(key, outValue);
        }
    }

    /**
     * Configures and submits the step-two job.
     *
     * @param args optional: args[0] = input dir (step-one output),
     *             args[1] = output dir; falls back to the original paths
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(InverIndexSteptwo.class);

        job.setMapperClass(InverIndexSteptwoMapper.class);
        job.setReducerClass(InverIndexSteptwoReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String inputDir = args.length > 0 ? args[0] : "D:\\测试数据\\输出";
        String outputDir = args.length > 1 ? args[1] : "D:\\测试数据\\再输出";
        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
输出结果:





  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值