Hadoop-----Index索引案例

Index索引案例

案例需求

a.html
hello world
hello lucy
hello jack
hello liuyan

b.html
hello aaa
aaa bbb
bbb ccc
hello liuyan 
liuyan  tangyan

c.html
world hello 
liuyan tangyan
tangyan aaa
bbb	ccc


计算每个单词在每个文件中出现的次数，同一单词的各个文件按出现次数从高到低排列 
aaa	b.html-2 c.html-1 
bbb	b.html-2 c.html-1 
ccc	b.html-1 c.html-1 
hello	a.html-4 b.html-2 c.html-1 
jack	a.html-1 
liuyan	b.html-2 a.html-1 c.html-1 
lucy	a.html-1 
tangyan	c.html-2 b.html-1 
world	a.html-1 c.html-1 

需求分析

在这里插入图片描述
在这里插入图片描述
代码实现

 package com.doit.demo07;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Index01 {

    private static class Index01Mapper extends Mapper<LongWritable, Text,Text,LongWritable>{

        String FileName ;

        @Override
        protected void setup(Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
            //获取文件名
            FileSplit in = (FileSplit) context.getInputSplit();
            FileName= in.getPath().getName();
        }

        Text k2 = new Text();
        LongWritable v2 = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {

            String[] split = value.toString().split("\\s+");

            for (String s : split) {
                k2.set(s + "-" + FileName);
                v2.set(1);

                context.write(k2,v2 );
            }

        }
    }

    private static  class Index01Reduce extends Reducer<Text,LongWritable,Text,LongWritable>{

        LongWritable v3 = new LongWritable();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
            //hello-a.html <1,1,1,1,>

            long sum = 0;
            for (LongWritable value : values) {
                sum+=value.get();
            }

            v3.set(sum);


            context.write(key,v3);

        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();

        //创建任务
        Job job = Job.getInstance(conf, "index01");
        //设置Mapper类
        job.setMapperClass(Index01.Index01Mapper.class);
        //设置Reduce类
        job.setReducerClass(Index01.Index01Reduce.class);
        //设置map的输出类型
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        //设置reduce的输出类型
        job.setOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);


        //设置输入文件位置
        FileInputFormat.setInputPaths(job,new Path("d:\\work\\abc\\input"));
        //设置输出文件位置
        FileOutputFormat.setOutputPath(job,new Path("d:\\work\\abc\\out_put3"));

        //将任务提交 并等待完成
        job.waitForCompletion(true);
    }
}
package com.doit.demo12;
	
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.io.LongWritable;
	import org.apache.hadoop.io.NullWritable;
	import org.apache.hadoop.io.Text;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Mapper;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
	
	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.Comparator;
	
	public class Index02Demo {
	    private static class  Index02Mapper extends Mapper<LongWritable, Text,Text,Text>{
	
	        Text k2 = new Text();
	        Text v2 = new Text();
	        @Override
	        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
	            String s = value.toString();
	            String[] split = s.split("-");
	
	            k2.set(split[0]);
	
	            v2.set(split[1].replaceAll("\\s+","-"));
	            context.write(k2,v2);
	        }
	    }
	
	    private static class Index02Reducer extends Reducer<Text,Text,Text, NullWritable>{
	
	        Text k3 = new Text();
	        @Override
	        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, NullWritable>.Context context) throws IOException, InterruptedException {
	
	            ArrayList<String> list = new ArrayList<>();
	
	            for (Text value : values) {
	               list.add(value.toString());
	
	            }
	            Collections.sort(list, new Comparator<String>() {
	                @Override
	                public int compare(String s1, String s2) {
	                    return  s2.split("-")[1].compareTo(s1.split("-")[1]);
	                }
	            });
	
	
	            StringBuilder sb = new StringBuilder(key.toString());
	            for (String s : list) {
	                sb.append(" "+s);
	
	                k3.set(sb.toString());
	            }
	
	
	
	            context.write(k3,NullWritable.get());
	        }
	    }
	
	    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
	        Configuration conf = new Configuration();
	
	        Job job = Job.getInstance(conf, "index02");
	
	        //设置Mapper类
	        job.setMapperClass(Index02Mapper.class);
	        //设置Reduce类
	        job.setReducerClass(Index02Reducer.class);
	        //设置map的输出类型
	        job.setMapOutputKeyClass(Text.class);
	        job.setMapOutputValueClass(Text.class);
	        //设置reduce的输出类型
	        job.setOutputKeyClass(Text.class);
	        job.setOutputValueClass(NullWritable.class);
	
	
	
	        //设置输入文件位置
	        FileInputFormat.setInputPaths(job,new Path("d:\\work\\abc\\out_put2"));
	        //设置输出文件位置
	        FileOutputFormat.setOutputPath(job,new Path("d:\\work\\abc\\out_put4"));
	
	        //将任务提交 并等待完成
	        job.waitForCompletion(true);
	    }
	}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值