索引案例/共同好友案例

3.3.3.索引案例

有大量的文本数据,需要统计每个单词在每个文件中出现的次数
3.3.3.1.INDEX1
/**

  • 查看单词在各个文档中出现的次数
  • @author DOIT_HANG_GE
  • @version 2019年2月26日
    */
    public class Index1 {
    public static class Index1Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    String fileName = null ;
    Text k = new Text() ;
    IntWritable v = new IntWritable() ;
    @Override
    protected void setup(Mapper<LongWritable, Text, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
    FileSplit fs =(FileSplit) context.getInputSplit();
    fileName = fs.getPath().getName();
    }
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
    String[] words = value.toString().split(" “);
    for (String word : words) {
    k.set(word+”-"+fileName);
    v.set(1);
    context.write(k, v);
    }
    }
    }
    public static class Index1Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{
    @Override
    protected void reduce(Text key, Iterable iters, Reducer<Text, IntWritable, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
    int count = 0 ;
    for (IntWritable is : iters) {
    count++ ;
    }
    context.write(key, new IntWritable(count));
    }
    }
    public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setMapperClass(Index1Mapper.class);
    job.setReducerClass(Index1Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(“d:/data/index/input”));
    FileOutputFormat.setOutputPath(job, new Path(“d:/data/index/output1”));
    job.setNumReduceTasks(1);
    boolean res = job.waitForCompletion(true);
    System.exit(res ? 0 : -1);
    }
    }

3.3.3.2.INDEX2
public class Index2 {
public static class Index2Mapper extends Mapper<LongWritable, Text, Text, Text> {
// word+"-"+fileName n
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] split = line.split("-");
String[] split2 = split[1].split("\t");
k.set(split[0]);// word
v.set(split2[0] + “-” + split2[1]); // 文件名-n
context.write(k, v);
}
}
public static class Index2Reducer extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable iters, Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
//filename-count
for (Text text : iters) {
sb.append(text.toString() + " ");
}
context.write(key, new Text(sb.toString()));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(Index2Mapper.class);
job.setReducerClass(Index2Reducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, new Path(“d:/data/index/output1”));
FileOutputFormat.setOutputPath(job, new Path(“d:/data/index/output2”));
job.setNumReduceTasks(1);
boolean res = job.waitForCompletion(true);
System.exit(res ? 0 : -1);
}
}

3.3.4.共同好友案例
共同好友案例,使用两个 MapReduce 程序来完成需求
STEP ONE
**

  • 获取任意两个人的共同好友数据 A:B,C,G,F,D

  • @author DOIT_HANG_GE

  • @version 2019年3月6日
    */
    public class SameFriend {

    public static class SameFriendMapper1 extends Mapper<LongWritable, Text, Text, Text> {
    Text k = new Text() ;
    Text v = new Text() ;
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
    throws IOException, InterruptedException {
    String line = value.toString();

     	String[] split = line.split(":");
     	v.set(split[0]);
     	String[] fs = split[1].split(",");
     	for (String f : fs) {
     		k.set(f);
     		context.write(k, v);
     	}
     }
    

    }
    // key 是朋友 value是人
    public static class SameFriendReducer1 extends Reducer<Text, Text, Text, Text> {

     @Override
     protected void reduce(Text key, Iterable<Text> iter, Reducer<Text, Text, Text, Text>.Context context)
     		throws IOException, InterruptedException {
     	ArrayList<String> list = new ArrayList<>() ;
     	for (Text text : iter) {
     		list.add(text.toString()) ;
     	}
     	Collections.sort(list);
     	for (int i = 0; i < list.size()-1; i++) {
     		for (int j = i+1; j < list.size(); j++) {
     			context.write(new Text(list.get(i)+"和"+list.get(j)+"的好友是:"), key);
     		}
     	}
     }
    

    }

    public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setMapperClass(SameFriendMapper1.class);
    job.setReducerClass(SameFriendReducer1.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, new Path(“d:/data/friend/input”));
    FileOutputFormat.setOutputPath(job, new Path(“d:/data/friend/output1”));
    boolean res = job.waitForCompletion(true) ;
    System.exit(res ? 0 : 1);

    }
    }

STEP TWO

/**
 * Step two of the common-friends example: reads tab-separated {@code pair<TAB>friend}
 * lines from the step-one output directory and gathers all friends of the same pair into
 * a single record.
 */
public class SameFriend2 {

    /** Re-emits each input line as {@code (first field, second field)}. */
    public static class SameFriendMapper2 extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Splits the line on tab characters and writes the first two fields as key and value.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one tab-separated input line
         * @param context sink for the emitted pair
         */
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            Text pair = new Text(fields[0]);
            Text friend = new Text(fields[1]);
            context.write(pair, friend);
        }
    }

    /** Joins all values of a key into one space-separated string. */
    public static class SameFriendReducer2 extends Reducer<Text, Text, Text, Text> {

        /**
         * Writes {@code (key, "v1 v2 ...")} with surrounding whitespace trimmed.
         *
         * @param key     the key emitted by the mapper
         * @param values  all values grouped under that key
         * @param context sink for the emitted pair
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();
            for (Text friend : values) {
                joined.append(friend.toString());
                joined.append(" ");
            }
            // trim() drops the separator appended after the last value
            String friends = joined.toString().trim();
            context.write(key, new Text(friends));
        }
    }

    /** Configures and runs the job; exits with 0 on success and 1 on failure. */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        job.setMapperClass(SameFriendMapper2.class);
        job.setReducerClass(SameFriendReducer2.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path("d:/data/friend/output1"));
        FileOutputFormat.setOutputPath(job, new Path("d:/data/friend/res"));

        boolean succeeded = job.waitForCompletion(true);
        int exitCode = succeeded ? 0 : 1;
        System.exit(exitCode);
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值