3.3.3.索引案例
有大量的文本数据,需要查找出每个单词在每个文件中出现的次数。
3.3.3.1.INDEX1
/**
- 查看单词在各个文档中出现的次数
- @author DOIT_HANG_GE
- @version 2019年2月26日
*/
public class Index1 {
public static class Index1Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
String fileName = null ;
Text k = new Text() ;
IntWritable v = new IntWritable() ;
@Override
protected void setup(Mapper<LongWritable, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
FileSplit fs =(FileSplit) context.getInputSplit();
fileName = fs.getPath().getName();
}
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
String[] words = value.toString().split(" “);
for (String word : words) {
k.set(word+”-"+fileName);
v.set(1);
context.write(k, v);
}
}
}
public static class Index1Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable iters, Reducer<Text, IntWritable, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
int count = 0 ;
for (IntWritable is : iters) {
count++ ;
}
context.write(key, new IntWritable(count));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(Index1Mapper.class);
job.setReducerClass(Index1Reducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(“d:/data/index/input”));
FileOutputFormat.setOutputPath(job, new Path(“d:/data/index/output1”));
job.setNumReduceTasks(1);
boolean res = job.waitForCompletion(true);
System.exit(res ? 0 : -1);
}
}
3.3.3.2.INDEX2
public class Index2 {
public static class Index2Mapper extends Mapper<LongWritable, Text, Text, Text> {
// word+"-"+fileName n
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] split = line.split("-");
String[] split2 = split[1].split("\t");
k.set(split[0]);// word
v.set(split2[0] + “-” + split2[1]); // 文件名-n
context.write(k, v);
}
}
public static class Index2Reducer extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
//filename-count
for (Text text : iters) {
sb.append(text.toString() + " ");
}
context.write(key, new Text(sb.toString()));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(Index2Mapper.class);
job.setReducerClass(Index2Reducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, new Path(“d:/data/index/output1”));
FileOutputFormat.setOutputPath(job, new Path(“d:/data/index/output2”));
job.setNumReduceTasks(1);
boolean res = job.waitForCompletion(true);
System.exit(res ? 0 : -1);
}
}
3.3.4.共同好友案例
共同好友案例:使用两个 MapReduce 程序来完成需求。
STEP ONE
**
-
获取任意两个人的共同好友数据 A:B,C,G,F,D
-
@author DOIT_HANG_GE
-
@version 2019年3月6日
*/
public class SameFriend {public static class SameFriendMapper1 extends Mapper<LongWritable, Text, Text, Text> {
Text k = new Text() ;
Text v = new Text() ;
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();String[] split = line.split(":"); v.set(split[0]); String[] fs = split[1].split(","); for (String f : fs) { k.set(f); context.write(k, v); } }
}
// key 是朋友 value是人
public static class SameFriendReducer1 extends Reducer<Text, Text, Text, Text> {@Override protected void reduce(Text key, Iterable<Text> iter, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { ArrayList<String> list = new ArrayList<>() ; for (Text text : iter) { list.add(text.toString()) ; } Collections.sort(list); for (int i = 0; i < list.size()-1; i++) { for (int j = i+1; j < list.size(); j++) { context.write(new Text(list.get(i)+"和"+list.get(j)+"的好友是:"), key); } } }
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(SameFriendMapper1.class);
job.setReducerClass(SameFriendReducer1.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job, new Path(“d:/data/friend/input”));
FileOutputFormat.setOutputPath(job, new Path(“d:/data/friend/output1”));
boolean res = job.waitForCompletion(true) ;
System.exit(res ? 0 : 1);}
}
STEP TWO
/**
 * MapReduce job (stage 2 of the common-friends case): groups stage-1 records
 * by person-pair and lists every friend that pair has in common on one line.
 */
public class SameFriend2 {

    /** Splits each stage-1 line at the tab into (pair-label, friend). */
    public static class SameFriendMapper2 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // parts[0] = "X和Y的好友是:" label, parts[1] = one shared friend
            String[] parts = value.toString().split("\t");
            context.write(new Text(parts[0]), new Text(parts[1]));
        }
    }

    /** Joins all shared friends for a pair into a single space-separated line. */
    public static class SameFriendReducer2 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();
            boolean first = true;
            for (Text friend : values) {
                // Separator goes before every element except the first, so no
                // trailing whitespace needs trimming afterwards.
                if (!first) {
                    joined.append(" ");
                }
                joined.append(friend.toString());
                first = false;
            }
            context.write(key, new Text(joined.toString()));
        }
    }

    /**
     * Configures and submits the job. Reads stage-1 output from
     * d:/data/friend/output1, writes d:/data/friend/res; exits 0 on
     * success, 1 on failure.
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setMapperClass(SameFriendMapper2.class);
        job.setReducerClass(SameFriendReducer2.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);
        FileInputFormat.setInputPaths(job, new Path("d:/data/friend/output1"));
        FileOutputFormat.setOutputPath(job, new Path("d:/data/friend/res"));
        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : 1);
    }
}