主要思路是通过递归,从[a,b,c,d]中取出所有由n个元素组成的组合
如果n=2
为:[a,b],[a,c],[a,d],[b,c],[b,d],[c,d]
如果n=3
为:[a,b,c],[a,b,d],[b,c,d],[a,c,d]
然后统计每个组合在所有输入行中出现的次数
/**
 * Mapper of the market-basket job: splits each comma-separated input line into
 * items, asks {@code Combination.findSortedCombinations} for the groups of
 * {@code numberOfPairs} items, and emits every group with a count of one.
 *
 * <p>Output key: {@code toString()} of one group (for example {@code [bread, crackers]}).
 * Output value: the constant 1.
 */
public class MBAMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Group size used when "number.of.pairs" is absent from the job configuration. */
    public static final int DEFAULT_NUMBER_OF_PAIRS = 2;

    /** Constant count written for every emitted group. */
    private static final IntWritable NUMBER_ONE = new IntWritable(1);

    /**
     * Reusable output key. Kept per mapper instance (not static) so that this
     * mutable object is never shared between mapper instances in one JVM.
     */
    private final Text reducerKey = new Text();

    /** Number of items per emitted group; read from the configuration in {@link #setup}. */
    int numberOfPairs;

    /**
     * Reads the group size from the "number.of.pairs" configuration entry,
     * falling back to {@link #DEFAULT_NUMBER_OF_PAIRS}.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        this.numberOfPairs = context.getConfiguration().getInt(
                "number.of.pairs", DEFAULT_NUMBER_OF_PAIRS);
    }

    /**
     * Processes one input line; lines without any non-blank item produce no output.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of comma-separated items
     * @param context Hadoop context the groups are written to
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        List<String> items = convertItemsToList(value.toString());
        if (items.isEmpty()) {
            // no mapper output will be generated
            return;
        }
        generateMapperOutput(numberOfPairs, items, context);
    }

    /**
     * Splits a line on commas and trims every token.
     *
     * @param line raw input line, may be {@code null}
     * @return the trimmed, non-blank items in input order; an empty list (never
     *         {@code null}) when the line holds no usable item
     */
    private static List<String> convertItemsToList(String line) {
        List<String> items = new ArrayList<String>();
        if ((line == null) || line.isEmpty()) {
            return items;
        }
        String[] tokens = StringUtils.split(line, ",");
        if (tokens == null) {
            return items;
        }
        for (String token : tokens) {
            if (token == null) {
                continue;
            }
            String item = token.trim();
            // A token that is blank after trimming (e.g. "a, ,b" or a
            // whitespace-only line) is not a real item and must not be combined.
            if (!item.isEmpty()) {
                items.add(item);
            }
        }
        return items;
    }

    /**
     * Emits one (group, 1) record per group returned by
     * {@code Combination.findSortedCombinations(items, numberOfPairs)}.
     *
     * @param numberOfPairs number of items per group
     * @param items         items of one input line
     * @param context       Hadoop context the records are written to
     */
    private void generateMapperOutput(int numberOfPairs, List<String> items,
            Context context) throws IOException, InterruptedException {
        List<List<String>> sortedCombinations = Combination
                .findSortedCombinations(items, numberOfPairs);
        for (List<String> itemList : sortedCombinations) {
            // The list's toString() form is the reducer key.
            reducerKey.set(itemList.toString());
            context.write(reducerKey, NUMBER_ONE);
        }
    }
}
/**
 * Reducer of the market-basket job: adds up all counts received for one key
 * and writes the key together with that total.
 */
public class MBAReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums the values of one key and emits a single (key, total) record.
     *
     * @param key     the key whose values are being aggregated
     * @param values  the counts received for {@code key}
     * @param context Hadoop context the total is written to
     */
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Running total of every count seen for this key.
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }
        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }
}
/**
 * Configures and runs the "MBA" MapReduce job with {@code MBAMapper} and
 * {@code MBAReducer}, using a single reduce task and text output.
 *
 * <p>If the job does not complete successfully, a message is printed to
 * standard error and the JVM exits with status 1, so that a failure is not
 * reported as success to the caller.
 *
 * @param args optional; when at least two arguments are given, {@code args[0]}
 *             is the input path and {@code args[1]} the output path. Otherwise
 *             the built-in default paths are used.
 * @throws Exception if the job cannot be set up or its execution throws
 */
public static void main(String[] args) throws Exception {
    // Fall back to the built-in paths unless both paths are supplied.
    boolean pathsGiven = (args != null) && (args.length >= 2);
    String inputPath = pathsGiven ? args[0] : "E:\\java\\test\\07\\07.txt";
    String outputPath = pathsGiven ? args[1] : "E:\\java\\test\\07\\out";

    Configuration conf1 = new Configuration();
    System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0");
    conf1.setBoolean("dfs.permissions", false);

    Job job = Job.getInstance(conf1, "MBA");
    job.setMapperClass(MBAMapper.class);
    job.setReducerClass(MBAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // output format
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Surface a failed job instead of silently returning with exit status 0.
    if (!job.waitForCompletion(true)) {
        System.err.println("MR run failed");
        System.exit(1);
    }
}
输入
crackers,bread,banana
crackers,coke,butter,coffee
crackers,bread
crackers,bread
crackers,bread,coffee
butter,coke
butter,coke,bread,crackers
输出
[banana, bread] 1
[banana, crackers] 1
[bread, butter] 1
[bread, coffee] 1
[bread, coke] 1
[bread, crackers] 5
[butter, coffee] 1
[butter, coke] 3
[butter, crackers] 2
[coffee, coke] 1
[coffee, crackers] 2
[coke, crackers] 2