Hadoop Mapreduce
形式化映射器
//key 交易ID 忽略
//value 交易商品(i1,i2,...in)
map(key,value){
(s1,s2,...sn)=sort(i1,i2,...in);
List<Tuple2<si,sj>> listOfPairs = Combinations.generateCombinations(s1,s2,...sn); // Combinations 是一个 Java 工具类,为给定的商品列表生成购物篮商品组合(2项)
for (Tuple2<si,sj> pair : listOfPairs){
emit(pair, 1);}
}
归约器
//key Tuple2<si,sj>
//value List<Integer>
reduce(Tuple2<si,sj> key, List<Integer> values){
int sum = 0;
for (int i : values){
sum += i;}
emit(key,sum);}
MBAMapper
public class MBAMapper extends Mapper<LongWritable, Text,Text,IntWritable>{
public static final int DEFAULT_NUMBER_OF_PAIRS = 2;
//输出key2
private static final Text reducekey = new Text();
// 输出value2
private static final IntWritable Number_ONE=new IntWritable(1);
int numberofpairs
protected void setup(Context context)
throws IOException, InterruptedException{
this.numberofpairs=context.getConfiguration().getInt("number of pair", DEFAULT_NUMBER_OF_PAIRS);}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
String line =value.toString().trim();
List<String> items = convertItemsToList(line);
if ((items==null)||(items.isEmpty())){ return;}
generateMapperOutput(numberOfPairs, items,context);}
private static List<String> convertItemsToList(String line){
...}
private void generateMapperOutput(){...}
}
spark 关联规则
public class FindAssoci