(1)描述
关联规则挖掘最典型的例子是购物篮分析:通过分析顾客的购买记录,可以知道哪些商品经常被一起购买,从而改进商品货架的布局。本节使用 Spark MLlib 的 FP-Growth 算法挖掘频繁项集。
(2) 测试数据
r z h k p
z y x w v u t s
s x o n r
x z y m t s q e
z
x z y r q t p
(3) 样例程序
/**
 * Mines frequent itemsets from a transaction file with FP-Growth and prints each itemset
 * together with its frequency.
 *
 * <p>Usage: {@code [inputFile] [minSupport] [numPartition]}. All arguments are optional:
 * the input file defaults to {@code sample_fpgrowth.txt}, the minimum support defaults to
 * {@code 0.3}, and the partition count is only applied when a positive value is supplied.
 * Each line of the input file is one transaction whose items are separated by single spaces.
 *
 * @param args optional command-line arguments as described above
 * @throws NumberFormatException if {@code minSupport} or {@code numPartition} is not numeric
 */
public static void main(String[] args) {
    String inputFile = "sample_fpgrowth.txt";
    double minSupport = 0.3;
    // Non-positive means "not specified"; the FPGrowth default is kept in that case.
    int numPartition = -1;

    // minSupport and numPartition are read from positions 1 and 2, so position 0 is
    // the input path; without it the bundled sample file is used.
    if (args.length >= 1) {
        inputFile = args[0];
    }
    if (args.length >= 2) {
        minSupport = Double.parseDouble(args[1]);
    }
    if (args.length >= 3) {
        numPartition = Integer.parseInt(args[2]);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaFPGrowthExample").setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    try {
        // Split every input line into its items; one line is one transaction.
        JavaRDD<ArrayList<String>> transactions = sc.textFile(inputFile).map(
            new Function<String, ArrayList<String>>() {
                @Override
                public ArrayList<String> call(String s) {
                    return Lists.newArrayList(s.split(" "));
                }
            }
        );

        FPGrowth fpGrowth = new FPGrowth().setMinSupport(minSupport);
        // NOTE(review): newer Spark releases appear to reject non-positive values in
        // setNumPartitions, so the sentinel is never forwarded — confirm against the
        // Spark version in use.
        if (numPartition > 0) {
            fpGrowth.setNumPartitions(numPartition);
        }
        FPGrowthModel<String> model = fpGrowth.run(transactions);

        // Output format: "[item1,item2,...], frequency"
        for (FPGrowth.FreqItemset<String> s : model.freqItemsets().toJavaRDD().collect()) {
            System.out.println("[" + Joiner.on(",").join(s.javaItems()) + "], " + s.freq());
        }
    } finally {
        // Release the Spark context even when the job fails.
        sc.stop();
    }
}
(4)测试结果(每行格式为"[频繁项集], 出现次数";对应默认的 minSupport = 0.3)
[s], 3
[s,x], 3
[s,x,z], 2
[s,z], 2
[r], 3
[r,x], 2
[r,z], 2
[y], 3
[y,s], 2
[y,s,x], 2
[y,s,x,z], 2
[y,s,z], 2
[y,x], 3
[y,x,z], 3
[y,t], 3
[y,t,s], 2
[y,t,s,x], 2
[y,t,s,x,z], 2
[y,t,s,z], 2
[y,t,x], 3
[y,t,x,z], 3
[y,t,z], 3
[y,z], 3
[q], 2
[q,y], 2
[q,y,x], 2
[q,y,x,z], 2
[q,y,t], 2
[q,y,t,x], 2
[q,y,t,x,z], 2
[q,y,t,z], 2
[q,y,z], 2
[q,x], 2
[q,x,z], 2
[q,t], 2
[q,t,x], 2
[q,t,x,z], 2
[q,t,z], 2
[q,z], 2
[x], 4
[x,z], 3
[t], 3
[t,s], 2
[t,s,x], 2
[t,s,x,z], 2
[t,s,z], 2
[t,x], 3
[t,x,z], 3
[t,z], 3
[p], 2
[p,r], 2
[p,r,z], 2
[p,z], 2
[z], 5