本文记录了在 Mac 上使用 Maven 和 Eclipse 搭建 Mahout 小 demo 的过程。
一、eclipse安装
二、maven安装
请允许我跳过这两步,因为我开始开发的过程中,eclipse和maven都已经是现成安装好的。
三、用maven构建mahout项目
1. 新建项目
打开eclipse,通过菜单File > New > Project新建项目,选择Maven Project
第一页直接Next,
第二页选择maven-archetype-quickstart,
第三页输入GroupId和ArtifactId, 这个自己随便定就好。
点了Finish后,Eclipse便会创建如下一个工程:
2. 引入mahout包
在http://mvnrepository.com/artifact/org.apache.mahout/mahout-core上可以查找到相关的包mahout-core, mahout-math。
在pom.xml中插入这段依赖,如下图。
实际上经过我尝试,只要引入mahout-core包以后,mahout-math也会引入。
<!-- Mahout core library, version 0.6. Declaring mahout-core alone was observed to bring in mahout-math as well. -->
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.6</version>
</dependency>
3. 编写代码运行程序
先添加工具类:RecommendFactory
package com.mrh.mahout.project;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.RMSRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity;
import org.apache.mahout.cf.taste.impl.recommender.FarthestNeighborClusterSimilarity;
import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.NearestNeighborClusterSimilarity;
import org.apache.mahout.cf.taste.impl.recommender.TreeClusteringRecommender;
import org.apache.mahout.cf.taste.impl.recommender.knn.KnnItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.knn.Optimizer;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.impl.recommender.svd.Factorizer;
import org.apache.mahout.cf.taste.impl.recommender.svd.SVDRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CityBlockSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * Static factory helpers for assembling Mahout Taste recommenders: data models,
 * similarity measures, user neighborhoods, recommender builders and evaluators.
 * All members are static; the class holds no state.
 *
 * @author bsspirit@gmail.com
 * @see <a href="http://blog.fens.me/mahout-recommendation-api/">http://blog.fens.me/mahout-recommendation-api/</a>
 */
public final class RecommendFactory {

    /**
     * Builds a data model from a preference file.
     *
     * @param file path of the preference file
     * @return a {@link FileDataModel} backed by {@code file}
     * @throws TasteException declared for callers; propagated from Mahout
     * @throws IOException if the file cannot be read
     */
    public static DataModel buildDataModel(String file) throws TasteException, IOException {
        return new FileDataModel(new File(file));
    }

    /**
     * Builds a boolean-preference data model from a preference file: the file is
     * loaded as a {@link FileDataModel} and then converted with
     * {@link GenericBooleanPrefDataModel#toDataMap}.
     *
     * @param file path of the preference file
     * @return a {@link GenericBooleanPrefDataModel} derived from {@code file}
     * @throws TasteException if the conversion fails
     * @throws IOException if the file cannot be read
     */
    public static DataModel buildDataModelNoPref(String file) throws TasteException, IOException {
        DataModel loaded = new FileDataModel(new File(file));
        return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(loaded));
    }

    /**
     * Returns a {@link DataModelBuilder} that wraps training data in a
     * {@link GenericBooleanPrefDataModel}.
     *
     * @return a builder producing boolean-preference data models
     */
    public static DataModelBuilder buildDataModelNoPrefBuilder() {
        return new DataModelBuilder() {
            @Override
            public DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData) {
                return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(trainingData));
            }
        };
    }

    /**
     * Similarity measures selectable through this factory. Not every constant
     * is handled by every factory method; see the individual methods for the
     * fallback each one applies.
     */
    public enum SIMILARITY {
        PEARSON, EUCLIDEAN, COSINE, TANIMOTO, LOGLIKELIHOOD, SPEARMAN, CITYBLOCK, FARTHEST_NEIGHBOR_CLUSTER, NEAREST_NEIGHBOR_CLUSTER
    }

    /**
     * Creates a user similarity of the requested type.
     *
     * @param type similarity to create; any constant without its own case
     *             (including the two cluster constants) falls back to
     *             {@link EuclideanDistanceSimilarity}
     * @param m data model the similarity is computed over
     * @return the user similarity implementation
     * @throws TasteException if the similarity cannot be constructed
     */
    public static UserSimilarity userSimilarity(SIMILARITY type, DataModel m) throws TasteException {
        switch (type) {
            case PEARSON:
                return new PearsonCorrelationSimilarity(m);
            case COSINE:
                return new UncenteredCosineSimilarity(m);
            case TANIMOTO:
                return new TanimotoCoefficientSimilarity(m);
            case LOGLIKELIHOOD:
                return new LogLikelihoodSimilarity(m);
            case SPEARMAN:
                return new SpearmanCorrelationSimilarity(m);
            case CITYBLOCK:
                return new CityBlockSimilarity(m);
            case EUCLIDEAN:
            default:
                return new EuclideanDistanceSimilarity(m);
        }
    }

    /**
     * Creates an item similarity of the requested type.
     *
     * @param type similarity to create; there is no SPEARMAN case here, so
     *             SPEARMAN and the two cluster constants fall back to
     *             {@link EuclideanDistanceSimilarity}
     * @param m data model the similarity is computed over
     * @return the item similarity implementation
     * @throws TasteException if the similarity cannot be constructed
     */
    public static ItemSimilarity itemSimilarity(SIMILARITY type, DataModel m) throws TasteException {
        switch (type) {
            case PEARSON:
                return new PearsonCorrelationSimilarity(m);
            case COSINE:
                return new UncenteredCosineSimilarity(m);
            case TANIMOTO:
                return new TanimotoCoefficientSimilarity(m);
            case LOGLIKELIHOOD:
                return new LogLikelihoodSimilarity(m);
            case CITYBLOCK:
                return new CityBlockSimilarity(m);
            case EUCLIDEAN:
            default:
                return new EuclideanDistanceSimilarity(m);
        }
    }

    /**
     * Creates a cluster similarity on top of a user similarity.
     *
     * @param type NEAREST_NEIGHBOR_CLUSTER selects
     *             {@link NearestNeighborClusterSimilarity}; every other constant
     *             yields {@link FarthestNeighborClusterSimilarity}
     * @param us user similarity the cluster similarity delegates to
     * @return the cluster similarity implementation
     * @throws TasteException declared for callers; propagated from Mahout
     */
    public static ClusterSimilarity clusterSimilarity(SIMILARITY type, UserSimilarity us) throws TasteException {
        switch (type) {
            case NEAREST_NEIGHBOR_CLUSTER:
                return new NearestNeighborClusterSimilarity(us);
            case FARTHEST_NEIGHBOR_CLUSTER:
            default:
                return new FarthestNeighborClusterSimilarity(us);
        }
    }

    /**
     * User neighborhood strategies selectable through this factory.
     */
    public enum NEIGHBORHOOD {
        NEAREST, THRESHOLD
    }

    /**
     * Creates a user neighborhood.
     *
     * @param type NEAREST builds a {@link NearestNUserNeighborhood}; THRESHOLD
     *             (and the default branch) builds a {@link ThresholdUserNeighborhood}
     * @param s user similarity used by the neighborhood
     * @param m data model the neighborhood is computed over
     * @param num for NEAREST this is cast to {@code int} (fraction discarded)
     *            and used as the neighbor count; for THRESHOLD it is passed
     *            through unchanged as the threshold
     * @return the user neighborhood implementation
     * @throws TasteException if the neighborhood cannot be constructed
     */
    public static UserNeighborhood userNeighborhood(NEIGHBORHOOD type, UserSimilarity s, DataModel m, double num) throws TasteException {
        switch (type) {
            case NEAREST:
                return new NearestNUserNeighborhood((int) num, s, m);
            case THRESHOLD:
            default:
                return new ThresholdUserNeighborhood(num, s, m);
        }
    }

    /**
     * Recommender families. Not referenced by any method of this class.
     */
    public enum RECOMMENDER {
        USER, ITEM
    }

    /**
     * Returns a builder for user-based recommenders.
     *
     * @param us user similarity handed to the recommender
     * @param un user neighborhood handed to the recommender
     * @param pref {@code true} builds {@link GenericUserBasedRecommender};
     *             {@code false} builds {@link GenericBooleanPrefUserBasedRecommender}
     * @return a builder that creates the recommender for a given data model
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder userRecommender(final UserSimilarity us, final UserNeighborhood un, boolean pref) throws TasteException {
        if (pref) {
            return new RecommenderBuilder() {
                @Override
                public Recommender buildRecommender(DataModel model) throws TasteException {
                    return new GenericUserBasedRecommender(model, un, us);
                }
            };
        }
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                return new GenericBooleanPrefUserBasedRecommender(model, un, us);
            }
        };
    }

    /**
     * Returns a builder for item-based recommenders.
     *
     * @param is item similarity handed to the recommender
     * @param pref {@code true} builds {@link GenericItemBasedRecommender};
     *             {@code false} builds {@link GenericBooleanPrefItemBasedRecommender}
     * @return a builder that creates the recommender for a given data model
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder itemRecommender(final ItemSimilarity is, boolean pref) throws TasteException {
        if (pref) {
            return new RecommenderBuilder() {
                @Override
                public Recommender buildRecommender(DataModel model) throws TasteException {
                    return new GenericItemBasedRecommender(model, is);
                }
            };
        }
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                return new GenericBooleanPrefItemBasedRecommender(model, is);
            }
        };
    }

    /**
     * Returns a builder that creates a {@link SlopeOneRecommender} for a data model.
     *
     * @return the recommender builder
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder slopeOneRecommender() throws TasteException {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel dataModel) throws TasteException {
                return new SlopeOneRecommender(dataModel);
            }
        };
    }

    /**
     * Returns a builder that creates a {@link KnnItemBasedRecommender}.
     *
     * @param is item similarity handed to the recommender
     * @param op optimizer handed to the recommender
     * @param n passed as the last constructor argument of
     *          {@link KnnItemBasedRecommender} (presumably the neighborhood
     *          size; confirm against the Mahout API)
     * @return the recommender builder
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder itemKNNRecommender(final ItemSimilarity is, final Optimizer op, final int n) throws TasteException {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel dataModel) throws TasteException {
                return new KnnItemBasedRecommender(dataModel, is, op, n);
            }
        };
    }

    /**
     * Returns a builder that creates an {@link SVDRecommender}.
     *
     * @param factorizer factorizer handed to the recommender
     * @return the recommender builder
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder svdRecommender(final Factorizer factorizer) throws TasteException {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel dataModel) throws TasteException {
                return new SVDRecommender(dataModel, factorizer);
            }
        };
    }

    /**
     * Returns a builder that creates a {@link TreeClusteringRecommender}.
     *
     * @param cs cluster similarity handed to the recommender
     * @param n passed as the last constructor argument of
     *          {@link TreeClusteringRecommender} (presumably the target number
     *          of clusters; confirm against the Mahout API)
     * @return the recommender builder
     * @throws TasteException declared for callers; not thrown while creating the builder itself
     */
    public static RecommenderBuilder treeClusterRecommender(final ClusterSimilarity cs, final int n) throws TasteException {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel dataModel) throws TasteException {
                return new TreeClusteringRecommender(dataModel, cs, n);
            }
        };
    }

    /**
     * Prints one line to standard output with the user id followed by each
     * recommended item as {@code (itemId,value)}.
     *
     * @param uid user the recommendations belong to
     * @param recommendations items recommended for {@code uid}
     * @param skip when {@code true}, users with no recommendations produce no output
     */
    public static void showItems(long uid, List<RecommendedItem> recommendations, boolean skip) {
        if (skip && recommendations.isEmpty()) {
            return;
        }
        System.out.printf("uid:%s,", uid);
        for (RecommendedItem recommendation : recommendations) {
            System.out.printf("(%s,%f)", recommendation.getItemID(), recommendation.getValue());
        }
        System.out.println();
    }

    /**
     * Evaluator implementations selectable through this factory.
     */
    public enum EVALUATOR {
        AVERAGE_ABSOLUTE_DIFFERENCE, RMS
    }

    /**
     * Creates a recommender evaluator.
     *
     * @param type RMS builds an {@link RMSRecommenderEvaluator}; anything else
     *             builds an {@link AverageAbsoluteDifferenceRecommenderEvaluator}
     * @return the evaluator implementation
     */
    public static RecommenderEvaluator buildEvaluator(EVALUATOR type) {
        switch (type) {
            case RMS:
                return new RMSRecommenderEvaluator();
            case AVERAGE_ABSOLUTE_DIFFERENCE:
            default:
                return new AverageAbsoluteDifferenceRecommenderEvaluator();
        }
    }

    /**
     * Evaluates a recommender with the evaluator selected by {@code type} and
     * prints the score, prefixed with the evaluator type, to standard output.
     *
     * @param type evaluator to use, resolved through {@link #buildEvaluator}
     * @param rb builder of the recommender under evaluation
     * @param mb data model builder forwarded to the evaluator
     * @param dm data model forwarded to the evaluator
     * @param trainPt forwarded as the fourth argument of
     *                {@code RecommenderEvaluator.evaluate}; the fifth argument
     *                is fixed at 1.0
     * @throws TasteException if the evaluation fails
     */
    public static void evaluate(EVALUATOR type, RecommenderBuilder rb, DataModelBuilder mb, DataModel dm, double trainPt) throws TasteException {
        System.out.printf("%s Evaluater Score:%s\n", type.toString(), buildEvaluator(type).evaluate(rb, mb, dm, trainPt, 1.0));
    }

    /**
     * Evaluates a recommender with a caller-supplied evaluator and prints the
     * score to standard output.
     *
     * @param re evaluator to run
     * @param rb builder of the recommender under evaluation
     * @param mb data model builder forwarded to the evaluator
     * @param dm data model forwarded to the evaluator
     * @param trainPt forwarded as the fourth argument of
     *                {@code RecommenderEvaluator.evaluate}; the fifth argument
     *                is fixed at 1.0
     * @throws TasteException if the evaluation fails
     */
    public static void evaluate(RecommenderEvaluator re, RecommenderBuilder rb, DataModelBuilder mb, DataModel dm, double trainPt) throws TasteException {
        System.out.printf("Evaluater Score:%s\n", re.evaluate(rb, mb, dm, trainPt, 1.0));
    }

    /**
     * Runs a {@link GenericRecommenderIRStatsEvaluator} and prints precision and
     * recall to standard output. No rescorer is supplied ({@code null}), the
     * relevance threshold is {@code CHOOSE_THRESHOLD}, and the final argument
     * is fixed at 1.0.
     *
     * @param rb builder of the recommender under evaluation
     * @param mb data model builder forwarded to the evaluator
     * @param m data model forwarded to the evaluator
     * @param topn forwarded as the fifth argument of the IR stats evaluation
     * @throws TasteException if the evaluation fails
     */
    public static void statsEvaluator(RecommenderBuilder rb, DataModelBuilder mb, DataModel m, int topn) throws TasteException {
        RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
        IRStatistics stats = evaluator.evaluate(rb, mb, m, null, topn, GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD, 1.0);
        System.out.printf("Recommender IR Evaluator: [Precision:%s,Recall:%s]\n", stats.getPrecision(), stats.getRecall());
    }
}
测试类
package com.mrh.mahout.project;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * Demo driver: loads a preference file, evaluates a user-based collaborative
 * filtering recommender and prints the recommendations for every user.
 */
public class RecommendTest {
    /** Neighbor count handed to the NEAREST user neighborhood. */
    static final int NEIGHBORHOOD_NUM = 2;
    /** Number of recommendations requested per user. */
    static final int RECOMMENDER_NUM = 3;
    /** Data file used when no path is given on the command line. */
    private static final String DEFAULT_DATA_FILE = "/Users/zjgy/Documents/dataset/item.csv";

    /**
     * Loads the data model, prints it, and runs the user-based demo.
     *
     * @param args optional; {@code args[0]} overrides the default data file path
     * @throws TasteException if building or evaluating the recommender fails
     * @throws IOException if the data file cannot be read
     */
    public static void main(String[] args) throws TasteException, IOException {
        String file = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;
        DataModel dataModel = new FileDataModel(new File(file));
        System.out.println(dataModel.toString());
        userCF(dataModel);
    }

    /**
     * Builds a user-based recommender (Euclidean similarity, nearest-N
     * neighborhood), prints its evaluation scores, then prints the
     * recommendations of every user in the model.
     *
     * @param dataModel preferences to evaluate and recommend from
     * @throws TasteException if evaluation or recommendation fails
     */
    public static void userCF(DataModel dataModel) throws TasteException {
        UserSimilarity userSimilarity = RecommendFactory.userSimilarity(RecommendFactory.SIMILARITY.EUCLIDEAN, dataModel);
        UserNeighborhood userNeighborhood = RecommendFactory.userNeighborhood(RecommendFactory.NEIGHBORHOOD.NEAREST, userSimilarity, dataModel, NEIGHBORHOOD_NUM);
        RecommenderBuilder recommenderBuilder = RecommendFactory.userRecommender(userSimilarity, userNeighborhood, true);

        // No DataModelBuilder is supplied (null) to either evaluation call.
        RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7);
        RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2);

        // The recommender depends only on the data model, so build it once
        // instead of once per user inside the loop.
        Recommender recommender = recommenderBuilder.buildRecommender(dataModel);
        LongPrimitiveIterator iter = dataModel.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            List<RecommendedItem> list = recommender.recommend(uid, RECOMMENDER_NUM);
            RecommendFactory.showItems(uid, list, true);
        }
    }
}
测试结果:
附数据集:
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0
4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0
5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0