下面是一个基本的 Java 程序:RecommenderIntro.java
package xyz.pl8.recommenderintro;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.io.File;
import java.util.List;
/**
 * Minimal user-based recommender example.
 *
 * <p>Loads a ratings file, builds a Pearson-correlation user similarity, selects the
 * nearest user neighbours and prints the recommended items for one user.
 *
 * <p>Usage: {@code RecommenderIntro [ratingsFile]}. When no argument is given the
 * original default path {@value #DEFAULT_DATA_FILE} is used.
 */
public class RecommenderIntro {
    /** Ratings file read when no path is supplied on the command line. */
    private static final String DEFAULT_DATA_FILE = "/home/hadoop/intro.csv";
    /** Number of nearest neighbours passed to {@code NearestNUserNeighborhood}. */
    private static final int NEIGHBORHOOD_SIZE = 3;
    /** Id of the user for whom recommendations are requested. */
    private static final long TARGET_USER_ID = 2;
    /** Number of recommendations requested for the target user. */
    private static final int RECOMMENDATION_COUNT = 2;

    /**
     * Runs the example and prints the data model followed by each recommended item.
     *
     * @param args optional; {@code args[0]} is the path of the ratings file
     */
    public static void main(String[] args) {
        String dataFile = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;
        try {
            // Expected intro.csv format: userId,itemId,rating
            DataModel model = new FileDataModel(new File(dataFile));
            System.out.println(model);

            // User-to-user similarity
            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

            // K nearest neighbouring users
            UserNeighborhood neighborhood =
                    new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);

            // User-based recommender
            Recommender recommender =
                    new GenericUserBasedRecommender(model, neighborhood, similarity);

            // Recommend items for the target user
            List<RecommendedItem> recommendedItems =
                    recommender.recommend(TARGET_USER_ID, RECOMMENDATION_COUNT);
            for (RecommendedItem item : recommendedItems) {
                System.out.println(item);
            }
        } catch (Exception e) {
            // Top-level boundary of the demo: report whatever went wrong and return.
            e.printStackTrace();
        }
    }
}
下面是基于物品的多线程批量推荐示例(批量预计算物品相似度),包含 BatchItemSimilaritiesIntro.java 和 MovieLensDataModel.java 两个文件:
package xyz.pl8.userrecommendermovielens;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.precompute.FileSimilarItemsWriter;
import org.apache.mahout.cf.taste.impl.similarity.precompute.MultithreadedBatchItemSimilarities;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
import java.io.File;
import java.io.IOException;
/**
 * Pre-computes item-to-item similarities with several threads and writes them to
 * {@code similarity.csv} in the directory named by the {@code java.io.tmpdir} system property.
 *
 * <p>Usage: {@code BatchItemSimilaritiesIntro <ratingsFile>}, where the ratings file uses
 * the "::"-delimited format accepted by {@code MovieLensDataModel}.
 */
public class BatchItemSimilaritiesIntro {
    /** Second constructor argument of {@code MultithreadedBatchItemSimilarities} (similar items kept per item). */
    private static final int SIMILAR_ITEMS_PER_ITEM = 5;
    /** Second argument of {@code computeItemSimilarities} (maximum duration in hours, per the Mahout API). */
    private static final int MAX_DURATION_IN_HOURS = 1;

    /**
     * Runs the batch similarity computation.
     *
     * @param args exactly one element: the path of the ratings file
     * @throws IOException if the ratings file cannot be converted/read or the result cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.err.println("Need dataset file as argument!");
            System.exit(-1);
        }

        File resultFile = new File(System.getProperty("java.io.tmpdir"), "similarity.csv");

        DataModel dataModel = new MovieLensDataModel(new File(args[0]));
        ItemSimilarity similarity = new LogLikelihoodSimilarity(dataModel);
        ItemBasedRecommender recommender = new GenericItemBasedRecommender(dataModel, similarity);

        BatchItemSimilarities batchItemSimilarities =
                new MultithreadedBatchItemSimilarities(recommender, SIMILAR_ITEMS_PER_ITEM);
        SimilarItemsWriter writer = new FileSimilarItemsWriter(resultFile);

        // Use one worker per available processor.
        int degreeOfParallelism = Runtime.getRuntime().availableProcessors();
        int numSimilarities = batchItemSimilarities.computeItemSimilarities(
                degreeOfParallelism, MAX_DURATION_IN_HOURS, writer);

        // The original message read "Computed N for  items ..." because an operand was missing.
        System.out.println("Computed " + numSimilarities
                + " item similarities and saved them to " + resultFile.getAbsolutePath());
    }
}
package xyz.pl8.userrecommendermovielens;
import org.apache.commons.io.Charsets;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.omg.CORBA.PUBLIC_MEMBER;
import java.io.*;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
/**
 * {@link FileDataModel} for rating files whose fields are separated by {@code "::"}.
 *
 * <p>The input is first rewritten as a comma-separated file in the directory named by the
 * {@code java.io.tmpdir} system property, and that converted file is what the superclass loads.
 */
public class MovieLensDataModel extends FileDataModel {
    /** Field separator used by the input file. */
    private static final String COLON_DELIMITER = "::";
    /** Pre-compiled form of {@link #COLON_DELIMITER}, reused for every line. */
    private static final Pattern COLON_DELIMITER_PATTERN = Pattern.compile(COLON_DELIMITER);

    /**
     * Converts {@code ratingsFile} to comma-separated form and loads the result.
     *
     * @param ratingsFile "::"-delimited ratings file
     * @throws IOException if the file cannot be read, converted or loaded
     */
    public MovieLensDataModel(File ratingsFile) throws IOException {
        super(convertFile(ratingsFile));
    }

    /**
     * Rewrites a "::"-delimited file as {@code ratings.csv} in the temp directory.
     *
     * <p>For every input line the text from the last "::" onward is dropped (presumably the
     * timestamp column of a MovieLens ratings file; verify against the data set) and the
     * remaining "::" separators are replaced by commas. Lines that contain no comma after
     * conversion, or whose only comma is the first character, are skipped.
     *
     * <p>On any failure the partially written output is deleted and the exception is
     * propagated. Previously the exception was swallowed and a non-existent file was returned.
     *
     * @param originalFile the "::"-delimited input file
     * @return the converted comma-separated file
     * @throws IOException if a line contains no "::" delimiter, or on any read/write error
     */
    public static File convertFile(File originalFile) throws IOException {
        File resultFile = new File(System.getProperty("java.io.tmpdir"), "ratings.csv");
        if (resultFile.exists()) {
            resultFile.delete();
        }

        Writer writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
        boolean completed = false;
        try {
            for (String line : new FileLineIterable(originalFile, false)) {
                int lastDelimiter = line.lastIndexOf(COLON_DELIMITER);
                if (lastDelimiter < 0) {
                    throw new IOException("Invalid data!");
                }
                String withoutLastField = line.substring(0, lastDelimiter);
                String csvLine = COLON_DELIMITER_PATTERN.matcher(withoutLastField).replaceAll(",");
                if (csvLine.lastIndexOf(",") <= 0) {
                    // Nothing usable left on this line; skip it.
                    continue;
                }
                writer.write(csvLine);
                writer.write('\n');
            }
            // Close inside the try: a failed flush means the conversion failed too.
            writer.close();
            completed = true;
        } finally {
            if (!completed) {
                try {
                    // Release the file handle before deleting; closing twice is harmless.
                    writer.close();
                } catch (IOException ignored) {
                    // The original failure is already propagating; a secondary close error adds nothing.
                }
                resultFile.delete();
            }
        }
        return resultFile;
    }
}