// Reposted from: http://f.dataguru.cn/thread-268570-1-1.html
/**
 * Command-line driver that stages a local ratings file into HDFS, runs a
 * {@code RecommenderJob} over it, and prints the job's first output part file.
 *
 * <p>All locations (NameNode URI, local data files, HDFS directories) are
 * hard-coded; the arguments passed to {@link #main(String[])} are ignored.
 */
public class ItemCFHadoop1 {

    /** NameNode URI that every HDFS path used by this driver is built on. */
    private static final String HDFS = "hdfs://192.168.1.202:9000";

    /**
     * Similarity measure passed to the job via {@code --similarityClassname}.
     *
     * <p>Alternatives the original author listed from the same package:
     * CityBlockSimilarity, CooccurrenceCountSimilarity, CosineSimilarity,
     * LoglikelihoodSimilarity, PearsonCorrelationSimilarity,
     * TanimotoCoefficientSimilarity. The author marked these names as failing:
     * VectorSimilarityMeasure, CountbasedMeasure.class, VectorSimilarityMeasures.
     */
    private static final String SIMILARITY_CLASS =
            "org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.EuclideanDistanceSimilarity";

    /**
     * Stages the input, runs the recommender job and prints its result.
     *
     * @param args ignored; the job arguments are assembled internally
     * @throws IllegalStateException if the recommender job returns a non-zero exit code
     * @throws Exception if any HDFS operation or the job itself throws
     */
    public static void main(String[] args) throws Exception {
        String localFile = "datafile/small2.csv";
        String usersFile = "datafile/user.csv";

        String inPath = HDFS + "/user/hdfs/userCF";
        String inFile = inPath + "/small2.csv";
        String outPath = HDFS + "/user/hdfs/userCF/result1/";
        // outPath already ends with '/', so do not add a second separator.
        String outFile = outPath + "part-r-00000";
        // Timestamped so that each run gets its own temp directory name.
        String tmpPath = HDFS + "/tmp/" + System.currentTimeMillis();

        JobConf conf = config();
        HdfsDAO hdfs = new HdfsDAO(HDFS, conf);

        // Start from a clean input directory. The output directory lives
        // underneath inPath, so this call also targets any previous result.
        hdfs.rmr(inPath);
        hdfs.mkdirs(inPath);
        hdfs.copyFile(localFile, inPath);
        hdfs.ls(inPath);
        hdfs.cat(inFile);

        // Build the argument vector directly instead of joining and re-splitting
        // on spaces, which would break on any value that contains a space.
        String[] jobArgs = {
            "--input", inPath,
            "--output", outPath,
            "--usersFile", usersFile,
            // Original author's note: PearsonCorrelationSimilarity needs this to be false.
            "--booleanData", "true",
            "--similarityClassname", SIMILARITY_CLASS,
            "--tempDir", tmpPath,
        };

        RecommenderJob job = new RecommenderJob();
        job.setConf(conf);
        int exitCode = job.run(jobArgs);
        if (exitCode != 0) {
            // Fail here rather than trying to print an output file that was never written.
            throw new IllegalStateException("RecommenderJob failed with exit code " + exitCode);
        }

        hdfs.cat(outFile);
    }

    /**
     * Creates the job configuration used by {@link #main(String[])}.
     *
     * @return a {@code JobConf} bound to this class, named {@code "ItemCFHadoop"},
     *         with the three Hadoop site files registered as resources
     */
    public static JobConf config() {
        JobConf conf = new JobConf(ItemCFHadoop1.class);
        conf.setJobName("ItemCFHadoop");
        // addResource(String) takes a plain classpath resource name. The original
        // Spring-style "classpath:/..." names are not a form it understands, so
        // they could not resolve to the site files.
        conf.addResource("hadoop/core-site.xml");
        conf.addResource("hadoop/hdfs-site.xml");
        conf.addResource("hadoop/mapred-site.xml");
        return conf;
    }
}