J2EE实现:
Maven SpringMVC 项目,可部署到服务器。由于比较复杂,就不在这里写了,感兴趣的可访问:
https://github.com/XVIIB/UCF-XVII
JavaSE实现
1:输入数据(26M大小的csv文件,包含1000000+条淘宝用户浏览购买信息):
用户id,商品id,评分(每行一条记录,字段以英文逗号分隔)
2:maven依赖:
http://mvnrepository.com/artifact/org.apache.mahout/mahout-core
http://mvnrepository.com/artifact/org.apache.mahout/mahout-integration
3:主程序代码(推荐系统主体)
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Offline user-based collaborative-filtering job built on Mahout's Taste API.
 *
 * <p>Loads the preference CSV into a {@link FileDataModel}, builds a
 * Euclidean-distance user similarity, and writes the single top
 * recommendation for every user id returned by
 * {@code FileOperation.getUserId()} to the output file as
 * {@code userId,itemId} lines terminated by CRLF. Users for whom the
 * recommender returns nothing are omitted from the output.
 */
public class C {
    /** Preference data read by the recommender. */
    private static final String INPUT_PATH = "D:\\data\\user.csv";

    /** Destination of the {@code userId,itemId} result lines. */
    private static final String OUTPUT_PATH = "F:\\out.csv";

    /**
     * Upper bound on the number of neighbours considered per user.
     * NOTE(review): the comment that used to sit next to this value said
     * "5 neighbours", but the code has always passed 1000000. The value is
     * kept so results do not change; confirm which one was intended.
     */
    private static final int NEIGHBORHOOD_SIZE = 1000000;

    /**
     * Runs the batch recommendation job.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Source data.
        File file = new File(INPUT_PATH);
        System.out.println(file.toString());

        // Distinct user ids collected from the same input file.
        List<String> user = FileOperation.getUserId();

        try {
            // Load the preference data.
            DataModel dataModel = new FileDataModel(file);
            // User-to-user similarity based on Euclidean distance.
            UserSimilarity similarity = new EuclideanDistanceSimilarity(dataModel);
            // Neighbourhood of at most NEIGHBORHOOD_SIZE most similar users.
            UserNeighborhood neighborhood =
                    new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, dataModel);
            // User-based collaborative-filtering recommender.
            Recommender recommender =
                    new GenericUserBasedRecommender(dataModel, neighborhood, similarity);

            // try-with-resources closes the writer even when a recommendation
            // or write fails part-way through the run.
            try (FileWriter fileWriter = new FileWriter(new File(OUTPUT_PATH))) {
                for (String item : user) {
                    long userId;
                    try {
                        userId = Long.parseLong(item);
                    } catch (NumberFormatException e) {
                        // One malformed id must not abort the whole job.
                        System.out.println("skipping non-numeric user id: " + item);
                        continue;
                    }
                    System.out.println(userId);

                    List<RecommendedItem> list = recommender.recommend(userId, 1);
                    if (list.isEmpty()) {
                        continue;
                    }
                    // A write failure propagates to the outer handler instead of
                    // being swallowed while the loop keeps computing.
                    fileWriter.write(userId + "," + list.get(0).getItemID() + "\r\n");
                }
            }
        } catch (IOException | TasteException e) {
            System.out.println("failed");
            e.printStackTrace();
        }
    }
}
用户id处理类:
/**
 * Collects the distinct user ids found in the first column of a
 * comma-separated preference file.
 */
public class FileOperation {
    /** File read by the no-argument {@link #getUserId()}. */
    private static final String DEFAULT_PATH = "D:\\data\\user.csv";

    /**
     * Reads the distinct user ids from the default input file.
     *
     * @return the ids in ascending string order; empty if the file is missing
     */
    public static ArrayList<String> getUserId() {
        return getUserId(new File(DEFAULT_PATH));
    }

    /**
     * Reads the distinct user ids from {@code file}.
     *
     * <p>Every non-blank line contributes the text before its first comma
     * (the whole line if it has no comma), trimmed; lines whose id part is
     * empty are ignored. Prints the number of rows read and the number of
     * distinct ids. An I/O error while reading is reported via a stack
     * trace and the ids gathered so far are returned.
     *
     * @param file comma-separated file whose first column is the user id
     * @return the ids in ascending string order; empty (never {@code null})
     *         if the file does not exist or holds no ids
     */
    public static ArrayList<String> getUserId(File file) {
        // TreeSet removes duplicates and keeps the ids sorted as strings.
        Set<String> ids = new TreeSet<String>();
        int rows = 0;
        try (BufferedReader br =
                new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            String line;
            // Exactly one readLine() per iteration: every row is inspected and
            // end-of-file is detected before the line is used.
            while ((line = br.readLine()) != null) {
                int comma = line.indexOf(',');
                String id = (comma < 0 ? line : line.substring(0, comma)).trim();
                if (id.isEmpty()) {
                    continue;
                }
                rows++;
                ids.add(id);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return new ArrayList<String>();
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("共发现 " + rows);
        System.out.println("去重后 " + ids.size());
        return new ArrayList<String>(ids);
    }
}
4:实现效果:
输出为所有用户的推荐细则(单机运行很慢,大概用了一个小时)
数据输出格式为(用户id ,推荐商品)