Mahout是Apache下的一个用Java语言编写的开源大数据机器学习项目,与其他机器学习项目不同的是,它的算法多数是以MapReduce方式编写的,可以在Hadoop上运行,并行化处理大规模数据。
协同过滤在Mahout里是由一个叫Taste的引擎提供的,它提供两种模式:一种以jar形式嵌入到程序里,在进程内执行;另一种以MapReduce job形式在Hadoop上运行。这两种模式的算法是
一样的,配置也类似。
DataModel:DataModel是用户喜好信息的抽象接口,它的具体实现支持从任意类型的数据源抽取用户的喜好信息。
Taste默认提供JDBCDataModel和FileDataModel,分别支持从数据库和文件读取用户的喜好信息。
UserSimilarity和ItemSimilarity:UserSimilarity用来定义两个用户间的相似度,它是基于协同过滤的推荐引擎的核心部分,可以用来计算用户的邻居,这里我们将与当前用户口味相似的
用户称为他的邻居。ItemSimilarity与之类似,用来计算内容之间的相似度。
UserNeighborhood:用于基于用户相似度的推荐方法中。推荐的内容是通过找到与当前用户喜好相似的邻居用户的方式产生的,UserNeighborhood定义了确定邻居用户的方法,具体实现一般是基于UserSimilarity计算
得到的。
Recommender:Recommender是推荐引擎的抽象接口,是Taste中的核心组件。程序中,为它提供一个DataModel,它可以计算出对不同用户的推荐内容。实际应用中主要使用它的实现类GenericUserBasedRecommender
或者GenericItemBasedRecommender,分别实现基于用户相似度的推荐引擎或者基于内容的推荐引擎。
Item-based
GenericItemBasedRecommender
GenericBooleanPrefItemBasedRecommender
KnnItemBasedRecommender
User-based
GenericUserBasedRecommender
GenericBooleanPrefUserBasedRecommender
Model-based
SlopeOneRecommender
SVDRecommender
TreeClusteringRecommender
ItemAverageRecommender
准备数据
1,101,5
1,102,3
1,103,2.5
2,101,2
2,102,2.5
2,103,5
2,104,2
3,101,2.5
3,104,4
3,105,4.5
3,107,5
4,101,5
4,103,3
4,104,4.5
4,106,4
5,101,4
5,102,3
5,103,2
5,104,4
5,105,3.5
5,106,4
<!--
Maven dependencies for the Mahout 0.7 examples in this file:
Mahout core/integration/math 0.7, Hadoop core 0.20.204.0, plus Commons, Guava and logging libraries.
NOTE(review): slf4j-jcl and slf4j-log4j12 are both SLF4J bindings; SLF4J expects a single binding
on the classpath - confirm which one is intended.
NOTE(review): commons-io is normally published under groupId "commons-io" - confirm that the
org.apache.commons coordinate below resolves in your repository.
-->
<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>r09</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-integration</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-math</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jcl</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.mahout.uncommons.math</groupId>
<artifactId>uncommons-math</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.204.0</version>
</dependency>
</dependencies>
//基于用户的推荐
package com.paohaijiao.mahout;
import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * User-based collaborative filtering demo using the Mahout Taste API.
 *
 * <p>Builds a {@code FileDataModel} from a preference file, measures user similarity with
 * {@code PearsonCorrelationSimilarity}, uses a {@code NearestNUserNeighborhood} of size 3 and
 * prints the recommendations returned for user 1.
 */
public class UserItemRecommend {

    /** Preference file read when no path is supplied on the command line. */
    private static final String DEFAULT_DATA_PATH = "F:\\hadoopKit\\data\\useritem.data";

    /**
     * Runs the demo and prints each recommended item to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the preference file. When it is
     *             absent, {@link #DEFAULT_DATA_PATH} is used.
     * @throws Exception propagated from the data-model, similarity or recommender calls
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original fixed location so existing no-argument runs still work.
        String dataPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_PATH;

        // Build the data model from the preference file.
        DataModel dm = new FileDataModel(new File(dataPath));

        // User-based recommendation: similarity between users, then the 3 nearest users.
        UserSimilarity us = new PearsonCorrelationSimilarity(dm);
        UserNeighborhood unb = new NearestNUserNeighborhood(3, us, dm);

        // Ask for 2 recommendations for user 1 (arguments: user ID, number of items).
        Recommender re = new GenericUserBasedRecommender(dm, unb, us);
        List<RecommendedItem> list = re.recommend(1, 2);
        for (RecommendedItem item : list) {
            System.out.println(item);
        }
    }
}
//基于物品的推荐
package com.paohaijiao.mahout;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import java.util.*;
import java.io.File;
/**
 * Item-based collaborative filtering demo using the Mahout Taste API.
 *
 * <p>Builds a {@code FileDataModel} from a preference file, uses
 * {@code PearsonCorrelationSimilarity} as the item similarity and prints the recommendations
 * returned by a {@code GenericItemBasedRecommender} for user 1.
 */
public class ItemUserRecommend {

    /** Preference file read when no path is supplied on the command line. */
    private static final String DEFAULT_DATA_PATH = "F:\\hadoopKit\\data\\useritem.data";

    /**
     * Runs the demo and prints each recommended item to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the preference file. When it is
     *             absent, {@link #DEFAULT_DATA_PATH} is used.
     * @throws Exception propagated from the data-model, similarity or recommender calls
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original fixed location so existing no-argument runs still work.
        String dataPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_PATH;

        DataModel model = new FileDataModel(new File(dataPath));
        ItemSimilarity similarity = new PearsonCorrelationSimilarity(model);
        Recommender recommender = new GenericItemBasedRecommender(model, similarity);

        // Ask for 1 recommendation for user 1 (arguments: user ID, number of items).
        List<RecommendedItem> list = recommender.recommend(1, 1);
        for (RecommendedItem item : list) {
            System.out.println(item);
        }
    }
}
综合运用,采用Slope One
package com.paohaijiao.mahout;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import java.util.*;
import java.io.File;
/**
 * Slope One recommendation demo using the Mahout Taste API.
 *
 * <p>Builds a {@code FileDataModel} from a preference file and prints the recommendations
 * returned by a {@code SlopeOneRecommender} for user 1.
 */
public class Slopone {

    /** Preference file read when no path is supplied on the command line. */
    private static final String DEFAULT_DATA_PATH = "F:\\hadoopKit\\data\\useritem.data";

    /**
     * Runs the demo and prints each recommended item to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the preference file. When it is
     *             absent, {@link #DEFAULT_DATA_PATH} is used.
     * @throws Exception propagated from the data-model or recommender calls
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original fixed location so existing no-argument runs still work.
        String dataPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_PATH;

        DataModel dm = new FileDataModel(new File(dataPath));
        Recommender recommender = new SlopeOneRecommender(dm);

        // Ask for 1 recommendation for user 1 (arguments: user ID, number of items).
        List<RecommendedItem> list = recommender.recommend(1, 1);
        for (RecommendedItem item : list) {
            System.out.println(item);
        }
    }
}