Deploying Mahout 0.10 on Hadoop 2.3.0, and Testing the Standalone and Distributed Personalized Recommendation Programs

1 Deploying the Hadoop 2.3.0 and Mahout 0.10 jars in Eclipse

    On Hadoop 2 and later, Mahout 0.10 or newer runs out of the box; older Mahout releases must first be recompiled against Hadoop 2. This article uses Mahout 0.10 directly, so before running anything, simply import the Hadoop 2.3.0 and Mahout 0.10 jars into Eclipse. The Hadoop 2.3.0 jar setup in Eclipse is covered in the previous article, "Deploying a Hadoop 2.3.0 environment in Eclipse and submitting MapReduce jobs directly from Eclipse"; the Mahout 0.10 jar setup in Eclipse is shown in the figure below:

[Figure: Mahout 0.10 jars added to the Eclipse project's build path]
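A quick way to confirm the build path is wired up correctly is to resolve one class from each jar set. The sketch below is an illustrative addition, not part of the original article; if either Class.forName call throws ClassNotFoundException, the corresponding jars are missing from the Eclipse project:

public class ClasspathCheck {
    public static void main(String[] args) throws ClassNotFoundException {
        // Configuration comes from the Hadoop jars; the two Mahout classes
        // come from the Taste (standalone) and hadoop (distributed) jars.
        Class.forName("org.apache.hadoop.conf.Configuration");
        Class.forName("org.apache.mahout.cf.taste.impl.model.file.FileDataModel");
        Class.forName("org.apache.mahout.cf.taste.hadoop.item.RecommenderJob");
        System.out.println("Hadoop and Mahout jars are both on the classpath.");
    }
}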

2 Standalone personalized recommendation source code

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Product recommendation, standalone mode
 * 
 * @author hadoop
 *
 */
// Input format: userID,itemID,rating
// 1,101,5.0
// 1,102,3.0
// 1,103,2.5
// 2,101,2.0
// 2,102,2.5
// 2,103,5.0
// 2,104,2.0
// 3,101,2.5
// 3,104,4.0
// 3,105,4.5
// 3,107,5.0
// 4,101,5.0
// 4,103,3.0
// 4,104,4.5
// 4,106,4.0
// 5,101,4.0
// 5,102,3.0
// 5,103,2.0
// 5,104,4.0
// 5,105,3.5
// 5,106,4.0

public class UserCF {

    final static int NEIGHBORHOOD_NUM = 2;// size of the user neighborhood used for similarity
    final static int RECOMMENDER_NUM = 3;// number of items to recommend per user

    /**
     * @description DataModel stores and serves the user, item, and preference data
     *              needed for the computation.
     *              UserSimilarity provides similarity measures between users under
     *              some chosen algorithm.
     *              UserNeighborhood defines the set of users most similar to a given user.
     *              Recommender combines all of these components to produce
     *              recommendations for a user, and exposes related methods as well.
     * @param args
     * @throws IOException
     * @throws TasteException
     */
    public static void main(String[] args) throws IOException, TasteException {
        String file = "E:/hadoop/mahout0.9_1jars/mahout_in1.txt";// path to the data file; a compressed file also works
        DataModel model = new FileDataModel(new File(file));// load the data
        UserSimilarity user = new EuclideanDistanceSimilarity(model);// user-user similarity; weights fall in (0,1]
        NearestNUserNeighborhood neighbor = new NearestNUserNeighborhood(
                NEIGHBORHOOD_NUM, user, model);// find the most similar users
        Recommender r = new GenericUserBasedRecommender(model, neighbor, user);
        LongPrimitiveIterator iter = model.getUserIDs();

        while (iter.hasNext()) {
            long uid = iter.nextLong();
            List<RecommendedItem> list = r.recommend(uid, RECOMMENDER_NUM);
            System.out.printf("uid:%s", uid);
            for (RecommendedItem ritem : list) {
                System.out.printf("(%s,%f)", ritem.getItemID(),
                        ritem.getValue());

            }
            System.out.println();
        }

        /**
         * Evaluate the recommendation quality (precision / recall)
         */
        RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
        RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model)
                    throws TasteException {
                UserSimilarity similarity = new PearsonCorrelationSimilarity(
                        model);
                UserNeighborhood neighborhood = new NearestNUserNeighborhood(2,
                        similarity, model);
                return new GenericUserBasedRecommender(model, neighborhood,
                        similarity);
            }
        };

        IRStatistics stats = evaluator.evaluate(recommenderBuilder, null,
                model, null, 2,// precision/recall "at 2"
                GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD, 1.0);// evaluate on 100% of the users

        System.out.println("查准率: " + stats.getPrecision());//查准率
        System.out.println("召回率: " + stats.getRecall());//召回率
        
    }
}

    Run output:

uid:1(104,4.274336)(106,4.000000)
uid:2(105,4.055916)
uid:3(103,3.360987)(102,2.773169)
uid:4(102,3.000000)
uid:5
Precision: 0.75
Recall: 1.0
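
As a point of comparison, the same input can also drive an item-based recommender, which swaps the user-similarity/neighborhood pair for a single ItemSimilarity. The following minimal sketch is my own addition rather than part of the original article; it reuses the mahout_in1.txt path above and assumes LogLikelihoodSimilarity as the measure:

import java.io.File;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;

public class ItemCF {
    public static void main(String[] args) throws Exception {
        DataModel model = new FileDataModel(new File(
                "E:/hadoop/mahout0.9_1jars/mahout_in1.txt"));// same input as UserCF
        ItemSimilarity similarity = new LogLikelihoodSimilarity(model);// no neighborhood needed
        GenericItemBasedRecommender r = new GenericItemBasedRecommender(model, similarity);
        for (RecommendedItem item : r.recommend(1, 3)) {// top 3 items for user 1
            System.out.printf("(%s,%f)%n", item.getItemID(), item.getValue());
        }
    }
}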

3 Distributed personalized recommendation source code

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CityBlockSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CooccurrenceCountSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.EuclideanDistanceSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.LoglikelihoodSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.PearsonCorrelationSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;

public class MahoutJobTest {
    public static void main(String args[]) throws Exception{
        Configuration conf= new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.100:9000");// fs.default.name is deprecated in Hadoop 2
        conf.set("hadoop.job.user", "hadoop");
        conf.set("mapreduce.framework.name", "yarn");// submit to YARN rather than running locally
        conf.set("mapreduce.jobtracker.address", "192.168.1.101:9001");
        conf.set("yarn.resourcemanager.hostname", "192.168.1.101");
        conf.set("yarn.resourcemanager.admin.address", "192.168.1.101:8033");
        conf.set("yarn.resourcemanager.address", "192.168.1.101:8032");
        conf.set("yarn.resourcemanager.resource-tracker.address", "192.168.1.101:8031");
        conf.set("yarn.resourcemanager.scheduler.address", "192.168.1.101:8030");
        
        String[] str ={
                 "-i","hdfs://192.168.1.100:9000/data/test_in/mahout_in1.csv", 
                 "-o","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/rec001",  
                 "-n","3",
                 "-b","false",
                 
                 //mahout自带的相似类列表
//                 SIMILARITY_COOCCURRENCE(CooccurrenceCountSimilarity.class),
//                 SIMILARITY_LOGLIKELIHOOD(LoglikelihoodSimilarity.class),
//                 SIMILARITY_TANIMOTO_COEFFICIENT(TanimotoCoefficientSimilarity.class),
//                 SIMILARITY_CITY_BLOCK(CityBlockSimilarity.class),
//                 SIMILARITY_COSINE(CityBlockSimilarity.class),
//                 SIMILARITY_PEARSON_CORRELATION(CosineSimilarity.class),
//                 SIMILARITY_EUCLIDEAN_DISTANCE(EuclideanDistanceSimilarity.class);
                 "-s","SIMILARITY_CITY_BLOCK",  
                 
                 "--maxPrefsPerUser","70",
                 "--minPrefsPerUser","2",  
                 "--maxPrefsInItemSimilarity","70",  
                 "--outputPathForSimilarityMatrix","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/matrix/rec001",
                 "--tempDir","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/temp/rec001"
                 }; 
        
        ToolRunner.run(conf, new RecommenderJob(), str);
    }
}
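
Once the job completes, the recommendations are plain text in part-r-* files under the -o directory, one user per line (userID, a tab, then an [item:score,...] list). The following sketch is my own addition, assuming the NameNode address and output path used above and that the first reducer output landed in part-r-00000; it prints that file straight from HDFS:

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintRecommendations {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.100:9000");// same NameNode as above
        FileSystem fs = FileSystem.get(conf);
        // Output path assumed from the RecommenderJob "-o" argument above.
        Path out = new Path("/data/test_out/mahout_out_CityBlockSimilarity/rec001/part-r-00000");
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(out)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);// e.g. 1<TAB>[104:4.27,106:4.0,...]
            }
        }
    }
}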


Reposted from: https://my.oschina.net/mkh/blog/472508
