不多说,直接上干货!
首先,别在Windows下搭建,也别去安装什么Cygwin!直接在Linux下搭建:企业里推荐用CentOS 6.5,学校里用Ubuntu。
Mahout安装所需软件清单:
软件 版本 说明
操作系统 CentOS6.5 64位
JDK jdk1.7.0_79
Hadoop 2.6.0
Mahout mahout-distribution-0.8
为什么采用这个版本,而不是0.9及其以后的版本?因为版本之间差别有点大(比如fpg关联规则算法),而且网上的参考资料少。
说在前面的话,
关于Mahout的安装配置,这里介绍两种方式:其一,下载源码(直接下载源码或者通过svn下载源码都可以),然后使用Maven进行编译;其二,下载完整包进行解压缩。这里我使用的是完整包进行解压缩安装。
一、 mahout-distribution-0.8.tar.gz的下载
http://archive.apache.org/dist/mahout/0.8/
我这里,以稳定版本mahout-distribution-0.8为例
当然,这里也可以使用wget命令在线下载,很简单,不多说。
二、 mahout-distribution-0.8.tar.gz的安装
1、先新建好目录
我一般喜欢在/usr/local/下新建
[root@djt002 local]# pwd
/usr/local
[root@djt002 local]# ll
total 72
drwxr-xr-x. 2 root root 4096 Sep 23 2011 bin
drwxr-xr-x. 2 hadoop hadoop 4096 Mar 14 06:19 data
drwxr-xr-x. 3 hadoop hadoop 4096 Feb 21 23:10 elasticsearch
drwxr-xr-x. 2 root root 4096 Sep 23 2011 etc
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 17 17:14 flume
drwxr-xr-x. 2 root root 4096 Sep 23 2011 games
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:33 hadoop
drwxr-xr-x. 3 hadoop hadoop 4096 Mar 16 18:26 hbase
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 14 17:48 hive
drwxr-xr-x. 2 root root 4096 Sep 23 2011 include
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:25 jdk
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib64
drwxr-xr-x. 2 root root 4096 Sep 23 2011 libexec
drwxr-xr-x. 2 root root 4096 Sep 23 2011 sbin
drwxr-xr-x. 5 root root 4096 Jan 16 20:09 share
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 17 23:33 sqoop
drwxr-xr-x. 2 root root 4096 Sep 23 2011 src
[root@djt002 local]# mkdir mahout
[root@djt002 local]# ll
total 76
drwxr-xr-x. 2 root root 4096 Sep 23 2011 bin
drwxr-xr-x. 2 hadoop hadoop 4096 Mar 14 06:19 data
drwxr-xr-x. 3 hadoop hadoop 4096 Feb 21 23:10 elasticsearch
drwxr-xr-x. 2 root root 4096 Sep 23 2011 etc
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 17 17:14 flume
drwxr-xr-x. 2 root root 4096 Sep 23 2011 games
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:33 hadoop
drwxr-xr-x. 3 hadoop hadoop 4096 Mar 16 18:26 hbase
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 14 17:48 hive
drwxr-xr-x. 2 root root 4096 Sep 23 2011 include
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:25 jdk
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib64
drwxr-xr-x. 2 root root 4096 Sep 23 2011 libexec
drwxr-xr-x 2 root root 4096 Apr 7 00:21 mahout
drwxr-xr-x. 2 root root 4096 Sep 23 2011 sbin
drwxr-xr-x. 5 root root 4096 Jan 16 20:09 share
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 17 23:33 sqoop
drwxr-xr-x. 2 root root 4096 Sep 23 2011 src
[root@djt002 local]# chown -R hadoop:hadoop mahout
[root@djt002 local]# ll
total 76
drwxr-xr-x. 2 root root 4096 Sep 23 2011 bin
drwxr-xr-x. 2 hadoop hadoop 4096 Mar 14 06:19 data
drwxr-xr-x. 3 hadoop hadoop 4096 Feb 21 23:10 elasticsearch
drwxr-xr-x. 2 root root 4096 Sep 23 2011 etc
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 17 17:14 flume
drwxr-xr-x. 2 root root 4096 Sep 23 2011 games
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:33 hadoop
drwxr-xr-x. 3 hadoop hadoop 4096 Mar 16 18:26 hbase
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 14 17:48 hive
drwxr-xr-x. 2 root root 4096 Sep 23 2011 include
drwxr-xr-x. 3 hadoop hadoop 4096 Jan 16 23:25 jdk
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib
drwxr-xr-x. 2 root root 4096 Sep 23 2011 lib64
drwxr-xr-x. 2 root root 4096 Sep 23 2011 libexec
drwxr-xr-x 2 hadoop hadoop 4096 Apr 7 00:21 mahout
drwxr-xr-x. 2 root root 4096 Sep 23 2011 sbin
drwxr-xr-x. 5 root root 4096 Jan 16 20:09 share
drwxr-xr-x. 4 hadoop hadoop 4096 Mar 17 23:33 sqoop
drwxr-xr-x. 2 root root 4096 Sep 23 2011 src
[root@djt002 local]#
2、上传mahout压缩包
[root@djt002 local]# su hadoop
[hadoop@djt002 local]$ cd mahout/
[hadoop@djt002 mahout]$ pwd
/usr/local/mahout
[hadoop@djt002 mahout]$ ll
total 0
[hadoop@djt002 mahout]$ rz
[hadoop@djt002 mahout]$ ll
total 67628
-rw-r--r-- 1 hadoop hadoop 69248331 Apr 6 16:09 mahout-distribution-0.8.tar.gz
[hadoop@djt002 mahout]$
3、解压
[hadoop@djt002 mahout]$ pwd
/usr/local/mahout
[hadoop@djt002 mahout]$ ll
total 67628
-rw-r--r-- 1 hadoop hadoop 69248331 Apr 6 16:09 mahout-distribution-0.8.tar.gz
[hadoop@djt002 mahout]$ tar -zxvf mahout-distribution-0.8.tar.gz
4、删除压缩包和赋予用户组
[hadoop@djt002 mahout]$ pwd
/usr/local/mahout
[hadoop@djt002 mahout]$ ll
total 67632
drwxrwxr-x 7 hadoop hadoop 4096 Apr 7 00:25 mahout-distribution-0.8
-rw-r--r-- 1 hadoop hadoop 69248331 Apr 6 16:09 mahout-distribution-0.8.tar.gz
[hadoop@djt002 mahout]$ rm mahout-distribution-0.8.tar.gz
[hadoop@djt002 mahout]$ ll
total 4
drwxrwxr-x 7 hadoop hadoop 4096 Apr 7 00:25 mahout-distribution-0.8
[hadoop@djt002 mahout]$
5、mahout的配置
[root@djt002 mahout-distribution-0.8]# pwd
/usr/local/mahout/mahout-distribution-0.8
[root@djt002 mahout-distribution-0.8]# vim /etc/profile
#mahout
export MAHOUT_HOME=/usr/local/mahout/mahout-distribution-0.8
export MAHOUT_HOME_CONF_DIR=/usr/local/mahout/mahout-distribution-0.8/conf
export PAT