master node
1.环境准备nat配置
ifconfig
cd /etc/sysconfig/network-scripts/
vim ifcfg-eth0
2.vim ~/.bashrc
//注意:~/.bashrc 是每个节点各自的文件,并非集群共享。在 master 节点上改好后,需要同步(如 scp)到各 slave 节点;只有当 home 目录通过 NFS 共享时,才只需改动一次。
export JAVA_HOME=/usr/local/src/jdk1.6.0_45
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:/usr/local/src/hadoop-1.2.1/bin
#hadoop conf
export HADOOP_HOME=/usr/local/src/hadoop-1.2.1
export HADOOP_CONF_DIR=$HADOOP_HOME/conf
export PATH=$PATH:$HADOOP_HOME/bin
export HADOOP_HOME_WARN_SUPPRESS=not_null
#mahout conf
export MAHOUT_HOME=/usr/local/src/mahout-distribution-0.9
export MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
#export PATH=$MAHOUT_HOME/conf:$MAHOUT_HOME/bin:$PATH
#zookeeper conf
export ZOOKEEPER_HOME=/usr/local/src/zookeeper-3.4.5
export PATH=$MAHOUT_HOME/conf:$MAHOUT_HOME/bin:$ZOOKEEPER_HOME/bin:$PATH
export LC_ALL="zh_CN.UTF-8"
export LANG="zh_CN.UTF-8"
export LC_CTYPE="zh_CN.UTF-8"
3.配置Hosts主机IP映射——vim /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.127.10 master
192.168.127.11 slave1
192.168.127.12 slave2
4.vim /etc/sysconfig/network
#master
NETWORKING=yes
HOSTNAME=master
5.vim core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/src/hadoop-1.2.1/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://192.168.127.10:9000</value>
</property>
</configuration>
6.vim hadoop-env.sh
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
# The maximum amount of heap to use, in MB. Default is 1000.
# export HADOOP_HEAPSIZE=2000
# Extra Java runtime options. Empty by default.
# export HADOOP_OPTS=-server
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
# export HADOOP_TASKTRACKER_OPTS=
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
# export HADOOP_CLIENT_OPTS
# Extra ssh options. Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
# Where log files are stored. $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
# host:path where hadoop code should be rsync'd from. Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HADOOP_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the users that are going to run the hadoop daemons. Otherwise there is
# the potential for a symlink attack.
# export HADOOP_PID_DIR=/var/hadoop/pids
# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HADOOP_NICENESS=10
export JAVA_HOME=/usr/local/src/jdk1.6.0_45
7.vim mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<!-- 注意:mapred.job.tracker 的值格式为 host:port,不应带 http:// 前缀,否则 JobTracker 连接会失败 -->
<value>192.168.127.10:9001</value>
</property>
</configuration>
8.vim masters
master
9.vim slaves
slave1
slave2
10.vim hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
11.关闭防火墙
#方法1
/etc/init.d/iptables stop
iptables -L -n
chkconfig iptables off
#方法2
service iptables stop
service iptables status
12.免密钥登录
1).在所有节点终端生成公钥,命令如下(一路点击回车生成公钥)
生成的密钥在.ssh 目录下,并进入.ssh 目录
ssh-keygen -t rsa
cd .ssh/
2).合成并分发公钥:只需在 master 节点操作。复制所有节点的公钥文件合并到 authorized_keys.
[hadoop@master .ssh]$ cat /home/hadoop/.ssh/id_rsa.pub >> authorized_keys
[hadoop@master .ssh]$ ssh slave1 cat /home/hadoop/.ssh/id_rsa.pub >>authorized_keys
[hadoop@master .ssh]$ ssh slave2 cat /home/hadoop/.ssh/id_rsa.pub >>authorized_keys
3).将authorized_keys 文件复制到slave 节点,(如果提示输入 yes/no 的时候,输入 yes,回车),命令如下:
[hadoop@master .ssh]$ scp authorized_keys hadoop@slave1:/home/hadoop/.ssh/
[hadoop@master .ssh]$ scp authorized_keys hadoop@slave2:/home/hadoop/.ssh/
4).将 known_hosts 文件复制到slave 节点,命令如下:
[hadoop@master .ssh]$ scp known_hosts hadoop@slave1:/home/hadoop/.ssh/
[hadoop@master .ssh]$ scp known_hosts hadoop@slave2:/home/hadoop/.ssh/
5).修改所有节点authorized_keys 文件的权限,命令如下:
[hadoop@slave .ssh]$ chmod 600 authorized_keys
6).修改完权限后,验证免密钥登录。在 Master 机器上执行下面的命令:
[hadoop@master ~]$ ssh slave1
13.配置内网 NTP
14.配置hosts
只有在访问 web 界面的那台机器(如 Windows 宿主机,hosts 文件位于 C:\Windows\System32\drivers\etc\hosts)上配置了主机名映射,才能通过主机名用 web 查看 hadoop 集群运行状态。
#添加下面3行(注意 IP 必须与集群实际网段一致,本文集群为 192.168.127.x 网段)
192.168.127.10 master
192.168.127.11 slave1
192.168.127.12 slave2
下次接着更新步骤13……
大数据开发这个系列,是本着复习温故的态度来写这博客的,在3个月前开始自学Hadoop开发,中途各种坑。上周重装电脑系统后,于今天再次重新搭建Hadoop系统,发现还是有很多坑得踩,写这博客,也方便以后的开发。温故而知新。
坑1——从节点上没有DataNode
原因:hadoop namenode -format这条命令执行>1次,这条命令使用最好不要超过1次,不要多次格式化。当我们使用hadoop namenode -format格式化namenode时,会在namenode数据文件夹(这个文件夹为自己配置文件中dfs.name.dir的路径)中保存一个current/VERSION文件,记录clusterID,datanode中保存的current/VERSION文件中的clusterID的值是第一次格式化保存的clusterID,这样,datanode和namenode之间的ID不一致
坑2——从节点上DataNode不能启动为什么(与坑1一样,转自百度知道)
这样的情况一般有2种可能:
1、datanode启动失败。可以像楼上说的到datanode机器上执行jps命令查看datanode进程是否已经启动。如果jps不能正常显示,可以使用ps -ax | grep hadoop
2、datanode进程启动后又自动退出,不再运行。出现这种情况是由于namenode和datanode中保存的namespaceID不同所引起的。知道的可能情况是启动过集群后,又重新执行了hadoop namenode -format导致的。解决方法是删掉datanode配置的dfs.data.dir目录,不过这样一来所有文件就都没有了。要慎重。
那具体报什么错误呢,查看下日志吧(PS:这点很重要,学会看logs)
坑3——执行”hadoop fs -ls /”时报错No route to host
原因:防火墙没有成功关闭
坑4——执行”hadoop fs -ls /”时报错 Call to master/192.168.127.10:9001 failed on connection exception: java.net.ConnectException: Connection refused
解决方案1:执行命令./hadoop namenode -format(注意:参考坑1,重复格式化会导致namenode与datanode的ID不一致;格式化前应先停止集群,并清空各节点hadoop.tmp.dir及dfs.data.dir目录)
解决方案2:虚拟机重启试试
解决方案3:防火墙没有成功关闭,重新关闭防火墙
坑5——hadoop集群时间与真实时间不同步
不良影响:不方便查看logs,排错。一般,logs 里面会提供很多有用的错误信息,学会看logs排错很重要。后面开始写mapreduce程序时,加标记(打log,比如在程序中加print语句),调试代码&track error能力很重要!
解决方案:步骤13.配置内网 NTP