I. System Configuration
1. systemctl stop firewalld && systemctl disable firewalld
2. systemctl start ntpd && systemctl enable ntpd
3. vi /etc/sysconfig/selinux -> set SELINUX=disabled (takes effect after a reboot; run setenforce 0 to disable it for the current session)
4. vi /etc/hosts -> add an IP-to-hostname mapping for every node (example below)
5. vi /etc/security/limits.conf -> raise the open-file and process limits (example below), then verify with ulimit -a | grep -v unlimited
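A minimal sketch for steps 4 and 5; the IP addresses and limit values are placeholders to adjust for your environment:

# /etc/hosts -- one line per node
192.168.1.10    master
192.168.1.11    slave

# /etc/security/limits.conf -- raise open-file and process limits for all users
*    soft    nofile    65536
*    hard    nofile    65536
*    soft    nproc     65536
*    hard    nproc     65536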
II. Passwordless SSH (run on the master host, under /root/.ssh)
1. [root@master]# ssh-keygen -t rsa    // press Enter three times to accept the defaults
2. [root@master]# ssh-copy-id slave    // install the public key on the slave node
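To confirm the key was installed correctly before going further:

[root@master]# ssh slave hostname    // should print "slave" without prompting for a password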
III. Hadoop Configuration
// Six configuration files: core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml, hadoop-env.sh, workers
1. [root@master]# cd $HADOOP_HOME/etc/hadoop
2. [root@master]# vi core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:8020</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hdfs/tmp</value>
    </property>
</configuration>
3. [root@master]# vi hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
        <description>Number of block replicas</description>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///data/hdfs/name</value>
        <description>Local path for NameNode metadata</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///data/hdfs/data</value>
        <description>Local path for DataNode block storage</description>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
        <description>Disable HDFS permission checking</description>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
        <description>SecondaryNameNode HTTP address</description>
    </property>
</configuration>
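The local paths referenced above must exist on the nodes that use them. Assuming /data is writable on both hosts, they can be created up front:

[root@master]# mkdir -p /data/hdfs/{name,data,tmp}
[root@master]# ssh slave "mkdir -p /data/hdfs/{name,data,tmp}"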
4. [root@master]# vi mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
    <property>
        <name>mapreduce.map.output.compress</name>
        <value>true</value>
    </property>
    <property>
        <name>mapreduce.map.output.compress.codec</name>
        <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>
</configuration>
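SnappyCodec depends on the Hadoop native library; if it is not loaded, jobs will fail when compressing map output. Native support can be checked with:

[root@master]# hadoop checknative -a    // the snappy line should report true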
5. [root@master]# vi yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
6. [root@master]# vi workers
slave
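workers lists one worker hostname per line; with more nodes it would look like this (slave1/slave2 are hypothetical hostnames that must also resolve via /etc/hosts):

slave1
slave2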
7. [root@master]# vi hadoop-env.sh    (set root as the service user for HDFS & YARN)
export JAVA_HOME=/usr/local/java/jdk1.8.0_291
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
// Note: if these variables are missing, cluster startup fails with the following errors:
Starting namenodes on [master]
ERROR: Attempting to operate on hdfs namenode as root
ERROR: but there is no HDFS_NAMENODE_USER defined. Aborting operation.
Starting datanodes
ERROR: Attempting to operate on hdfs datanode as root
ERROR: but there is no HDFS_DATANODE_USER defined. Aborting operation.
Starting secondary namenodes [master]
ERROR: Attempting to operate on hdfs secondarynamenode as root
ERROR: but there is no HDFS_SECONDARYNAMENODE_USER defined. Aborting operation.
8. Copy $HADOOP_HOME/etc/hadoop/* from the master to $HADOOP_HOME/etc/hadoop/ on the slave node, as shown below.
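Assuming $HADOOP_HOME points to the same path on both nodes, one way to do this is:

[root@master]# scp -r $HADOOP_HOME/etc/hadoop/* slave:$HADOOP_HOME/etc/hadoop/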
9. [root@master]# hdfs namenode -format    // format the HDFS filesystem (first startup only; reformatting destroys existing metadata)
10. [root@master]# $HADOOP_HOME/sbin/start-dfs.sh && $HADOOP_HOME/sbin/start-yarn.sh
11. [root@master]# jps    // verify the expected processes are running
12. [root@master]# tail -n 100 $HADOOP_HOME/logs/hadoop-*-namenode-*.log    // inspect the startup log if anything failed
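As a final sanity check (PIDs will vary): jps on the master should show NameNode, SecondaryNameNode, and ResourceManager; jps on the slave should show DataNode and NodeManager. Cluster health can also be confirmed from the command line or the web UI:

[root@master]# hdfs dfsadmin -report    // the live DataNode count should match the workers file
[root@master]# yarn node -list          // lists the active NodeManagers
// NameNode web UI (Hadoop 3.x default port): http://master:9870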