I. Virtual Machine Setup
1. Create the virtual machines
2. Set the hostname on each node (master, slave1, slave2)
3. Configure the network
4. Configure /etc/hosts
vi /etc/hosts
10.211.55.10 master
10.211.55.9 slave1
10.211.55.8 slave2
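A quick connectivity check (assuming all three VMs are already up) confirms that the hostnames resolve:
# run on each node; every host should answer
ping -c 1 master
ping -c 1 slave1
ping -c 1 slave2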
5. Disable the firewall
service iptables stop
chkconfig iptables off
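The iptables service commands above apply to SysV-init systems such as CentOS 6; on a systemd-based distribution (e.g. CentOS 7), the equivalent is:
systemctl stop firewalld
systemctl disable firewalld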
6. Disable SELinux
vi /etc/selinux/config
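Set the SELINUX line as below so SELinux stays off after a reboot; setenforce turns it off for the current session:
# in /etc/selinux/config, set:
SELINUX=disabled
# apply immediately without rebooting:
sudo setenforce 0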
7. Configure passwordless SSH login (as the hadoop user wachoo)
ssh-keygen -t rsa
ssh-copy-id wachoo@master
ssh-copy-id wachoo@slave1
ssh-copy-id wachoo@slave2
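To confirm the keys were installed, each of these should print the remote hostname without prompting for a password:
ssh wachoo@slave1 hostname
ssh wachoo@slave2 hostname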
II. Installing JDK 1.8
1. Download the installation package
scp software/jdk-8u121-linux-x64.rpm wachoo@master:/home/wachoo/software
2. Install
sudo rpm -ivh jdk-8u121-linux-x64.rpm
3. Set environment variables
vi ~/.bashrc
export JAVA_HOME=/usr/java/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
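Reload the shell configuration and verify that the JDK is usable:
source ~/.bashrc
java -version
echo $JAVA_HOME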
III. Installing Hadoop
1. Download and extract the package
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/core/hadoop-3.1.3/hadoop-3.1.3.tar.gz
sudo mkdir -p /usr/hadoop && sudo tar -xvf hadoop-3.1.3.tar.gz -C /usr/hadoop/
# give wachoo ownership so the later non-root steps work
sudo chown -R wachoo:wachoo /usr/hadoop
2. Configure Hadoop environment variables
sudo vi /etc/profile
# add the following at the end of the file
# HADOOP_HOME
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# reload the environment variables
source /etc/profile
# verify the installation
hadoop version
3. Edit the configuration files
# create the working directories under /usr/hadoop
mkdir /usr/hadoop/tmp /usr/hadoop/mr
mkdir -p /usr/hadoop/hdfs/namenode /usr/hadoop/hdfs/datanode
mkdir -p /usr/hadoop/yarn/nodemanager /usr/hadoop/yarn/logs
cd $HADOOP_HOME/etc/hadoop
# the following files need to be edited:
# workers hadoop-env.sh
# core-site.xml hdfs-site.xml yarn-site.xml mapred-site.xml
#1. In workers, delete localhost and add the slave hostnames, e.g.:
vi workers
slave1
slave2
#2. In hadoop-env.sh, add the following below the line "# JAVA_HOME=/usr/java/testing hdfs dfs -ls"
vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_121
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export HDFS_NAMENODE_USER=wachoo
export HDFS_DATANODE_USER=wachoo
export HDFS_SECONDARYNAMENODE_USER=wachoo
export YARN_RESOURCEMANAGER_USER=wachoo
export YARN_NODEMANAGER_USER=wachoo
#3. Edit core-site.xml
vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
<description>NameNode address and port</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/hadoop/tmp</value>
<description>Base directory for temporary files</description>
</property>
</configuration>
#4. Edit hdfs-site.xml
vi hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/hadoop/hdfs/namenode</value>
<description>
Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.
</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/hadoop/hdfs/datanode</value>
<description>
Comma-separated list of paths on the local filesystem of a DataNode where it should store its blocks.
</description>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9001</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>Disable HDFS permission checks (convenient for a test cluster)</description>
</property>
</configuration>
#5. Edit yarn-site.xml
vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:/usr/hadoop/yarn/nodemanager</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:/usr/hadoop/yarn/logs</value>
</property>
</configuration>
#6. Edit mapred-site.xml
vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx480M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx480M</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/hadoop/hadoop-3.1.3/etc/hadoop,
/usr/hadoop/hadoop-3.1.3/share/hadoop/common/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/common/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/lib/*
</value>
<description>Set the MapReduce application classpath; jobs may fail to find Hadoop classes without it</description>
</property>
</configuration>
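The same installation and configuration must also be present on the slaves, along with the JDK and the environment variables from sections II and III.2. One way to push everything out from master (a sketch, assuming /usr/hadoop already exists on each slave and is owned by wachoo as in step III.1):
for host in slave1 slave2; do
  scp -r /usr/hadoop/hadoop-3.1.3 wachoo@$host:/usr/hadoop/
  ssh wachoo@$host 'mkdir -p /usr/hadoop/tmp /usr/hadoop/mr /usr/hadoop/hdfs/{namenode,datanode} /usr/hadoop/yarn/{nodemanager,logs}'
done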
4. Start the cluster (on master)
#1. Format the NameNode
hdfs namenode -format
# cd $HADOOP_HOME/sbin
#2. Start HDFS
start-dfs.sh
#3. Start YARN
start-yarn.sh
# check the running Java processes
jps
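With this configuration, jps should show roughly the following daemons (plus Jps itself) if everything came up:
# on master
NameNode
SecondaryNameNode
ResourceManager
# on slave1 / slave2
DataNode
NodeManager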
5. Verify
hdfs dfs -ls /
cd $HADOOP_HOME/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-3.1.3.jar pi 5 12
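A simple HDFS round trip is another useful smoke test (the file and directory names here are just examples):
echo 'hello hadoop' > /tmp/test.txt
hdfs dfs -mkdir -p /user/wachoo
hdfs dfs -put /tmp/test.txt /user/wachoo/
hdfs dfs -cat /user/wachoo/test.txt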
6. Reset the cluster (clear and restart)
# cd $HADOOP_HOME/sbin
# stop all daemons
stop-all.sh
rm -rf /usr/hadoop/tmp/*
hdfs namenode -format
start-all.sh
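Note that re-formatting after clearing only hadoop.tmp.dir can leave the DataNodes with a stale clusterID, in which case they refuse to join the new filesystem; to be safe, clear the HDFS data directories on every node before formatting (a sketch, assuming the directory layout above):
rm -rf /usr/hadoop/hdfs/namenode/*
for host in slave1 slave2; do
  ssh wachoo@$host 'rm -rf /usr/hadoop/hdfs/datanode/* /usr/hadoop/tmp/*'
done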