1、安装VirtualBox,安装好ubuntu操作系统,用户名统一hadoop;hostname分别为ubuntu1(master节点)、ubuntu2(slave节点)、ubuntu3(slave节点),然后网络模式选择NAT网络,更新apt源 sudo apt update
2、安装jdk
sudo apt-get install openjdk-8-jdk
验证jdk是否安装成功
java -version
3、关闭防火墙
sudo ufw disable
4、在三台主机hosts分别添加如下解析内容,ip地址填写主机分配到的实际ip,可用ifconfig查看
10.0.2.15 ubuntu1
10.0.2.4 ubuntu2
10.0.2.5 ubuntu3
5、三台主机分别使用hadoop用户登录设置三台主机免密登录,分别执行
ssh-keygen -t rsa
复制两台slave的公钥到master上
$scp hadoop@ubuntu2:~/.ssh/id_rsa.pub ~/.ssh/id_rsa.pub.2
$scp hadoop@ubuntu3:~/.ssh/id_rsa.pub ~/.ssh/id_rsa.pub.3
合并公钥到authorized_keys
$cat ~/.ssh/id_rsa.pub ~/.ssh/id_rsa.pub.2 ~/.ssh/id_rsa.pub.3 >> ~/.ssh/authorized_keys
# 发布authorized_keys到slave服务器
$scp ~/.ssh/authorized_keys hadoop@ubuntu2:~/.ssh/
$scp ~/.ssh/authorized_keys hadoop@ubuntu3:~/.ssh/
# 测试免密登录
$ssh hadoop@ubuntu2
$ssh hadoop@ubuntu3
6、主节点创建软件包目录,代码目录,数据目录
# 目录需与后面core-site.xml、hdfs-site.xml中配置的路径一致
mkdir -p ~/hadoop/dfs/name
mkdir -p ~/hadoop/dfs/data
mkdir -p ~/hadoop/tmp
7、安装hadoop
$tar -xzvf hadoop-*.tar.gz
$cd hadoop-*
$vi /home/hadoop/hadoop-*/etc/hadoop/hadoop-env.sh
修改 export JAVA_HOME={JAVA_HOME} 为 export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64,java_home 可用ls -lrt /etc/alternatives/java 查看
修改核心组件core-site.xml
$vi /home/hadoop/hadoop-*/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/hadoop/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ubuntu1:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
配置hdfs-site.xml
$vi /home/hadoop/hadoop-*/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>ubuntu1:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.web.ugi</name>
<value>supergroup</value>
</property>
</configuration>
配置yarn-site.xml
$vi /home/hadoop/hadoop-*/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>ubuntu1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>ubuntu1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>ubuntu1:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>ubuntu1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>ubuntu1:8088</value>
</property>
</configuration>
配置mapred-site.xml
$vi /home/hadoop/hadoop-*/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>
配置workers
$vi workers
ubuntu1
ubuntu2
ubuntu3
8、发布hadoop到slave
$scp -r ~/hadoop-* hadoop@ubuntu2:~/
$scp -r ~/hadoop-* hadoop@ubuntu3:~/
9、添加配置项
$sudo vi /etc/profile
export HADOOP_HOME=$HOME/hadoop-3.2.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS=-Djava.library.path=$HADOOP_HOME/lib
$source /etc/profile
10、仅在master节点(ubuntu1)执行NameNode格式化(只需执行一次,slave节点无需执行)
$hdfs namenode -format
11、启动
$start-all.sh
12、查看进程
$jps
13、查看webUI ,直接浏览器打开http://ubuntu1:9870(3.*版本),由于使用的是虚拟机NAT模式,所以需要在虚拟机管理->全局设定->网络->选择使用的网络名称->编辑->添加端口转发->填写本地ip和端口,虚拟机的ip和端口完成转发,即可使用宿主机的ip打开webUI管理页面
14、测试Yarn是否正常:打开 http://ubuntu1:8088
15、验证集群
$hadoop jar /home/hadoop/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 10 10
成功的话可以看见PI的预估值 Estimated value of PI is 3.20000000