进入apache官网,查看hadoop的版本,下载tar包(这里使用的是2.7.2版本)
也可以进入国内开源镜像网站下载tar包
1.解压tar包
tar -zxvf hadoop-2.7.2.tar.gz
2.进入解压之后的hadoop文件
cd hadoop-2.7.2
3.查看文件安装路径
pwd
#我的安装路径
/opt/hadoop-2.7.2
4.配置环境变量
打开 /etc/profile
文件
vim /etc/profile
添加以下内容
#HADOOP_HOME
export HADOOP_HOME=/opt/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
使配置环境生效
source /etc/profile
查看是否生效
hadoop version
要是 source 之后没有生效:1. 可能没有配置 JDK 的环境变量;2. 可以 reboot 重启虚拟机后再试
修改配置文件,以下配置文件都在 hadoop-2.7.2/etc/hadoop下
5.进入hadoop的配置文件
cd hadoop-2.7.2/etc/hadoop
6.配置:hadoop-env.sh
修改
# The java implementation to use.
export JAVA_HOME=/usr/java/jdk1.8.0_221-amd64
# Run-as users for the HDFS/YARN daemons (everything runs as root here).
# NOTE(review): the HDFS_*_USER / YARN_*_USER variables below are honored by
# the Hadoop 3.x start scripts; Hadoop 2.7.2 does not read them — confirm
# they are actually needed for this version.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
7.配置core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default filesystem URI. For an HA cluster this must be the logical
nameservice id, NOT a single NameNode host.
BUGFIX: was hdfs://ns, but hdfs-site.xml defines dfs.nameservices=node
(all HA keys use ".node." and the qjournal URI ends in "/node"), so the
authority here must be "node" or clients cannot resolve the filesystem. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://node</value>
</property>
<!-- Base directory for all Hadoop local storage (per-user subdirectory). -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-2.7.2/data/hadoop-${user.name}</value>
</property>
<!-- ZooKeeper quorum used for HA coordination (ZKFC). -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
</configuration>
8.配置 yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Shuffle service required by MapReduce jobs running on YARN. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Environment variables NodeManagers pass through to containers. -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Enable ResourceManager high availability. -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Logical cluster id for the HA ResourceManager pair. -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>ns-yarn</value>
</property>
<!-- Ids of the individual ResourceManagers. -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Host and web UI address for rm1. -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node1</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>node1:8088</value>
</property>
<!-- Host and web UI address for rm2. -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node2</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>node2:8088</value>
</property>
<!-- ZooKeeper quorum used for RM leader election and state storage. -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
</configuration>
9.配置hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Block replication factor (default is 3). -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- Disable HDFS permission checking (test/lab environment only). -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- dfs.namenode.name.dir: NameNode metadata dir, defaults to file://${hadoop.tmp.dir}/dfs/name
dfs.datanode.data.dir: DataNode block dir, defaults to file://${hadoop.tmp.dir}/dfs/data
(both derive from hadoop.tmp.dir configured in core-site.xml). -->
<!-- Logical nameservice id for the HA NameNode pair; every per-NameNode key
below must embed this exact id, and fs.defaultFS must use it as authority. -->
<property>
<name>dfs.nameservices</name>
<value>node</value>
</property>
<!-- NameNode ids that make up the nameservice. -->
<property>
<name>dfs.ha.namenodes.node</name>
<value>nn1,nn2</value>
</property>
<!-- Each NameNode needs an RPC address (clients/internal) and an HTTP address (web UI). -->
<property>
<name>dfs.namenode.rpc-address.node.nn1</name>
<value>node1:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.node.nn1</name>
<value>node1:50070</value>
</property>
<!-- Second NameNode. -->
<property>
<name>dfs.namenode.rpc-address.node.nn2</name>
<value>node2:8020</value>
</property>
<property>
<!-- BUGFIX: was dfs.namenode.http-address.jh.nn2 — the nameservice segment
must be "node" to match dfs.nameservices; with "jh" this key is ignored
and nn2 has no configured HTTP address. -->
<name>dfs.namenode.http-address.node.nn2</name>
<value>node2:50070</value>
</property>
<!-- Shared edit log: JournalNode quorum backing the "node" nameservice. -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node2:8485;node3:8485;node4:8485/node</value>
</property>
<!-- Client-side class that determines which NameNode is currently active. -->
<property>
<name>dfs.client.failover.proxy.provider.node</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Local storage path used by the JournalNodes. -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop-2.7.2/data/journal/</value>
</property>
<!-- Enable automatic failover (ZKFC) when the active NameNode dies. -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Fence the old active NameNode via SSH (requires passwordless login). -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
</configuration>
10.配置mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Run MapReduce jobs on YARN (instead of the local runner). -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- (Editor note from the original author: Ctrl+Shift+/ toggles a block comment.) -->
<!-- <property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
-->
<!-- With YARN enabled, explicitly cap map/reduce task memory so containers
fit on small VMs (200 MB container, -Xmx200M heap). -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>200</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx200M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>200</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx200M</value>
</property>
</configuration>
11.配置slaves
内容如下
#群起的服务器
node1
node2
node3
node4
注意:
以上步骤做完之后
使用 rsync -r 将 hadoop-2.7.2 目录分发到各个节点,我这里是node2、node3、node4,例如:rsync -r /opt/hadoop-2.7.2 node2:/opt/
12.启动zookeeper
bin/zkServer.sh start
13.启动journalnode
#在node2,node3,node4上启动
sbin/hadoop-daemon.sh start journalnode
14.格式化namenode
#在node1上格式化
bin/hdfs namenode -format
15.把刚才格式化后的元数据拷贝到另外一个namenode上
# 一定要进入到/opt/hadoop-2.7.2/data中
scp -r hadoop-root/ node2:`pwd`
16.启动namenode
sbin/hadoop-daemon.sh start namenode
#(注意:Hadoop 2.x 使用上面的 hadoop-daemon.sh;下面的 hdfs --daemon 写法是 Hadoop 3.x 的语法,2.7.2 不支持)
bin/hdfs --daemon start namenode
17.在没有格式化的namenode上执行:
#在node2上执行
bin/hdfs namenode -bootstrapStandby
18.启动第二个namenode
sbin/hadoop-daemon.sh start namenode
#(注意:Hadoop 2.x 使用上面的 hadoop-daemon.sh;下面的 hdfs --daemon 写法是 Hadoop 3.x 的语法,2.7.2 不支持)
bin/hdfs --daemon start namenode
19.在其中一个节点上初始化zkfc(一定要启动zookeeper)
bin/hdfs zkfc -formatZK
20.重新启动hdfs,yarn
#关闭hdfs,yarn
sbin/stop-dfs.sh
sbin/stop-yarn.sh
#启动hdfs,yarn
sbin/start-dfs.sh
sbin/start-yarn.sh
#启动所有
sbin/start-all.sh
#关闭所有
sbin/stop-all.sh
要是第二个 resourcemanager 没有启动,可以在它所在的机器(我这里是node2)上单独启动:
sbin/yarn-daemon.sh start resourcemanager
#(或者在该节点上再次执行)
sbin/start-yarn.sh
21.配置好之后的web端效果
hdfs的web端口号50070
yarn的web端口号8088
------安--装--成--功---------