首先下载好Hadoop的安装包,并解压
1.配置hadoop-env.sh中的JAVA_HOME
将hadoop-env.sh中的JAVA_HOME修改为自己jdk的安装路径
export JAVA_HOME=/opt/jdk1.8.0_181/
2.配置core-site.xml
<!--指定hadoop运行时产生文件的存储目录-->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-2.7.2/data/tmp</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>CrazyJack</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!--指定自己的zk集群-->
<property>
<name>ha.zookeeper.quorum</name>
<value>jh01:2181,jh02:2181,jh03:2181</value>
</property>
3.配置hdfs-site.xml
<!--指定HDFS副本的数量-->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>120</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hadoopdata/dfs/name</value>
</property>
<!--指定hdfs文件系统的数据块存放目录-->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hadoopdata/dfs/data</value>
</property>
<property>
<name>dfs.hosts</name>
<value>/opt/hadoop-2.7.2/etc/hadoop/dfs.hosts</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/opt/hadoop-2.7.2/etc/hadoop/dfs.hosts.exclude</value>
</property>
<!--完全分布式集群名称-->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!--集群中NameNode节点都有哪些-->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<!--nn1的RPC通信地址-->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>jh01:8020</value>
</property>
<!--nn2的RPC通信地址-->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>jh02:8020</value>
</property>
<!--nn1的http通信地址-->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>jh01:50070</value>
</property>
<!--nn2的http通信地址-->
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>jh02:50070</value>
</property>
<!--指定NameNode元数据在JournalNode上的存放位置-->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://jh01:8485;jh02:8485;jh03:8485/mycluster</value>
</property>
<!--配置隔离机制,即同一时刻只能有一台服务器对外响应-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!--使用隔离机制时需要ssh无密钥登录-->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/CrazyJack/.ssh/id_rsa</value>
</property>
<!--声明journalnode服务器存储目录-->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoopdata/jn</value>
</property>
<!--关闭权限检查-->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!--客户端访问代理类:客户端通过该类找到mycluster中处于active状态的NameNode,并实现失败自动切换-->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
4.配置 mapred-site.xml
<!--指定mr运行在yarn上-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
5.配置yarn-site.xml
<!--开启RM高可用-->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!--指定RM的clusterid-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!--指定RM的名字-->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!--分别指定RM的地址-->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>jh02</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>jh03</value>
</property>
<!--指定zk集群地址-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>jh01:2181,jh02:2181,jh03:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
6.配置slaves,把节点的主机名写入
jh01
jh02
jh03
注意:发送前需要先在各节点的hosts文件中配置好主机名与IP的映射,否则无法通过主机名发送
7.远程发送到其他节点
scp -r /opt/hadoop-2.7.2 jh02:/opt/
scp -r /opt/hadoop-2.7.2 jh03:/opt/
8.启动zk服务和journalnode服务(zkServer.sh只启动本机的zk,需要在每个zk节点上分别执行)
zkServer.sh start
hadoop-daemons.sh start journalnode
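9.格式化namenode(在第一台机器jh01上执行)
hdfs namenode -format
格式化后在jh01上启动namenode:
hadoop-daemon.sh start namenode
然后在第二台机器jh02上同步nn1的元数据(否则nn2没有元数据,无法启动):
hdfs namenode -bootstrapStandby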
10.格式化ZKFC(在第一台机器上执行)
hdfs zkfc -formatZK
启动服务测试
start-dfs.sh
yarn-daemons.sh start nodemanager
ResourceManager配置在jh02、jh03上,上面的命令不会启动它,需要分别在这两台机器上单独启动
jh02:
yarn-daemon.sh start resourcemanager
jh03:
yarn-daemon.sh start resourcemanager