- 使用xshell+xftp连接虚拟机
- 创建文件夹h3cu
- mkdir h3cu
- 使用xftp上传Hadoop、Spark、Zookeeper
- 解压Hadoop、Spark、Zookeeper、JDK
- tar zxf /h3cu/hadoop-2.6.0.tar.gz -C /usr/local/src
- tar zxf /h3cu/spark-2.0.2-bin-hadoop2.6.tgz -C /usr/local/src
- tar zxf /h3cu/jdk-8u231-linux-x64.tar.gz -C /usr/local/src
- tar zxf /h3cu/zookeeper-3.4.5.tar.gz -C /usr/local/src
- 重命名Hadoop、Spark、Zookeeper、JDK
- cd /usr/local/src
- mv hadoop-2.6.0/ hadoop
- mv spark-2.0.2-bin-hadoop2.6/ spark
- mv jdk1.8.0_231/ jdk
- mv zookeeper-3.4.5/ zookeeper
- 配置环境变量
- vi ~/.bash_profile
- export HADOOP_HOME=/usr/local/src/hadoop
- export SPARK_HOME=/usr/local/src/spark
- export JAVA_HOME=/usr/local/src/jdk
- export ZOOKEEPER_HOME=/usr/local/src/zookeeper
- export PATH=$HADOOP_HOME/bin:$PATH
- export PATH=$HADOOP_HOME/sbin:$PATH
- export PATH=$SPARK_HOME/bin:$PATH
- export PATH=$JAVA_HOME/bin:$PATH
- export PATH=$ZOOKEEPER_HOME/bin:$PATH
- 验证环境变量
- source ~/.bash_profile
- hdfs
- zkServer.sh
- java -version
- hadoop
- 注意:按Tab自动补全,没有补全表示没有配置成功
- vi ~/.bash_profile
- 配置Zookeeper
- cd /usr/local/src/zookeeper/conf/
- cp zoo_sample.cfg zoo.cfg
- vi zoo.cfg
- 修改成自己的zookeeper路径
- dataDir=/usr/local/src/zookeeper/data
- 最后一行添加
- server.1=master:2888:3888
- server.2=slave1:2888:3888
- server.3=slave2:2888:3888
- 配置spark-env.sh
- cd /usr/local/src/spark/conf/
- cp spark-env.sh.template spark-env.sh
- vi spark-env.sh
- #!/usr/bin/env bash 下添加
- export JAVA_HOME=/usr/local/src/jdk
- export SPARK_HOME=/usr/local/src/spark
- export HADOOP_CONF_DIR=/usr/local/src/hadoop/etc/hadoop
- 配置slaves
- cp slaves.template slaves
- vi slaves
- 在最后一行添加主机名称
- master
- slave1
- slave2
- 配置hadoop-env.sh
- cd /usr/local/src/hadoop/etc/hadoop/
- vi hadoop-env.sh
- 修改jdk路径
- export JAVA_HOME=/usr/local/src/jdk
- 配置core-site.xml
- cd /usr/local/src/hadoop/etc/hadoop/
- vi core-site.xml
- <!-- 指定hdfs的nameservice为ns -->
- <property>
- <name>fs.defaultFS</name>
- <value>hdfs://ns</value>
- </property>
- <!-- 指定hadoop数据临时存放目录 -->
- <property>
- <name>hadoop.tmp.dir</name>
- <value>/usr/local/src/hadoop/tmp</value>
- </property>
- <!-- 指定zookeeper地址 -->
- <property>
- <name>ha.zookeeper.quorum</name>
- <value>master:2181,slave1:2181,slave2:2181</value>
- </property>
- 配置hdfs-site.xml
- 配置较多放下面文档里了(请下载后打开)
- 配置mapred-site.xml
- 配置yarn-site.xml
- 配置slaves
- hadoop里面需要配置六个文件。
- 复制Hadoop、Spark、Zookeeper、JDK、.bash_profile
- 将文件传输至另外两台虚拟机
- cd /usr/local/src/
- scp -r spark/ hadoop/ zookeeper/ jdk/ slave1:/usr/local/src/
- scp -r spark/ hadoop/ zookeeper/ jdk/ slave2:/usr/local/src/
- 传输变量
- scp ~/.bash_profile slave1:~/
- scp ~/.bash_profile slave2:~/
- 将文件传输至另外两台虚拟机
- 配置Zookeeper的myid
- 在zookeeper文件夹下创建data文件夹
- mkdir zookeeper/data
- 在master写入id为1的文件
- echo 1 > zookeeper/data/myid
- 另外两台同上/写入文件分别为slave1 = 2 slave2 = 3
- ssh slave1
- cd /usr/local/src/
- mkdir zookeeper/data
- echo 2 > zookeeper/data/myid
- ------------------------------
- ssh slave2
- cd /usr/local/src/
- mkdir zookeeper/data
- echo 3 > zookeeper/data/myid
- 回到主机master
- 三台机子分别启动zookeeper
- zkServer.sh start
- zkServer.sh status 查看状态三台
- 启动所有的journalnode(三台机器分别执行,需在格式化namenode之前启动)
- hadoop-daemon.sh start journalnode
- hdfs namenode -format
- hadoop-daemon.sh start namenode
- ssh slave1
- hdfs namenode -bootstrapStandby
- hadoop-daemon.sh start namenode
- ssh master
- 格式化并启动两个namenode
- master下
- hdfs namenode -format
- hadoop-daemon.sh start namenode
- slave1下
- hdfs namenode -bootstrapStandby
- hadoop-daemon.sh start namenode
- master下
- 格式化ZKFC(在主NameNode-master主机)
- hdfs zkfc -formatZK
- 启动集群的HDFS和YARN(在主NameNode),并查看jps进程
- start-dfs.sh
- start-yarn.sh
- jps
- 2449 DFSZKFailoverController
- 3041 NodeManager
- 1911 NameNode
- 2952 ResourceManager
- 3064 Jps
- 1289 QuorumPeerMain
- 1743 JournalNode
- 2159 DataNode
- jps一共显示八个进程(含Jps本身)
- 在slave1节点上启动备用resourcemanager,并查看jps进程
- ssh slave1
- yarn-daemon.sh start resourcemanager
- jps 节点同上
- 打开主、备namenode网页
- ip:50070
- 打开主、备resourcemanager网页
- ip:8088
- 终止active的namenode进程,并查看jps进程
- hadoop-daemon.sh stop namenode
- namenode切到备节点,发现已切换为active
- 重启刚才终止的namenode,并查看jps进程