单机基于docker搭建hadoop2.7.1 spark1.7 高可用集群

19 篇文章 0 订阅
10 篇文章 0 订阅

获取ubuntu镜像

sudo docker pull ubuntu:14.04

分别下载 spark1.7 hadoop2.7.1 scala1.1 zookeeper3.4.6 jdk1.8 解压后放置本地文件夹用于挂载到容器中

并在文件夹下创建文件
authorized_keys
hosts

本例目录使用/home/docker/config

启动容器

sudo docker run --name installspark -v /home/docker/config/:/config -it ubuntu:14.04


安装

启动后在容器内/config文件夹下能够看到放置的安装文件

安装jdk,scala :

vim ~/.bashrc
追加:
/usr/sbin/sshd
cat /config/hosts > /etc/hosts
cat /config/authorized_keys > /root/.ssh/authorized_keys
export JAVA_HOME=/usr/lib/jvm/java-8-sun
export PATH=${JAVA_HOME}/bin:$PATH
export HADOOP_HOME=/opt/hadoop
export PATH=${HADOOP_HOME}/bin:$PATH
export SCALA_HOME=/opt/scala 
export PATH=${SCALA_HOME}/bin:$PATH
export SPARK_HOME=/opt/spark
export PATH=${SPARK_HOME}/bin:$PATH

拷贝 spark/hadoop/zookeeper到/opt下


安装hadoop:

创建文件夹:/opt/hadoop/namenode /opt/hadoop/datanode /opt/hadoop/tmp /opt/hadoop/journal
root@nn1:/opt/hadoop/etc/hadoop# vim hadoop-env.sh
修改:
export JAVA_HOME=/usr/lib/jvm/java-8-sun
root@nn1:/opt/hadoop/etc/hadoop# vim core-site.xml 
添加:
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>dnzk1:2181,dnzk2:2181,dnzk3:2181</value>
</property>

root@nn1:/opt/hadoop/etc/hadoop# vim hdfs-site.xml
添加:
<property>
 <name>dfs.datanode.data.dir</name>
 <value>file:/opt/hadoop/datanode</value>
 </property>
 <property>
 <name>dfs.namenode.name.dir</name>
 <value>file:/opt/hadoop/namenode</value>
 </property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://dnzk1:8485;dnzk2:8485;dnzk3:8485/ns1</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop/journal</value>
</property>
<property>
        <name>dfs.journalnode.http-address</name>
        <value>0.0.0.0:8480</value>
</property>
<property>
        <name>dfs.journalnode.rpc-address</name>
        <value>0.0.0.0:8485</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>
        sshfence
        shell(/bin/true)
</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_dsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
        <name>dfs.permissions</name>
        <value>false</value>
</property>


vim mapred-site.xml
添加:

<property>
	<name>mapreduce.framework.name</name>
	<value>yarn</value>
</property>

root@nn1:/opt/hadoop# vim /opt/hadoop/etc/hadoop/yarn-site.xml

添加:

<property>
                <name>yarn.resourcemanager.ha.enabled</name>
                <value>true</value>
        </property>
        <property>
                <name>yarn.resourcemanager.cluster-id</name>
                <value>RM_HA_ID</value>
        </property>
        <property>
                <name>yarn.resourcemanager.ha.rm-ids</name>
                <value>rm1,rm2</value>
        </property>
        <property>
                <name>yarn.resourcemanager.hostname.rm1</name>
                <value>rm1</value>
        </property>
        <property>
                <name>yarn.resourcemanager.hostname.rm2</name>
                <value>rm2</value>
        </property>
        <property>
                <name>yarn.resourcemanager.recovery.enabled</name>
                <value>true</value>
        </property>
        <property>
                <name>yarn.resourcemanager.store.class</name>
                <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        </property>
        <property>
                <name>yarn.resourcemanager.zk-address</name>
                <value>dnzk1:2181,dnzk2:2181,dnzk3:2181</value>
        </property>
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
 
 

root@nn1:/opt/hadoop# vim /opt/hadoop/etc/hadoop/slaves

添加:

dnzk1
dnzk2
dnzk3


安装spark

root@nn1:/opt/spark/conf# vim spark-env.sh
添加:
export SPARK_MASTER_IP=nn1   
export SPARK_WORKER_MEMORY=256m
export JAVA_HOME=/usr/lib/jvm/java-8-sun
export SCALA_HOME=/opt/scala
export SPARK_HOME=/opt/spark
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
export SPARK_LIBRARY_PATH=$SPARK_HOME/lib
export SCALA_LIBRARY_PATH=$SPARK_LIBRARY_PATH
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_MASTER_PORT=7077

root@nn1:/opt/spark/conf# vim slaves

添加:

nn1
nn2
rm1
rm2
dnzk1
dnzk2
dnzk3

安装zookeeper

创建文件夹 /opt/zookeeper/tmp 
创建文件 /opt/zookeeper/tmp/myid
echo 1 > /opt/zookeeper/tmp/myid
root@nn1:/opt/zookeeper/conf# vim zoo.cfg
修改

dataDir=/opt/zookeeper/tmp
server.1=dnzk1:2888:3888
server.2=dnzk2:2888:3888
server.3=dnzk3:2888:3888


生成密钥

ssh-keygen -t dsa

追加id_dsa.pub到宿主机的/home/docker/config/authorized_keys文件
root@nn1:/opt/hadoop# cat ~/.ssh/id_dsa.pub

执行

sudo docker commit -m "namenode1" installspark spark1_7-hadoop2_7_1-scala1_1:basic



修改本地宿主机/home/docker/config/hosts文件
添加

172.17.0.11	nn1
172.17.0.12     nn2
172.17.0.13     rm1
172.17.0.14     rm2
172.17.0.15     dnzk1
172.17.0.16     dnzk2
172.17.0.17     dnzk3


启动docker

sudo docker run --name dnzk1 -h dnzk1 --net=none -p 2185:2181 -p 50075:50070 -p 9005:9000 -p 8485:8485 -p 7075:7077 -p 2885:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name dnzk2 -h dnzk2 --net=none -p 2186:2181 -p 50076:50070 -p 9006:9000 -p 8486:8485 -p 7076:7077 -p 2886:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name dnzk3 -h dnzk3 --net=none -p 2187:2181 -p 50077:50070 -p 9007:9000 -p 8487:8485 -p 7077:7077 -p 2887:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name nn1 -h nn1 --net=none -p 2181:2181 -p 50071:50070 -p 9001:9000 -p 8481:8485 -p 7071:7077 -p 2881:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name nn2 -h nn2 --net=none -p 2182:2181 -p 50072:50070 -p 9002:9000 -p 8482:8485 -p 7072:7077 -p 2882:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name rm1 -h rm1 --net=none -p 2183:2181 -p 50073:50070 -p 9003:9000 -p 8483:8485 -p 7073:7077 -p 2883:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
sudo docker run --name rm2 -h rm2 --net=none -p 2184:2181 -p 50074:50070 -p 9004:9000 -p 8484:8485 -p 7074:7077 -p 2884:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic
dnzk2(执行echo 2 > /opt/zookeeper/tmp/myid),dnzk3(执行echo 3 > /opt/zookeeper/tmp/myid)

配置网络

sudo pipework docker0 -i eth0 nn1 172.17.0.11/16
sudo pipework docker0 -i eth0 nn2 172.17.0.12/16
sudo pipework docker0 -i eth0 rm1 172.17.0.13/16
sudo pipework docker0 -i eth0 rm2 172.17.0.14/16
sudo pipework docker0 -i eth0 dnzk1 172.17.0.15/16
sudo pipework docker0 -i eth0 dnzk2 172.17.0.16/16
sudo pipework docker0 -i eth0 dnzk3 172.17.0.17/16



启动hadoop集群

在dnzk1/dnzk2/dnzk3上启动zookeeper和 hadoop journal
/opt/zookeeper/bin/zkServer.sh start
/opt/hadoop/sbin/hadoop-daemon.sh start journalnode

在nn1上格式化hadoop(namenode)和zookeeper(zkfc),然后启动hdfs
/opt/hadoop/bin/hdfs namenode -format
scp -r /opt/hadoop/namenode/ nn2:/opt/hadoop/
/opt/hadoop/bin/hdfs namenode -bootstrapStandby
 
/opt/hadoop/bin/hdfs zkfc -formatZK
 
/opt/hadoop/sbin/start-dfs.sh
 
 
 在rm1上启动yarn 
/opt/hadoop/sbin/start-yarn.sh
 在rm2上启动 
/opt/hadoop/sbin/yarn-daemon.sh start resourcemanager
 

启动spark

/opt/spark/sbin/start-all.sh




查看:
http://172.17.0.11:50070 (active)

http://172.17.0.12:50070(standby)


启动后集群服务情况

nn1        172.17.0.11        jdk、hadoop                   NameNode、DFSZKFailoverController(zkfc)

nn2        172.17.0.12        jdk、hadoop                   NameNode、DFSZKFailoverController(zkfc)

rm1        172.17.0.13        jdk、hadoop                   ResourceManager

rm2        172.17.0.14        jdk、hadoop                   ResourceManager

dnzk1      172.17.0.15        jdk、hadoop、zookeeper        DataNode、NodeManager、JournalNode、QuorumPeerMain

dnzk2      172.17.0.16        jdk、hadoop、zookeeper        DataNode、NodeManager、JournalNode、QuorumPeerMain

dnzk3      172.17.0.17        jdk、hadoop、zookeeper        DataNode、NodeManager、JournalNode、QuorumPeerMain


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值