1、下载
wget https://mirrors.bfsu.edu.cn/apache/hadoop/common/hadoop-3.1.4/hadoop-3.1.4.tar.gz
wget https://mirrors.bfsu.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar
2、解压
tar -xvf hadoop-3.1.4.tar.gz && tar -xvf apache-hive-3.1.2-bin.tar.gz
mv hadoop-3.1.4 hadoop && mv apache-hive-3.1.2-bin hive
3、hadoop配置
hadoop/etc/hadoop 目录下
1、hadoop-env.sh 新增如下配置:
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HADOOP_HOME=/hadoop
export JAVA_HOME=/jdk
export HADOOP_HEAPSIZE_MAX=4096
2、yarn-env.sh 新增如下配置:
export YARN_NODEMANAGER_HEAPSIZE=4096
3、hdfs-site.xml configuration节点新增如下配置:
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.tmp.dir}/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file://${hadoop.tmp.dir}/dfs/data</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
4、core-site.xml configuration节点新增如下配置:
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<!-- 必须与 docker-compose.yml 中挂载的 /hadoop/tmp 卷一致，否则容器重建后 HDFS 数据丢失 -->
<value>/hadoop/tmp</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
5、mapred-site.xml configuration节点新增如下配置:
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/hadoop</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/hadoop</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/hadoop</value>
</property>
6、yarn-site.xml configuration节点新增如下配置:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-master</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
7、workers 为DataNode/NodeManager配置,配置如下:
hadoop-slaver1
hadoop-slaver2
4、hive配置
1、hadoop/share/hadoop/common/lib/guava-27.0-jre.jar 替换 hive/lib/guava-19.0.jar
2、hive/lib下增加mysql的jdbc驱动jar包(1、下载 已经有了)
3、复制hive-env.sh.template -> hive-env.sh,新增配置如下
export HADOOP_HEAPSIZE=4096
HADOOP_HOME=/hadoop
4、复制hive-default.xml.template -> hive-site.xml,修改配置如下
#配置使用mysql作元数据中心,修改如下节点的value
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- XML 中 & 必须写成 &amp;，否则 hive-site.xml 解析失败 -->
<value>jdbc:mysql://mysql:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;serverTimezone=Asia/Shanghai</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
新增配置如下
<property>
<name>system:java.io.tmpdir</name>
<value>/hive/tmp</value>
</property>
<property>
<name>system:user.name</name>
<value>hadoop</value>
</property>
5、docker-compose配置master-2slaver
1、Dockerfile配置如下(hive、hadoop、jdk8【这个自己下】、init.sh、start.sh在同一个目录)
# Single image used for both master and slaver nodes; the role is chosen at
# run time through the $master / $slaver environment variables (see CMD).
FROM docker.io/centos:centos7

# sshd/ssh/expect are required for the passwordless-ssh setup done by start.sh.
# JAVA_HOME/HADOOP_HOME/HIVE_HOME/PATH are appended to /etc/profile and sourced
# from /root/.bashrc so interactive shells get them too. /log.log is the file
# that `tail -f` follows in CMD to keep the container in the foreground.
RUN yum -y install openssh-server openssh-clients expect \
    && echo "JAVA_HOME=/jdk">>/etc/profile \
    && echo "HADOOP_HOME=/hadoop">>/etc/profile \
    && echo "HIVE_HOME=/hive">>/etc/profile \
    && echo "PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$HIVE_HOME/bin:\$JAVA_HOME/bin">>/etc/profile \
    && echo "export PATH">>/etc/profile \
    && source /etc/profile \
    && echo "source /etc/profile ">>/root/.bashrc \
    && touch /log.log \
    && chmod 777 /log.log \
    && echo "success" > /log.log

# The unpacked hadoop/, hive/ and jdk/ directories must sit next to this file.
COPY hadoop* /hadoop/
COPY *hive* /hive/
COPY jdk* /jdk/
COPY start.sh /start.sh
COPY init.sh /init.sh

# Generate host keys plus a root keypair for ssh-copy-id. ~/.ssh must exist
# before ssh-keygen writes into it (ssh-keygen does not create the directory).
RUN ssh-keygen -A \
    && mkdir -p -m 700 ~/.ssh \
    && ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ""

# init.sh sets the root password that start.sh later feeds to ssh-copy-id.
RUN sh /init.sh

# Defaults match docker-compose.yml (which overrides them per service).
# NOTE: the previous default listed "hadoop-slaver0", a host that is never
# started anywhere in this setup — fixed to the real slaver hostnames.
ENV slaver="hadoop-slaver1,hadoop-slaver2"
ENV master="false"

# start.sh bootstraps ssh/workers; tail keeps the container alive.
CMD ["sh","-c","sh /start.sh $master $slaver & tail -f /log.log"]
2、init.sh 配置如下
#!/bin/sh
# init.sh — runs once at image build time (see Dockerfile) to set the root
# password non-interactively. The value must stay in sync with the `pass`
# variable in start.sh, which uses it for ssh-copy-id between the nodes.
pass='hadoop@DOCKER#2020'

# Drive `passwd` with expect. A single pattern is enough: exp_continue loops
# the match, so it answers both the "New password" and "Retype new password"
# prompts. (The original listed the identical pattern twice; expect always
# matches the first arm, so the second was unreachable dead code.)
expect <<EOF
set timeout 3
spawn passwd
expect {
"*password:*" { send "$pass\r";exp_continue }
}
EOF
3、start.sh 配置如下
#!/bin/sh
# start.sh <master:true|false> <comma-separated slaver hostnames>
# Container entrypoint helper: starts sshd, rebuilds the Hadoop workers file
# from $2, and distributes root's ssh key to every node.
# hadoop-master is the master node's hostname.
sleep 5
echo "start">> /log.log
/usr/sbin/sshd
# '.' instead of 'source' keeps the script valid under a strict POSIX /bin/sh.
. /etc/profile

master=$1
slaver=$2

# Root password set by init.sh at build time. Assigned unconditionally so the
# master's own ssh-copy-id at the bottom still works when the slaver list is
# empty (the original only set `pass` inside the if-branch, sending an empty
# password in that case).
pass='hadoop@DOCKER#2020'

# copy_id HOST — push root's public key to HOST, answering the host-key
# confirmation and password prompts via expect.
copy_id() {
expect<<EOF
set timeout 3
spawn ssh-copy-id $1
expect {
"*yes/no*" { send "yes\r";exp_continue }
"*password*" { send "$pass\r";exp_continue }
}
EOF
}

# Rebuild the workers file (DataNode/NodeManager host list). Truncate without
# the leading blank line that the original `echo "" >` produced.
: > /hadoop/etc/hadoop/workers
if [ -n "$slaver" ]
then
    # Portable comma-split; the original used a bash array under #!/bin/sh.
    for ip in $(printf '%s' "$slaver" | tr ',' ' ')
    do
        echo "$ip">>/hadoop/etc/hadoop/workers
        echo "ssh-copy-id $ip" >>/log.log
        copy_id "$ip"
    done
fi
# The master must also be able to ssh to itself passwordlessly.
copy_id hadoop-master
# if [ "$master" = "true" ] ;then hadoop namenode -format ; fi;
# schematool -dbType mysql -initSchema
# nohup hive --service hiveserver2 &
3、docker-compose.yml 配置如下
# Three-node cluster: one master (NameNode/SecondaryNameNode/ResourceManager,
# plus Hive services) and two slavers (DataNode/NodeManager). All services use
# the same image; the master/slaver environment variables are consumed by
# start.sh (see the Dockerfile CMD).
# NOTE: indentation restored — the original listing had lost the YAML nesting.
version: "3"
services:
  hadoop-master:
    image: hadoop:latest
    restart: always
    hostname: hadoop-master
    container_name: hadoop-master
    environment:
      master: "true"
      slaver: hadoop-slaver1,hadoop-slaver2
    ports:
      - 29870:9870    # NameNode web UI
      - 28088:8088    # YARN ResourceManager UI
      - 29888:19888   # MapReduce JobHistory UI (only if the history server is started)
      - 20000:10000   # HiveServer2 JDBC
      - 20002:10002   # HiveServer2 web UI
    volumes:
      # hadoop.tmp.dir in core-site.xml should point at /hadoop/tmp so this
      # mount actually persists HDFS data across container recreation.
      - ./hadoop/master/hadoop-tmp:/hadoop/tmp
      - ./hadoop/master/hive-tmp:/hive/tmp
  hadoop-slaver1:
    image: hadoop:latest
    restart: always
    hostname: hadoop-slaver1
    container_name: hadoop-slaver1
    environment:
      master: "false"
      slaver: hadoop-slaver1,hadoop-slaver2
    volumes:
      - ./hadoop/slaver1/hadoop-tmp:/hadoop/tmp
      - ./hadoop/slaver1/hive-tmp:/hive/tmp
  hadoop-slaver2:
    image: hadoop:latest
    restart: always
    hostname: hadoop-slaver2
    container_name: hadoop-slaver2
    environment:
      master: "false"
      slaver: hadoop-slaver1,hadoop-slaver2
    volumes:
      - ./hadoop/slaver2/hadoop-tmp:/hadoop/tmp
      - ./hadoop/slaver2/hive-tmp:/hive/tmp
6、启动
docker-compose up -d --build
登录到master容器,start.sh 最后有3行注释了,
【hadoop namenode -format】hdfs格式化
【hdfs dfs -chmod -R 777 /tmp/hadoop-yarn】权限修改(建议后续hive-jdbc连接报错后,执行相关语句)
【schematool -dbType mysql -initSchema】hive数据库初始化(有个mysql数据库依赖,具体看hive配置)
【nohup hive --service hiveserver2 &】hive启动hiveserver2(jdbc连接和webui)
验证:登录到各个容器下,输入jps,master会有NameNode、SecondaryNameNode、ResourceManager进程(如果master不作为workers的话),slaver节点会有DataNode、NodeManager。
hadoop-master:29870 namenode
hadoop-master:28088 hadoop
hadoop-master:20000 jdbc连接(无用户名密码校验)
hadoop-master:20002 webui