概要
目前程序员的业务普遍要结合国产化,这可能是互联网大厂不景气背景下ToB的一个业务方向。我所在的公司普遍要求在Arm架构Kylin系统的机器上实现国产化。
本文介绍在国产化Kylin V10上搭建Hadoop集群的步骤
最终实现在k8s 集群上
hadoop-hadoop-yarn-rm 1 节点
hadoop-hadoop-yarn-proxyserver 1 节点
hadoop-hadoop-yarn-nm 3节点
hadoop-hadoop-hdfs-nn 1节点
hadoop-hadoop-hdfs-dn 3节点
整体架构流程
uname -a 查看我使用的机器系统
Linux node42 4.19.90-52.36.v2207.ky10.aarch64 #4 SMP Thu Mar 14 22:54:32 CST 2024 aarch64 aarch64 aarch64 GNU/Linux
参考开源项目里定义的启动bootstrap.sh
#!/usr/bin/env sh
# Bootstrap entry point for the Hadoop/Hive containers.
# Load system-wide environment (JAVA_HOME, HADOOP_HOME, ...) from the image.
# POSIX sh has no `source` builtin; use the portable `.` form instead.
. /etc/profile
# Block until TCP port $2 on host $1 accepts connections.
#   $1 - hostname, $2 - port
# Returns 1 immediately (with a message on stderr) if either argument is
# missing, so callers do not silently proceed without their dependency.
wait_for() {
  if [ -z "$1" ] || [ -z "$2" ]; then
    echo "Error: Both host and port must be provided." >&2
    return 1
  fi
  echo "Waiting for $1 to listen on $2..."
  while ! nc -z "$1" "$2"; do
    echo "waiting..."
    echo "Host: $1, Port: $2"
    # plain `sleep 1`: the `1s` suffix is a GNU extension, not POSIX
    sleep 1
  done
}
# Format HDFS on first boot, then start the NameNode daemon and stream its
# log so the container stays in the foreground.
start_hdfs_namenode() {
  # Extract dfs.namenode.name.dir from hdfs-site.xml: the <value> element is
  # assumed to be on the line directly below the <name> element.
  # NOTE(review): fragile line-oriented XML parsing -- verify config layout.
  # Trailing sed expressions strip surrounding whitespace, which would
  # otherwise break the directory test below when the XML is indented.
  namenode_dir=$(grep -A1 'dfs.namenode.name.dir' "${HADOOP_HOME}/etc/hadoop/hdfs-site.xml" \
    | tail -1 \
    | sed -e 's/<value>//' -e 's/<\/value>//' -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
  # Only format on the very first start: a populated "current" dir means the
  # filesystem already exists and reformatting would destroy it.
  if [ ! -d "${namenode_dir}/current" ]; then
    "${HADOOP_HOME}/bin/hdfs" namenode -format
  fi
  "${HADOOP_HOME}/bin/hdfs" --loglevel INFO --daemon start namenode
  # Keep PID 1 alive and surface the daemon log on the container's stdout.
  tail -f "${HADOOP_HOME}"/logs/*namenode*.log
}
# Start a DataNode after the NameNode ($1 host, $2 port) is reachable.
start_hdfs_datanode() {
  wait_for "$1" "$2"
  "${HADOOP_HOME}/bin/hdfs" --loglevel INFO --daemon start datanode
  # Keep the container in the foreground by following the daemon log.
  tail -f "${HADOOP_HOME}"/logs/*datanode*.log
}
# Launch the YARN ResourceManager daemon, then follow its log so the
# container process does not exit.
start_yarn_resourcemanager() {
  "${HADOOP_HOME}/bin/yarn" --loglevel INFO --daemon start resourcemanager
  tail -f "${HADOOP_HOME}"/logs/*resourcemanager*.log
}
# Start a NodeManager after the ResourceManager ($1 host, $2 port) is reachable.
start_yarn_nodemanager() {
  wait_for "$1" "$2"
  "${HADOOP_HOME}/bin/yarn" --loglevel INFO --daemon start nodemanager
  # Keep the container in the foreground by following the daemon log.
  tail -f "${HADOOP_HOME}"/logs/*nodemanager*.log
}
# Start the YARN web proxy after its dependency ($1 host, $2 port) is reachable.
start_yarn_proxyserver() {
  wait_for "$1" "$2"
  "${HADOOP_HOME}/bin/yarn" --loglevel INFO --daemon start proxyserver
  # Keep the container in the foreground by following the daemon log.
  tail -f "${HADOOP_HOME}"/logs/*proxyserver*.log
}
# Start the MapReduce JobHistory server after its dependency ($1 host,
# $2 port) is reachable.
start_mr_historyserver() {
  wait_for "$1" "$2"
  "${HADOOP_HOME}/bin/mapred" --loglevel INFO --daemon start historyserver
  # Keep the container in the foreground by following the daemon log.
  tail -f "${HADOOP_HOME}"/logs/*historyserver*.log
}
# Initialise the metastore schema exactly once, then run the metastore in
# the foreground. The marker file name "formated" (sic) is kept as-is for
# compatibility with existing deployments.
start_hive_metastore() {
  if [ ! -f "${HIVE_HOME}/formated" ]; then
    # The original wrote the marker via redirection even when schematool
    # failed, permanently skipping initialisation. Remove the marker on
    # failure so the next start retries.
    if ! schematool -initSchema -dbType mysql --verbose > "${HIVE_HOME}/formated" 2>&1; then
      rm -f "${HIVE_HOME}/formated"
      echo "Error: schematool -initSchema failed; will retry on next start." >&2
      return 1
    fi
  fi
  "${HIVE_HOME}/bin/hive" --service metastore
}
# Run HiveServer2 in the foreground; it keeps the container alive by itself.
start_hive_hiveserver2() {
  "${HIVE_HOME}/bin/hive" --service hiveserver2
}
# Dispatch on the service role passed as $1; $2/$3 (optional) are the
# host/port of a dependency the service waits for before starting.
case "$1" in
hadoop-hdfs-nn)
  start_hdfs_namenode "$2" "$3"
  ;;
hadoop-hdfs-dn)
  start_hdfs_datanode "$2" "$3"
  ;;
hadoop-yarn-rm)
  start_yarn_resourcemanager "$2" "$3"
  ;;
hadoop-yarn-nm)
  start_yarn_nodemanager "$2" "$3"
  ;;
hadoop-yarn-proxyserver)
  start_yarn_proxyserver "$2" "$3"
  ;;
hadoop-mr-historyserver)
  start_mr_historyserver "$2" "$3"
  ;;
hive-metastore)
  start_hive_metastore "$2" "$3"
  ;;
hive-hiveserver2)
  start_hive_hiveserver2 "$2" "$3"
  ;;
*)
  # Unknown role: report it and exit non-zero so the pod fails visibly.
  echo "请输入正确的服务启动命令~"
  exit 1
  ;;
esac
技术细节
构建arm镜像需要的基本组件(自己百度下载arm版hadoop,jdk, hive 和spark, flink)
# aarch64 big-data runtime image: Hadoop + Hive + Spark + Flink on CentOS 7.
# The matching aarch64 tarballs (jdk, hadoop, hive, spark, flink) must be
# present in the build context.
FROM centos:centos7.9.2009

# Set the container timezone to Asia/Shanghai.
RUN rm -f /etc/localtime && ln -sv /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone

USER root

# ENV persists into the final image; a `RUN export LANG=...` is discarded
# when that layer's shell exits and therefore had no effect.
ENV LANG=zh_CN.UTF-8

# Create the hadoop user/group matching `user: 10000:10000` in the manifests.
RUN groupadd --system --gid=10000 hadoop && useradd --system --home-dir /home/hadoop --uid=10000 --gid=hadoop hadoop

# Passwordless sudo for the hadoop user.
RUN echo "hadoop ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers

RUN yum -y install net-tools telnet wget nc expect which bind-utils && yum clean all

# Install JDK.
ADD jdk-8u421-linux-aarch64.tar.gz /opt/apache/
ENV JAVA_HOME=/opt/apache/jdk1.8.0_421
ENV PATH=$JAVA_HOME/bin:$PATH

# Install Hadoop.
ENV HADOOP_VERSION=3.3.6
ADD hadoop-${HADOOP_VERSION}.tar.gz /opt/apache/
ENV HADOOP_HOME=/opt/apache/hadoop-${HADOOP_VERSION}
# Create the symlink BEFORE anything touches /opt/apache/hadoop: a
# pre-existing real directory there (the original image created
# /opt/apache/hadoop/logs first) makes `ln -s` drop the link *inside* the
# directory instead of replacing it.
RUN ln -s ${HADOOP_HOME} /opt/apache/hadoop && mkdir -p ${HADOOP_HOME}/logs
ENV HADOOP_COMMON_HOME=${HADOOP_HOME} \
    HADOOP_HDFS_HOME=${HADOOP_HOME} \
    HADOOP_MAPRED_HOME=${HADOOP_HOME} \
    HADOOP_YARN_HOME=${HADOOP_HOME} \
    HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop \
    PATH=${PATH}:${HADOOP_HOME}/bin

# Install Hive.
ENV HIVE_VERSION=3.1.3
ADD apache-hive-${HIVE_VERSION}-bin.tar.gz /opt/apache/
ENV HIVE_HOME=/opt/apache/hive
ENV PATH=$HIVE_HOME/bin:$PATH
RUN ln -s /opt/apache/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME}

# Install Spark.
ENV SPARK_VERSION=3.3.2
ADD spark-${SPARK_VERSION}-bin-hadoop3.tgz /opt/apache/
ENV SPARK_HOME=/opt/apache/spark
ENV PATH=$SPARK_HOME/bin:$PATH
RUN ln -s /opt/apache/spark-${SPARK_VERSION}-bin-hadoop3 ${SPARK_HOME}

# Install Flink.
ENV FLINK_VERSION=1.17.0
ADD flink-${FLINK_VERSION}-bin-scala_2.12.tgz /opt/apache/
ENV FLINK_HOME=/opt/apache/flink
ENV PATH=$FLINK_HOME/bin:$PATH
RUN ln -s /opt/apache/flink-${FLINK_VERSION} ${FLINK_HOME}

# Pre-create NameNode/DataNode/YARN storage dirs under the REAL Hadoop home
# so they match the /opt/apache/hadoop-3.3.6/... paths configured in
# hdfs-site.xml and yarn-site.xml (the original created them under the
# /opt/apache/hadoop directory, which the configs never reference).
RUN mkdir -p ${HADOOP_HOME}/data/hdfs/namenode \
             ${HADOOP_HOME}/data/hdfs/datanode/data1 \
             ${HADOOP_HOME}/data/hdfs/datanode/data2 \
             ${HADOOP_HOME}/data/hdfs/datanode/data3 \
             ${HADOOP_HOME}/data/yarn/local-dirs \
             ${HADOOP_HOME}/data/yarn/log-dirs \
             ${HADOOP_HOME}/data/yarn/apps

COPY bootstrap.sh /opt/apache/
COPY config/* ${HADOOP_HOME}/etc/hadoop/
# Hive config and the MySQL JDBC driver.
COPY hive-config/* ${HIVE_HOME}/conf/
COPY mysql-connector-java-5.1.49/mysql-connector-java-5.1.49-bin.jar ${HIVE_HOME}/lib/

# Build runs as root already; sudo is unnecessary here.
RUN mkdir -p /home/hadoop/ && chown -R hadoop:hadoop /home/hadoop/
# The original `ENV ll "ls -l"` was a no-op (an env var is not an alias);
# define a real alias for interactive shells instead.
RUN echo 'alias ll="ls -l"' >> /etc/bashrc

RUN chown -R hadoop:hadoop /opt/apache
WORKDIR /opt/apache
构建镜像
docker build -t hadoop_hive:arm .
这里直接给出hadoop的部署的yaml文件
---
# Source: hadoop/templates/hdfs/hdfs-dn-pdb.yaml
# Keep at least one HDFS DataNode up during voluntary disruptions.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-hdfs-dn
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-dn
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hdfs-dn
  minAvailable: 1
---
# Source: hadoop/templates/hdfs/hdfs-nn-pdb.yaml
# Keep the (single-replica) NameNode up during voluntary disruptions.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-hdfs-nn
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hdfs-nn
  minAvailable: 1
---
# Source: hadoop/templates/hive/hive-hiveserver2-pdb.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-hive-hiveserver2
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hive-hiveserver2
  minAvailable: 1
---
# Source: hadoop/templates/hive/hive-metastore-pdb.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-hive-metastore
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hive-metastore
  minAvailable: 1
---
# Source: hadoop/templates/mr-historyserver/mr-historyserver-pdb.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-mr-historyserver
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: mr-historyserver
  minAvailable: 1
---
# Source: hadoop/templates/yarn-proxyserver/yarn-proxyserver-pdb.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-yarn-proxyserver
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-proxyserver
  minAvailable: 1
---
# Source: hadoop/templates/yarn/yarn-nm-pdb.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-yarn-nm
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-nm
  minAvailable: 1
---
# Per-pod hdfs-site.xml override for hadoop-hadoop-hdfs-dn-0: only
# dfs.datanode.hostname differs between the three DataNode replicas.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-hadoop-hdfs-dn-0
  namespace: hadoop
data:
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.hostname</name>
        <value>hadoop-hadoop-hdfs-dn-0.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local</value>
      </property>
      <!-- Allow datanodes to register without a reverse-DNS hostname match
           (pod IPs change). Declared once; the original file repeated it. -->
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:9864</value>
      </property>
      <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:9866</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data1,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data2,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data3</value>
        <description>DataNode directory</description>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
      </property>
      <!-- Bind to all interfaces -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- /Bind to all interfaces -->
    </configuration>
---
# Per-pod hdfs-site.xml override for hadoop-hadoop-hdfs-dn-1.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-hadoop-hdfs-dn-1
  namespace: hadoop
data:
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.hostname</name>
        <value>hadoop-hadoop-hdfs-dn-1.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local</value>
      </property>
      <!-- Allow datanodes to register without a reverse-DNS hostname match
           (pod IPs change). Declared once; the original file repeated it. -->
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:9864</value>
      </property>
      <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:9866</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data1,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data2,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data3</value>
        <description>DataNode directory</description>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
      </property>
      <!-- Bind to all interfaces -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- /Bind to all interfaces -->
    </configuration>
---
# Per-pod hdfs-site.xml override for hadoop-hadoop-hdfs-dn-2.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-hadoop-hdfs-dn-2
  namespace: hadoop
data:
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.hostname</name>
        <value>hadoop-hadoop-hdfs-dn-2.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local</value>
      </property>
      <!-- Allow datanodes to register without a reverse-DNS hostname match
           (pod IPs change). Declared once; the original file repeated it. -->
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:9864</value>
      </property>
      <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:9866</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data1,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data2,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data3</value>
        <description>DataNode directory</description>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
      </property>
      <!-- Bind to all interfaces -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- /Bind to all interfaces -->
    </configuration>
---
# Source: hadoop/templates/yarn/yarn-rm-pdb.yaml
# Keep the (single-replica) ResourceManager up during voluntary disruptions.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: hadoop-hadoop-yarn-rm
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-rm
  minAvailable: 1
---
# Source: hadoop/templates/hadoop-configmap.yaml
# Shared Hadoop configuration mounted file-by-file into every pod.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-hadoop
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
data:
  core-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-hadoop-hdfs-nn-0.hadoop-hadoop-hdfs-nn.hadoop.svc.cluster.local:9000/</value>
        <description>NameNode URI</description>
      </property>
      <!-- Allow root and hadoop to impersonate any user from any host
           (needed e.g. by HiveServer2 when submitting jobs as end users). -->
      <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
      </property>
    </configuration>
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <!--
      <property>
        <name>dfs.datanode.hostname</name>
        <value>example.com</value>
      </property>
      -->
      <!-- Allow datanodes to register without a reverse-DNS hostname match
           (pod IPs change). Declared once; the original file repeated it. -->
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:9864</value>
      </property>
      <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:9866</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data1,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data2,/opt/apache/hadoop-3.3.6/data/hdfs/datanode/data3</value>
        <description>DataNode directory</description>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/hdfs/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
      </property>
      <!-- Bind to all interfaces -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- /Bind to all interfaces -->
    </configuration>
  mapred-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <!-- NOTE(review): bare pod names may not resolve via cluster DNS;
           verify whether the headless-service FQDN is required here. -->
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop-hadoop-mr-historyserver-0:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop-hadoop-mr-historyserver-0:19888</value>
      </property>
    </configuration>
  yarn-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop-hadoop-yarn-rm-headless</value>
      </property>
      <!-- Bind to all interfaces -->
      <property>
        <name>yarn.resourcemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.timeline-service.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- /Bind to all interfaces -->
      <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <description>List of directories to store localized files in.</description>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/opt/apache/hadoop-3.3.6/data/yarn/local-dirs</value>
      </property>
      <property>
        <description>Where to store container logs.</description>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/opt/apache/hadoop-3.3.6/data/yarn/log-dirs</value>
      </property>
      <property>
        <description>Where to aggregate logs to.</description>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/opt/apache/hadoop-3.3.6/data/yarn/apps</value>
      </property>
      <!-- NOTE(review): bare pod name; verify resolvability (see jobhistory note). -->
      <property>
        <name>yarn.web-proxy.address</name>
        <value>hadoop-hadoop-yarn-proxyserver-0:9111</value>
      </property>
      <property>
        <name>yarn.application.classpath</name>
        <value>
          /opt/apache/hadoop-3.3.6/etc/hadoop,
          /opt/apache/hadoop-3.3.6/share/hadoop/common/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/common/lib/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/hdfs/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/hdfs/lib/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/mapreduce/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/mapreduce/lib/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/yarn/*,
          /opt/apache/hadoop-3.3.6/share/hadoop/yarn/lib/*
        </value>
      </property>
    </configuration>
  # HDFS/YARN include lists: one FQDN per line; the excludes files are
  # intentionally empty.
  dfs-hosts.includes: |
    hadoop-hadoop-hdfs-dn-0.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local
    hadoop-hadoop-hdfs-dn-1.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local
    hadoop-hadoop-hdfs-dn-2.hadoop-hadoop-hdfs-dn.hadoop.svc.cluster.local
  dfs-hosts.excludes: ""
  yarn-hosts.includes: |
    hadoop-hadoop-yarn-nm-0.hadoop-hadoop-yarn-nm.hadoop.svc.cluster.local
    hadoop-hadoop-yarn-nm-1.hadoop-hadoop-yarn-nm.hadoop.svc.cluster.local
    hadoop-hadoop-yarn-nm-2.hadoop-hadoop-yarn-nm.hadoop.svc.cluster.local
  yarn-hosts.excludes: ""
---
# Source: hadoop/templates/hive/hive-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-hadoop-hive
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop-hive
data:
  hive-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <!-- HDFS warehouse directory. -->
      <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive_remote/warehouse</value>
      </property>
      <property>
        <name>hive.metastore.local</name>
        <value>false</value>
      </property>
      <!-- Backing MySQL database; hive_metastore is created automatically.
           Literal '&' is illegal in XML and breaks the config parser,
           so query-string separators must be written as '&amp;'. -->
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.110.125:30653/hive_metastore?createDatabaseIfNotExist=true&amp;useSSL=false&amp;serverTimezone=Asia/Shanghai</value>
      </property>
      <!-- MySQL driver (5.x connector; the 8.x class is com.mysql.cj.jdbc.Driver). -->
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <!-- NOTE(review): credentials are hardcoded here; prefer a Secret. -->
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>qwe123</value>
      </property>
      <!-- Skip metastore schema version verification. -->
      <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
      </property>
      <property>
        <name>system:user.name</name>
        <value>root</value>
        <description>user name</description>
      </property>
      <property>
        <name>hive.metastore.uris</name>
        <value>thrift://hadoop-hadoop-hive-metastore-0.hadoop-hadoop-hive-metastore:9083</value>
      </property>
      <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>0.0.0.0</value>
        <description>Bind host on which to run the HiveServer2 Thrift service.</description>
      </property>
      <!-- HiveServer2 Thrift port (default 10000). -->
      <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
      </property>
      <property>
        <name>hive.server2.active.passive.ha.enable</name>
        <value>true</value>
      </property>
    </configuration>
---
# Source: hadoop/templates/hdfs/hdfs-dn-svc-headless.yaml
# Headless service: gives each DataNode pod a stable per-pod DNS record.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hdfs-dn-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-dn
spec:
  ports:
    - name: webhdfs
      port: 9864
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-dn
---
# Source: hadoop/templates/hdfs/hdfs-dn-svc.yaml
# NodePort service exposing the DataNode web UI outside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hdfs-dn
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    # Was mislabelled hdfs-nn in the original; this service targets datanodes.
    app.kubernetes.io/component: hdfs-dn
spec:
  ports:
    - name: webhdfs
      port: 9864
      nodePort: 30864
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-dn
---
# Source: hadoop/templates/hdfs/hdfs-nn-svc-headless.yaml
# Headless service: stable per-pod DNS record for the NameNode.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hdfs-nn-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
spec:
  ports:
    - name: dfs
      port: 9000
      protocol: TCP
    - name: webhdfs
      port: 9870
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
---
# Source: hadoop/templates/hdfs/hdfs-nn-svc.yaml
# NodePort service exposing the NameNode RPC and web UI outside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hdfs-nn
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
spec:
  ports:
    - name: dfs
      port: 9000
      protocol: TCP
      nodePort: 30900
    - name: webhdfs
      port: 9870
      nodePort: 30870
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
---
# Source: hadoop/templates/hive/hive-hiveserver2-svc-headless.yaml
# Headless service: stable per-pod DNS record for HiveServer2.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hive-hiveserver2-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
spec:
  ports:
    - port: 10000
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
---
# Source: hadoop/templates/hive/hive-hiveserver2-svc.yaml
# NodePort service exposing the HiveServer2 Thrift endpoint.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hive-hiveserver2
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
spec:
  ports:
    - port: 10000
      name: web
      nodePort: 30000
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
---
# Source: hadoop/templates/hive/hive-metastore-svc-headless.yaml
# Headless service: stable per-pod DNS record for the Hive metastore.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hive-metastore-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
spec:
  ports:
    - port: 9083
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
---
# Source: hadoop/templates/hive/hive-metastore-svc.yaml
# NodePort service exposing the Hive metastore Thrift endpoint.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-hive-metastore
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
spec:
  ports:
    - port: 9083
      name: web
      nodePort: 31183
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
---
# Source: hadoop/templates/mr-historyserver/mr-historyserver-svc-headless.yaml
# Headless service: stable per-pod DNS record for the JobHistory server.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-mr-historyserver-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
spec:
  ports:
    - port: 19888
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
---
# Source: hadoop/templates/mr-historyserver/mr-historyserver-svc.yaml
# NodePort service exposing the JobHistory web UI.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-mr-historyserver
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
spec:
  ports:
    - port: 19888
      name: web
      nodePort: 30888
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
---
# Source: hadoop/templates/yarn-proxyserver/yarn-proxyserver-svc-headless.yaml
# Headless service: stable per-pod DNS record for the YARN web proxy.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-proxyserver-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
spec:
  ports:
    - port: 9111
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
---
# Source: hadoop/templates/yarn-proxyserver/yarn-proxyserver-svc.yaml
# NodePort service exposing the YARN web proxy.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-proxyserver
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
spec:
  ports:
    - port: 9111
      name: web
      nodePort: 30911
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
---
# Source: hadoop/templates/yarn/yarn-nm-svc-headless.yaml
# Headless service: stable per-pod DNS record for each NodeManager.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-nm-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
spec:
  ports:
    - port: 8042
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
---
# Source: hadoop/templates/yarn/yarn-nm-svc.yaml
# NodePort service exposing the NodeManager web UI.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-nm
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
spec:
  ports:
    - port: 8042
      name: web
      nodePort: 30042
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
---
# Source: hadoop/templates/yarn/yarn-rm-svc-headless.yaml
# Headless service: stable per-pod DNS record for the ResourceManager
# (also referenced by yarn.resourcemanager.hostname).
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-rm-headless
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
spec:
  ports:
    - port: 8088
      name: web
  clusterIP: None
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
---
# Source: hadoop/templates/yarn/yarn-rm-svc.yaml
# NodePort service exposing the YARN ResourceManager web UI.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hadoop-yarn-rm
  namespace: hadoop
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
spec:
  ports:
    - port: 8088
      name: web
      nodePort: 30088
  type: NodePort
  selector:
    app.kubernetes.io/name: hadoop
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
---
# Source: hadoop/templates/hdfs/hdfs-dn-statefulset.yaml
# HDFS DataNode StatefulSet: 3 replicas, each storing blocks on three
# hostPath directories. hostPath type "Directory" requires the paths to
# exist on every node beforehand (they are NOT auto-created).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-hdfs-dn
  namespace: hadoop
  annotations:
    # Helm-computed hash of the config; a config change rolls the pods.
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-dn
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hdfs-dn
  serviceName: hadoop-hadoop-hdfs-dn
  replicas: 3
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: hdfs-dn
    spec:
      affinity:
        # Soft anti-affinity: prefer spreading DataNodes across nodes.
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: hdfs-dn
      terminationGracePeriodSeconds: 0
      initContainers:
        # Block startup until the NameNode web UI answers on 9870.
        - name: wait-for-nn
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until curl -m 3 -sI http://hadoop-hadoop-hdfs-nn-0.hadoop-hadoop-hdfs-nn.hadoop.svc.cluster.local:9870 | egrep --silent 'HTTP/1.1 200 OK|HTTP/1.1 302 Found'; do echo waiting for nn; sleep 1; done"]
      containers:
        - name: hdfs-dn
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-hdfs-dn"]
          env:
            # NOTE(review): despite the name, this resolves to the full pod
            # name (e.g. hadoop-hadoop-hdfs-dn-0), not a bare ordinal index.
            - name: POD_INDEX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['statefulset.kubernetes.io/pod-name']
          resources:
            limits:
              cpu: 1000m
              memory: 2048Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            httpGet:
              path: /
              port: 9864
            initialDelaySeconds: 10
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /
              port: 9864
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
            # DataNode block storage directories (dfs.datanode.data.dir).
            - name: dfs1
              mountPath: /opt/apache/hdfs/datanode1
            - name: dfs2
              mountPath: /opt/apache/hdfs/datanode2
            - name: dfs3
              mountPath: /opt/apache/hdfs/datanode3
          # NOTE(review): privileged: true together with runAsUser: 10000 is
          # unusual — confirm the container really needs privileged mode.
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: dfs1
          hostPath:
            path: /opt/bigdata/servers/hadoop/dn/data/data1
            type: Directory
        - name: dfs2
          hostPath:
            path: /opt/bigdata/servers/hadoop/dn/data/data2
            type: Directory
        - name: dfs3
          hostPath:
            path: /opt/bigdata/servers/hadoop/dn/data/data3
            type: Directory
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
---
# Source: hadoop/templates/hdfs/hdfs-nn-statefulset.yaml
# HDFS NameNode StatefulSet: single replica; fsimage/edits persisted on a
# hostPath (must pre-exist on the node, type "Directory").
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-hdfs-nn
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hdfs-nn
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hdfs-nn
  serviceName: hadoop-hadoop-hdfs-nn
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: hdfs-nn
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: hdfs-nn
      terminationGracePeriodSeconds: 0
      containers:
        - name: hdfs-nn
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          # bootstrap.sh formats the NameNode on first start (no current/
          # dir), then starts the namenode daemon.
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-hdfs-nn"]
          resources:
            limits:
              cpu: 1000m
              memory: 2048Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            httpGet:
              path: /
              port: 9870
            initialDelaySeconds: 10
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /
              port: 9870
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
            # NameNode metadata directory (dfs.namenode.name.dir).
            - name: nn1
              mountPath: /opt/apache/hadoop-3.3.6/data/hdfs/namenode
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: nn1
          hostPath:
            path: /opt/bigdata/servers/hadoop/nn/data/data1
            type: Directory
---
# Source: hadoop/templates/hive/hive-hiveserver2-statefulset.yaml
# HiveServer2 StatefulSet: single replica; waits for the Hive Metastore
# (thrift port 9083) before starting, serves JDBC/Thrift on 10000.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-hive-hiveserver2
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-hiveserver2
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hive-hiveserver2
  serviceName: hadoop-hadoop-hive-hiveserver2
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: hive-hiveserver2
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: hive-hiveserver2
      terminationGracePeriodSeconds: 0
      initContainers:
        # Probe the metastore thrift port with telnet (no HTTP endpoint).
        - name: wait-hive-metastore
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until (echo 'q')|telnet -e 'q' hadoop-hadoop-hive-metastore.hadoop.svc.cluster.local 9083 >/dev/null 2>&1; do echo waiting for hive metastore; sleep 1; done"]
      containers:
        - name: hive-hiveserver2
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 10000
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hive-hiveserver2"]
          resources:
            limits:
              cpu: 1000m
              memory: 1024Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            tcpSocket:
              port: 10000
            initialDelaySeconds: 5
            timeoutSeconds: 2
          livenessProbe:
            tcpSocket:
              port: 10000
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
            # Hive config comes from a separate configmap (hadoop-hadoop-hive).
            - name: hive-site
              mountPath: /opt/apache/hive/conf/hive-site.xml
              subPath: hive-site.xml
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: hive-site
          configMap:
            name: hadoop-hadoop-hive
---
# Source: hadoop/templates/hive/hive-metastore-statefulset.yaml
# Hive Metastore StatefulSet: single replica; waits for the HDFS NameNode,
# initializes the metastore schema on first start (see bootstrap.sh),
# then serves thrift on 9083.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-hive-metastore
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: hive-metastore
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: hive-metastore
  serviceName: hadoop-hadoop-hive-metastore
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: hive-metastore
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: hive-metastore
      terminationGracePeriodSeconds: 0
      initContainers:
        - name: wait-for-nn
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until curl -m 3 -sI http://hadoop-hadoop-hdfs-nn-0.hadoop-hadoop-hdfs-nn.hadoop.svc.cluster.local:9870 | egrep --silent 'HTTP/1.1 200 OK|HTTP/1.1 302 Found'; do echo waiting for nn; sleep 1; done"]
      containers:
        - name: hive-metastore
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 9083
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hive-metastore"]
          resources:
            limits:
              cpu: 1000m
              memory: 2048Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            tcpSocket:
              port: 9083
            initialDelaySeconds: 5
            timeoutSeconds: 2
          livenessProbe:
            tcpSocket:
              port: 9083
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
            - name: hive-site
              mountPath: /opt/apache/hive/conf/hive-site.xml
              subPath: hive-site.xml
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: hive-site
          configMap:
            name: hadoop-hadoop-hive
---
# Source: hadoop/templates/mr-historyserver/mr-historyserver-statefulset.yaml
# MapReduce JobHistory Server StatefulSet: single replica; waits for the
# YARN ResourceManager web UI (8088) before starting, serves UI on 19888.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-mr-historyserver
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: mr-historyserver
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: mr-historyserver
  serviceName: hadoop-hadoop-mr-historyserver
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: mr-historyserver
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: mr-historyserver
      terminationGracePeriodSeconds: 0
      initContainers:
        - name: wait-for-rm
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until curl -m 3 -sI http://hadoop-hadoop-yarn-rm-0.hadoop-hadoop-yarn-rm.hadoop.svc.cluster.local:8088 | egrep --silent 'HTTP/1.1 200 OK|HTTP/1.1 302 Found'; do echo waiting for rm; sleep 1; done"]
      containers:
        - name: mr-historyserver
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 19888
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-mr-historyserver"]
          resources:
            limits:
              cpu: 1000m
              memory: 1024Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            # REST endpoint of the history server; cheaper than the full UI.
            httpGet:
              path: /ws/v1/history/info
              port: 19888
            initialDelaySeconds: 5
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /ws/v1/history/info
              port: 19888
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
---
# Source: hadoop/templates/yarn-proxyserver/yarn-proxyserver-statefulset.yaml
# YARN Web App Proxy Server StatefulSet: single replica; waits for the
# ResourceManager, serves the proxy on 9111 (per yarn-site config).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-yarn-proxyserver
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-proxyserver
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-proxyserver
  serviceName: hadoop-hadoop-yarn-proxyserver
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: yarn-proxyserver
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: yarn-proxyserver
      terminationGracePeriodSeconds: 0
      initContainers:
        - name: wait-for-rm
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until curl -m 3 -sI http://hadoop-hadoop-yarn-rm-0.hadoop-hadoop-yarn-rm.hadoop.svc.cluster.local:8088 | egrep --silent 'HTTP/1.1 200 OK|HTTP/1.1 302 Found'; do echo waiting for rm; sleep 1; done"]
      containers:
        - name: yarn-proxyserver
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 9111
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-yarn-proxyserver"]
          resources:
            limits:
              cpu: 1000m
              memory: 1024Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            tcpSocket:
              port: 9111
            initialDelaySeconds: 5
            timeoutSeconds: 2
          livenessProbe:
            tcpSocket:
              port: 9111
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
---
# Source: hadoop/templates/yarn/yarn-nm-statefulset.yaml
# YARN NodeManager StatefulSet: 3 replicas; each waits for the
# ResourceManager. bootstrap.sh receives the RM host/port so the NM can
# block until the RM answers before registering.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-yarn-nm
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-nm
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-nm
  serviceName: hadoop-hadoop-yarn-nm
  replicas: 3
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: yarn-nm
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: yarn-nm
      terminationGracePeriodSeconds: 0
      initContainers:
        - name: wait-for-rm
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          command: ['sh', '-c', "until curl -m 3 -sI http://hadoop-hadoop-yarn-rm-0.hadoop-hadoop-yarn-rm.hadoop.svc.cluster.local:8088 | egrep --silent 'HTTP/1.1 200 OK|HTTP/1.1 302 Found'; do echo waiting for rm; sleep 1; done"]
      containers:
        - name: yarn-nm
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 8042
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-yarn-nm hadoop-hadoop-yarn-rm.hadoop.svc.cluster.local 8088"]
          resources:
            limits:
              cpu: 1000m
              memory: 2048Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            httpGet:
              path: /node
              port: 8042
            initialDelaySeconds: 10
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /node
              port: 8042
            initialDelaySeconds: 10
            timeoutSeconds: 2
          env:
            # Expose the container's resource limits to the NM so YARN can
            # advertise matching vcores/memory (downward API).
            - name: MY_CPU_LIMIT
              valueFrom:
                resourceFieldRef:
                  containerName: yarn-nm
                  resource: limits.cpu
                  divisor: 1
            - name: MY_MEM_LIMIT
              valueFrom:
                resourceFieldRef:
                  containerName: yarn-nm
                  resource: limits.memory
                  divisor: 1M
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
---
# Source: hadoop/templates/yarn/yarn-rm-statefulset.yaml
# YARN ResourceManager StatefulSet: single replica; web UI/REST on 8088.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hadoop-hadoop-yarn-rm
  namespace: hadoop
  annotations:
    checksum/config: 36697e54c58cb45f147f354343692c9d77539232b308ec5f728d9c5e361acd54
  labels:
    app.kubernetes.io/name: hadoop
    helm.sh/chart: hadoop-1.2.0
    app.kubernetes.io/instance: hadoop
    app.kubernetes.io/component: yarn-rm
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: hadoop
      app.kubernetes.io/instance: hadoop
      app.kubernetes.io/component: yarn-rm
  serviceName: hadoop-hadoop-yarn-rm
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: hadoop
        app.kubernetes.io/instance: hadoop
        app.kubernetes.io/component: yarn-rm
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 5
              podAffinityTerm:
                topologyKey: "kubernetes.io/hostname"
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: hadoop
                    app.kubernetes.io/instance: hadoop
                    app.kubernetes.io/component: yarn-rm
      terminationGracePeriodSeconds: 0
      containers:
        - name: yarn-rm
          image: "harbor.yourdomain.com:8443/hadoop/hadoop_hive:arm"
          imagePullPolicy: "IfNotPresent"
          ports:
            - containerPort: 8088
              name: web
          command: ["sh","-c","/opt/apache/bootstrap.sh hadoop-yarn-rm"]
          resources:
            limits:
              cpu: 1000m
              memory: 1024Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          readinessProbe:
            # RM REST endpoint; confirms the scheduler is actually up.
            httpGet:
              path: /ws/v1/cluster/info
              port: 8088
            initialDelaySeconds: 5
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /ws/v1/cluster/info
              port: 8088
            initialDelaySeconds: 10
            timeoutSeconds: 2
          volumeMounts:
            - name: core-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: mapred-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-site
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: dfs-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.includes
              subPath: dfs-hosts.includes
            - name: dfs-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/dfs-hosts.excludes
              subPath: dfs-hosts.excludes
            - name: yarn-hosts-includes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.includes
              subPath: yarn-hosts.includes
            - name: yarn-hosts-excludes
              mountPath: /opt/apache/hadoop-3.3.6/etc/hadoop/yarn-hosts.excludes
              subPath: yarn-hosts.excludes
          securityContext:
            runAsUser: 10000
            privileged: true
      volumes:
        - name: core-site
          configMap:
            name: hadoop-hadoop
        - name: hdfs-site
          configMap:
            name: hadoop-hadoop
        - name: mapred-site
          configMap:
            name: hadoop-hadoop
        - name: yarn-site
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: dfs-hosts-excludes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-includes
          configMap:
            name: hadoop-hadoop
        - name: yarn-hosts-excludes
          configMap:
            name: hadoop-hadoop
部署后查看 NN 节点端口,确认 hadoop 部署成功
对于大文件,需要用多线程按分块方式先上传、再在 HDFS 上合并为一个文件;该方案已投产,效率不错
def upload_file_to_hdfs(file):
    """Upload a (potentially large) file to HDFS.

    The file is staged locally, split into 5 MB chunks that are uploaded
    concurrently as ``<dest>.part{i}`` files, then merged into a single
    HDFS file via ``merge_hdfs_chunks``.

    Args:
        file: an upload object exposing ``filename`` and ``save(path)``
            (presumably a Flask/werkzeug FileStorage — TODO confirm).

    Returns:
        str: the destination path of the merged file on HDFS.

    Raises:
        Exception: the first exception raised by any chunk-upload worker.
    """
    host, port = HDFS_HOST, HDFS_PORT
    path = '/upload'
    script_directory = os.path.dirname(os.path.realpath(__file__))
    # Robustness fix: make sure the local staging directory exists before
    # file.save() tries to write into it.
    upload_dir = os.path.join(script_directory, '../file_upload')
    os.makedirs(upload_dir, exist_ok=True)
    local_file_path = os.path.join(upload_dir, file.filename)
    file.save(local_file_path)
    # NOTE(review): os.path.join yields '/'-separated paths only on POSIX
    # hosts; HDFS paths always use '/'. Fine on the Kylin deployment target.
    hdfs_file_path = os.path.join(path, file.filename)
    client = InsecureClient(url=f'http://{host}:{int(port)}', user='hadoop')
    file_size = os.path.getsize(local_file_path)
    chunk_size = 5 * 1024 * 1024  # 5 MB per chunk
    num_chunks = (file_size + chunk_size - 1) // chunk_size  # ceil division
    futures = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for offset in range(0, file_size, chunk_size):
            futures.append(
                executor.submit(upload_hdfs_chunk, client, hdfs_file_path,
                                local_file_path, offset, chunk_size))
        concurrent.futures.wait(futures)
    # Bug fix: wait() does not propagate worker exceptions, so a failed
    # chunk upload used to be silently ignored and a corrupt file merged.
    # result() re-raises the first failure instead.
    for future in futures:
        future.result()
    # Merge the uploaded chunks into the final file.
    merge_hdfs_chunks(client, hdfs_file_path, num_chunks)
    return hdfs_file_path
def merge_hdfs_chunks(client, hdfs_file_path, num_chunks):
    """Concatenate the uploaded ``.part{i}`` files on HDFS into one file.

    Parts ``<hdfs_file_path>.part0`` .. ``.part{num_chunks-1}`` are copied
    in order into ``hdfs_file_path`` (created with overwrite) and deleted
    once merged.

    Args:
        client: an ``hdfs`` library client (e.g. ``InsecureClient``) whose
            ``write``/``read`` return context managers and which has
            ``delete``.
        hdfs_file_path: destination path of the merged file.
        num_chunks: number of part files expected.

    A missing/broken part is reported and skipped (original best-effort
    behaviour kept), so callers should verify the merged size if
    integrity matters.
    """
    buffer_size = 1024 * 1024
    with client.write(hdfs_file_path, overwrite=True) as writer:
        for i in range(num_chunks):
            part_file_path = f"{hdfs_file_path}.part{i}"
            try:
                with client.read(part_file_path) as reader:
                    # Perf fix: copy in 1 MB pieces; the old code called
                    # reader.read(), loading an entire 5 MB part into RAM.
                    while True:
                        data = reader.read(buffer_size)
                        if not data:
                            break
                        writer.write(data)
                # Remove the chunk only after it was fully merged.
                client.delete(part_file_path)
            except Exception as e:
                # Best effort: report and continue with the next part.
                print(f"Error processing {part_file_path}: {e}")
    print(f'Merged file created at: {hdfs_file_path}')
小结
成功在国产 ARM 架构 Kylin V10 系统上完成了 hadoop 集群的搭建,并实现了用 Python 多线程按分块方式上传文件到 HDFS。