1. Create a working directory
For example: /home/hadoop
Two configuration files need to be created in it (data is the volume mount directory; it will be created automatically).
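A minimal sketch of this step on the host (paths follow the example above, adjust them to your environment):
mkdir -p /home/hadoop
cd /home/hadoop
touch hadoop.env docker-compose.yml
# the data/ directory used by the volume mounts is created automatically on first start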
2. hadoop.env
The contents don't need to be changed; this is essentially the default configuration. Any later configuration changes also go into this file (look up the meaning of the individual properties in the Hadoop documentation if you need the details).
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
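For orientation: in the bde2020 images the entrypoint expands these variables into Hadoop's XML config files. The prefix picks the file (CORE_CONF_ → core-site.xml, HDFS_CONF_ → hdfs-site.xml, YARN_CONF_ → yarn-site.xml), single underscores in the rest of the name become dots, and triple underscores become dashes. For example, the first line above,
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
ends up in core-site.xml as:
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://namenode:8020</value>
</property>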
3. docker-compose.yml
version: "3"
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: namenode
    hostname: namenode
    volumes:
      - /home/hadoop/data/hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop.env
    ports:
      - "9870:9870"
  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: resourcemanager
    hostname: resourcemanager
    ports:
      - "5888:5888"
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env
    environment:
      - YARN_CONF_yarn_resourcemanager_webapp_address=0.0.0.0:5888
  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: historyserver
    hostname: historyserver
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    volumes:
      - /home/hadoop/data/hadoop_historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop.env
    ports:
      - "8188:8188"
  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: nodemanager1
    hostname: nodemanager1
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env
    ports:
      - "8042:8042"
  datanode1:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode1
    hostname: datanode1
    depends_on:
      - namenode
    ports:
      - "5642:5642"
    volumes:
      - /home/hadoop/data/hadoop_datanode1:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5640
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5641
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5642
  datanode2:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode2
    hostname: datanode2
    depends_on:
      - namenode
    ports:
      - "5645:5645"
    volumes:
      - /home/hadoop/data/hadoop_datanode2:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5643
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5644
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5645
  datanode3:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode3
    hostname: datanode3
    depends_on:
      - namenode
    ports:
      - "5648:5648"
    volumes:
      - /home/hadoop/data/hadoop_datanode3:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5646
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5647
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5648
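Before the first start it is worth letting Compose parse and echo back the merged configuration, which catches indentation and syntax mistakes early (standard subcommand, run it in the working directory):
docker-compose config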
1. You can pull the Hadoop images in advance; the image names are given in the compose file above, and there are 5 of them (namenode, resourcemanager, historyserver, nodemanager, datanode).
For example: docker pull bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8 (the full set of pull commands is listed below).
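If you prefer to pre-pull everything, the commands are simply (tags matching the compose file above):
docker pull bde2020/hadoop-namenode:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-historyserver:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8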
2. You can change the port mappings.
The web UI ports of the services are:
namenode: default 9870
resourcemanager: default 8088. Because that clashed with a port used by another project here, I set it to 5888, so the port mapping also has to be changed to 5888:
    ports:
      - "5888:5888"
    environment:
      - YARN_CONF_yarn_resourcemanager_webapp_address=0.0.0.0:5888
historyserver: default 8188
nodemanager: default 8042
datanode1: note that in Hadoop 3.x the default datanode web port changed from 50075 to 9864. The reason for not using the default here is that several datanodes run on the same machine, so each one has to expose a different port.
datanode2: same as above, just pick a different port
datanode3: same as above, just pick a different port
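Once the cluster is up, a quick way to confirm which host ports each container actually exposes (plain Docker commands, shown only as a sanity check):
docker ps --format "table {{.Names}}\t{{.Ports}}"
docker port datanode1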
3. Pick the mount directories yourself; here I mount everything under /home/hadoop/data.
4. Start the cluster
Run docker-compose up in the working directory to start everything.
Other commands:
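A few standard docker-compose commands that are handy here (nothing project-specific, just the usual CLI):
docker-compose up -d              # start in the background
docker-compose ps                 # list the containers and their state
docker-compose logs -f namenode   # follow the logs of one service
docker-compose stop               # stop without removing the containers
docker-compose down               # stop and remove the containers (the mounted data stays on the host)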
5. Web access
Each web UI can be reached directly via the host IP plus the corresponding service port.
One thing to note: to view file contents or download files from the HDFS web UI, you need to add the following datanode mappings to the hosts file of your local Windows machine.
hosts file path: C:\Windows\System32\drivers\etc\hosts
Append the following lines:
<host IP> datanode1
<host IP> datanode2
<host IP> datanode3
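As a concrete illustration, with 192.168.1.100 standing in for the host IP (substitute your own):
192.168.1.100 datanode1
192.168.1.100 datanode2
192.168.1.100 datanode3
After that, the namenode UI is reachable at http://192.168.1.100:9870 and the resourcemanager UI at http://192.168.1.100:5888.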
Done.