Hadoop cluster setup (based on docker-compose)

1. Create a working directory

For example: /home/hadoop

You need to create two configuration files there (data is the mount directory and is created automatically).
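For example, assuming /home/hadoop is used as the working directory:

mkdir -p /home/hadoop
cd /home/hadoop
# put hadoop.env and docker-compose.yml here; ./data is created automatically on first start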

2. hadoop.env

The contents don't need to be changed; these are basically the default settings. If you need to tweak the configuration later, this is the place to do it. Look up the individual options online for details.

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
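A note on the naming convention (as documented by the bde2020 docker-hadoop project): the container entrypoint turns each of these variables into a property in the matching *-site.xml file. The prefix (CORE_CONF_, HDFS_CONF_, YARN_CONF_) selects the file, a single underscore becomes a dot, and a triple underscore becomes a dash. For example:

YARN_CONF_yarn_log___aggregation___enable=true
# -> yarn-site.xml: yarn.log-aggregation-enable = true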

3. docker-compose.yml

version: "3"

services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: namenode
    hostname: namenode
    volumes:
      - /home/hadoop/data/hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop.env
    ports:
      - "9870:9870"

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: resourcemanager
    hostname: resourcemanager
    ports:
      - "5888:5888"
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env
    environment:
      - YARN_CONF_yarn_resourcemanager_webapp_address=0.0.0.0:5888

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: historyserver
    hostname: historyserver
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    volumes:
      - /home/hadoop/data/hadoop_historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop.env
    ports:
      - "8188:8188"

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: nodemanager1
    hostname: nodemanager1
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env
    ports:
      - "8042:8042"

  datanode1:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode1
    hostname: datanode1
    depends_on:
      - namenode
    ports:
      - "5642:5642"
    volumes:
      - /home/hadoop/data/hadoop_datanode1:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5640
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5641
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5642

  datanode2:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode2
    hostname: datanode2
    depends_on:
      - namenode
    ports:
      - "5645:5645"
    volumes:
      - /home/hadoop/data/hadoop_datanode2:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5643
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5644
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5645

  datanode3:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
    restart: always
    container_name: datanode3
    hostname: datanode3
    depends_on:
      - namenode
    ports:
      - "5648:5648"
    volumes:
      - /home/hadoop/data/hadoop_datanode3:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    environment:
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:5646
      - HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:5647
      - HDFS_CONF_dfs_datanode_http_address=0.0.0.0:5648
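Before starting anything, you can have docker-compose validate the file; it prints the merged configuration or reports syntax errors:

docker-compose config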

1. You can pull the Hadoop images ahead of time. The image names are specified in the file; there are five of them.

For example: docker pull bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8
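If you prefer to pre-pull all five at once (make sure the tags match your docker-compose.yml):

docker pull bde2020/hadoop-namenode:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-historyserver:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.2-java8
docker pull bde2020/hadoop-datanode:2.0.0-hadoop3.1.2-java8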

2. You can change the corresponding ports.

Web UI ports for each service:

namenode: 9870 by default

resourcemanager: 8088 by default. It conflicted with another project's port on my machine, so I set it to 5888 here; the port mapping then has to be changed to 5888 as well:

ports:
  - "5888:5888"
environment:
  - YARN_CONF_yarn_resourcemanager_webapp_address=0.0.0.0:5888

historyserver: 8188 by default

nodemanager: 8042 by default

datanode1: note that in Hadoop 3.x the default datanode HTTP port changed from 50075 to 9864. The reason for not using the defaults here is that when several datanodes run on the same machine, each one's exposed ports must be different.

datanode2: same as above; the ports just need to differ.

datanode3: same as above; the ports just need to differ.

3. Choose the mount directories yourself; here I mount them under /home/hadoop/data.

4. Startup

Run docker-compose up in the working directory to start the cluster.
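Once the containers are up, a quick health check is to ask the namenode how many datanodes have registered (this assumes the hadoop binaries are on the container's PATH, as in the bde2020 images):

docker exec namenode hdfs dfsadmin -report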

Other commands
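A few standard docker-compose commands that are handy here:

docker-compose up -d              # start in the background
docker-compose ps                 # show container status
docker-compose logs -f namenode   # follow one service's logs
docker-compose down               # stop and remove the containers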

5. Web access

Each service can be accessed directly via the host IP plus the corresponding port.

Note: to view file contents or download files from the HDFS web page, you need to add the following datanode mappings to the hosts file on your local Windows machine, because the namenode UI redirects reads and downloads to the datanodes by their container hostnames.

hosts file path: C:\Windows\System32\drivers\etc

Append the following at the end (replace <host-ip> with your host machine's IP):

<host-ip> datanode1
<host-ip> datanode2
<host-ip> datanode3
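To check that access works end to end, you can also hit the WebHDFS REST API (enabled via HDFS_CONF_dfs_webhdfs_enabled=true in hadoop.env). A directory listing is served by the namenode, while reading a file redirects to a datanode hostname, which is exactly why the hosts entries above are needed:

curl "http://<host-ip>:9870/webhdfs/v1/?op=LISTSTATUS"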

Done.
