Deploy Hadoop and Spark with Docker

version: '3'

services:
  spark:
    image: s1mplecc/spark-hadoop:3
    hostname: master
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    volumes:
      - /data/docker-data/hadoop-spark/spark/share:/opt/share
    ports:
      - '8880:8080'
      - '4440:4040'
      - '8888:8088'
      - '8042:8042'
      - '9870:9870'
      - '19888:19888'
      - '7077:7077'
    extra_hosts:
      - "worker1:127.0.0.1"
      - "worker2:127.0.0.1"
      - "master:127.0.0.1"
  spark-worker-1:
    image: s1mplecc/spark-hadoop:3
    hostname: worker1
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://master:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    volumes:
      - /data/docker-data/hadoop-spark/spark/share:/opt/share
    ports:
      - '8881:8081'
    extra_hosts:
      - "worker1:127.0.0.1"
      - "worker2:127.0.0.1"
      - "master:127.0.0.1"
  spark-worker-2:
    image: s1mplecc/spark-hadoop:3
    hostname: worker2
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://master:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    volumes:
      - /data/docker-data/hadoop-spark/spark/share:/opt/share
    ports:
      - '8882:8081'
    extra_hosts:
      - "worker1:127.0.0.1"
      - "worker2:127.0.0.1"
      - "master:127.0.0.1"



# docker-compose -f docker_compose_spark_hadoop.yml up -d
# docker-compose -f docker_compose_spark_hadoop.yml down
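# Once the stack is up, a quick way to confirm the master and both workers are
# talking is to run a trivial job against the published standalone endpoint.
# Below is a minimal sketch, assuming PySpark is installed on the host
# (`pip install pyspark`) and the '7077:7077' mapping above; depending on your
# Docker network setup, the driver may additionally need spark.driver.host set
# so executors can connect back to it.

```python
# smoke_test.py -- hedged sketch, not part of the image; assumes host-side
# PySpark and the 7077:7077 port mapping from the compose file above.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("compose-smoke-test")
         .master("spark://localhost:7077")   # standalone master published above
         .getOrCreate())

# Trivial distributed job: sum the integers 0..999 on the workers.
total = spark.sparkContext.parallelize(range(1000)).sum()
print("sum:", total)  # expect 499500
spark.stop()
```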

#Subscribe to project updates by watching https://github.com/bitnami/bitnami-docker-spark
#Submit issues and feature requests at https://github.com/bitnami/bitnami-docker-spark/issues

# work Spark Command: /opt/bitnami/java/bin/java -cp /opt/bitnami/spark/conf/:/opt/bitnami/spark/jars/*:/opt/hadoop/etc/hadoop/ -Xmx1g org.apache.spark.deploy.worker.Worker --webui-port 8081 spark://master:7077
# master Spark Command: /opt/bitnami/java/bin/java -cp /opt/bitnami/spark/conf/:/opt/bitnami/spark/jars/*:/opt/hadoop/etc/hadoop/ -Xmx1g org.apache.spark.deploy.master.Master --host master --port 7077 --webui-port 8080
# https://zhuanlan.zhihu.com/p/421375012?utm_medium=social&utm_oi=840347507367215104
# https://github.com/s1mplecc/spark-hadoop-docker


# Because Spark depends on Hadoop's client libraries, each Spark release is built against a specific Hadoop version, e.g. spark-3.1.2-bin-hadoop3.2.tgz. The bitnami/spark image ships only the Hadoop client, not the server side, so a Hadoop cluster still has to be deployed if HDFS and YARN are needed.
# Deploying Hadoop on the same nodes as the Spark cluster avoids unnecessary network traffic, and the disk-oriented HDFS naturally complements the memory-oriented Spark. Hence the idea of building a new image, based on bitnami/spark, with Hadoop installed.
# sc._gateway.jvm.org.apache.hadoop.util.VersionInfo.getVersion()   # 3.3.4
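# Since the point of baking Hadoop into the image is HDFS and YARN, a quick
# HDFS round-trip from the same pyspark shell confirms the NameNode and
# DataNodes are serving. A minimal sketch; the hdfs://master:9000 URI is an
# assumption about the image's fs.defaultFS, so adjust it to whatever your
# core-site.xml says.

```python
# Run inside the master container's pyspark shell (sc already exists).
# hdfs://master:9000 is assumed -- check fs.defaultFS in core-site.xml.
rdd = sc.parallelize(["hello", "hdfs"])
rdd.saveAsTextFile("hdfs://master:9000/tmp/smoke")  # fails if the path already exists
print(sc.textFile("hdfs://master:9000/tmp/smoke").collect())
```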


# For: ValueError: Cannot run multiple SparkContexts at once; existing SparkContext(app=PySparkShell, master=local[])
# First run:
#  sc.stop()
# then run:
#  from pyspark import SparkConf, SparkContext
#  conf = SparkConf().setAppName('My App')
#  sc = SparkContext(conf=conf)
#
#  count = sc.range(1, 1000 * 1000 * 100).filter(lambda x: x > 100).count()
#  print('count: ', count)
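# Alternatively, SparkContext.getOrCreate avoids the error altogether: it
# returns the shell's live context if one exists (ignoring the new conf in
# that case) instead of trying to construct a second one.

```python
from pyspark import SparkConf, SparkContext

conf = SparkConf().setAppName('My App')
sc = SparkContext.getOrCreate(conf)  # reuses the existing context if there is one

count = sc.range(1, 1000 * 1000 * 100).filter(lambda x: x > 100).count()
print('count:', count)
```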




# ./start-hadoop.sh
#Starting OpenBSD Secure Shell server: sshd.
#Starting namenodes on [master]
#Starting secondary namenodes [master]
#Starting resourcemanager
#Starting nodemanagers
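# With the daemons up, the ResourceManager's REST API gives a scriptable
# health check from the host. A sketch assuming the '8888:8088' mapping from
# the compose file above; /ws/v1/cluster/info is a standard YARN endpoint.

```python
# Poll the YARN ResourceManager REST API through the published port.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8888/ws/v1/cluster/info",   # host 8888 -> container 8088
    headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    info = json.load(resp)["clusterInfo"]

print(info["state"])                   # expect "STARTED"
print(info["resourceManagerVersion"])
```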

#  https://zhuanlan.zhihu.com/p/401967378

# https://mirrors.tuna.tsinghua.edu.cn   -- this mirror downloads a bit faster
# http://archive.apache.org/dist/hadoop/core/   -- this one is also fairly fast

You can deploy Hadoop and Spark with Docker through the following steps:

1. Install Docker and Docker Compose.

2. Create a Docker Compose file `docker-compose.yml`:

```yaml
version: '3'
services:
  namenode:
    image: sequenceiq/hadoop-docker:2.7.0
    container_name: namenode
    hostname: namenode
    ports:
      - "50070:50070"
      - "9000:9000"
    volumes:
      - ./data:/hadoop/dfs/name
      - ./config/core-site.xml:/etc/hadoop/core-site.xml
      - ./config/hdfs-site.xml:/etc/hadoop/hdfs-site.xml
    environment:
      - CLUSTER_NAME=hadoop
      - NODE_TYPE=NAMENODE
  datanode:
    image: sequenceiq/hadoop-docker:2.7.0
    container_name: datanode
    hostname: datanode
    volumes:
      - ./data:/hadoop/dfs/data
      - ./config/core-site.xml:/etc/hadoop/core-site.xml
      - ./config/hdfs-site.xml:/etc/hadoop/hdfs-site.xml
    environment:
      - CLUSTER_NAME=hadoop
      - NODE_TYPE=DATANODE
    depends_on:
      - namenode
  resourcemanager:
    image: sequenceiq/spark:1.6.0
    container_name: resourcemanager
    hostname: resourcemanager
    ports:
      - "8088:8088"
      - "8042:8042"
      - "4040:4040"
    volumes:
      - ./config/core-site.xml:/etc/hadoop/core-site.xml
      - ./config/hdfs-site.xml:/etc/hadoop/hdfs-site.xml
      - ./config/yarn-site.xml:/etc/hadoop/yarn-site.xml
    environment:
      - CLUSTER_NAME=hadoop
      - NODE_TYPE=RESOURCEMANAGER
    depends_on:
      - namenode
      - datanode
  nodemanager:
    image: sequenceiq/spark:1.6.0
    container_name: nodemanager
    hostname: nodemanager
    volumes:
      - ./config/core-site.xml:/etc/hadoop/core-site.xml
      - ./config/hdfs-site.xml:/etc/hadoop/hdfs-site.xml
      - ./config/yarn-site.xml:/etc/hadoop/yarn-site.xml
    environment:
      - CLUSTER_NAME=hadoop
      - NODE_TYPE=NODEMANAGER
    depends_on:
      - namenode
      - datanode
      - resourcemanager
  historyserver:
    image: sequenceiq/spark:1.6.0
    container_name: historyserver
    hostname: historyserver
    ports:
      - "18080:18080"
    volumes:
      - ./config/core-site.xml:/etc/hadoop/core-site.xml
      - ./config/hdfs-site.xml:/etc/hadoop/hdfs-site.xml
      - ./config/yarn-site.xml:/etc/hadoop/yarn-site.xml
    environment:
      - CLUSTER_NAME=hadoop
      - NODE_TYPE=HISTORYSERVER
    depends_on:
      - namenode
      - datanode
      - resourcemanager
      - nodemanager
```

3. Create a folder `config` and put the Hadoop and Spark configuration files `core-site.xml`, `hdfs-site.xml`, and `yarn-site.xml` into it. These files can be downloaded from the official websites.

4. Start the containers:

```bash
docker-compose up -d
```

5. In a browser, open `http://localhost:50070/` for the Hadoop web UI, `http://localhost:8088/` for the YARN web UI, and `http://localhost:18080/` for the Spark web UI.

At this point, Hadoop and Spark are deployed on Docker.
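For the alternative stack above, the NameNode exposes a JMX servlet that can be polled the same way as the YARN REST check earlier; a sketch assuming the `50070:50070` mapping from that compose file:

```python
# Check NameNode status via its JMX servlet on the published web port.
import json
import urllib.request

url = ("http://localhost:50070/jmx"
       "?qry=Hadoop:service=NameNode,name=NameNodeStatus")
with urllib.request.urlopen(url) as resp:
    beans = json.load(resp)["beans"]

print(beans[0]["State"])  # expect "active"
```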
