Experiment steps
- Set up passwordless SSH connections among master, slave1, and slave2
- Install and configure Spark and Scala on the local Linux system of the master server
- Add master, slave1, and slave2 to the slaves file
- Add the required variables to the spark-env.sh file
- Copy the files from the master node to all slave nodes
- Start Hadoop, and then the Spark cluster, on the master node
Commands
Configure the hostname-to-IP mapping on every node:
- vi /etc/hosts
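For example, the mapping might look like the lines below; the IP addresses here are placeholders, so substitute the actual addresses of your three machines:
- 192.168.1.101 master
- 192.168.1.102 slave1
- 192.168.1.103 slave2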
- ssh-keygen -t rsa
- ssh-copy-id -i master
- ssh-copy-id -i slave1
- ssh-copy-id -i slave2
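Run the key generation and copy on each of the three nodes (ssh-copy-id -i with no file argument falls back to the default public key, ~/.ssh/id_rsa.pub). Then spot-check that the login really is passwordless, for example:
- ssh slave1 hostname
- ssh slave2 hostname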
- mkdir -p /data/spark9
- cd /data/spark9/
- wget http://192.168.1.100:60000/allfiles/spark9/scala-2.10.4.tgz
- wget http://192.168.1.100:60000/allfiles/spark9/spark-1.6.0-bin-hadoop2.6.tgz
- tar -xzvf /data/spark9/scala-2.10.4.tgz -C /apps/
- tar -xzvf /data/spark9/spark-1.6.0-bin-hadoop2.6.tgz -C /apps/
- cd /apps
- mv /apps/scala-2.10.4/ /apps/scala
- mv /apps/spark-1.6.0-bin-hadoop2.6/ /apps/spark
- vim ~/.bashrc
- #scala
- export SCALA_HOME=/apps/scala
- export PATH=$SCALA_HOME/bin:$PATH
- #spark
- export SPARK_HOME=/apps/spark
- export PATH=$SPARK_HOME/bin:$PATH
- source ~/.bashrc
- scala -version
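Spark can be checked the same way once the PATH update takes effect:
- spark-submit --version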
- cd /apps/spark/conf
- mv slaves.template slaves
- vim slaves
- master
- slave1
- slave2
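Each line in the slaves file is the hostname of one worker node; listing master here means the master machine also runs a Worker process alongside the Master.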
- mv /apps/spark/conf/spark-env.sh.template /apps/spark/conf/spark-env.sh
- vim /apps/spark/conf/spark-env.sh
- HADOOP_CONF_DIR=/apps/hadoop/etc/hadoop
- JAVA_HOME=/apps/java
- SCALA_HOME=/apps/scala
- SPARK_MASTER_IP=master
- SPARK_MASTER_PORT=7077
- SPARK_MASTER_WEBUI_PORT=8080
- SPARK_WORKER_CORES=1
- SPARK_WORKER_MEMORY=1g
- SPARK_WORKER_PORT=7078
- SPARK_WORKER_WEBUI_PORT=8081
- SPARK_EXECUTOR_INSTANCES=1
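Here SPARK_MASTER_IP tells the workers where to find the master, 7077 is the standard standalone-master port, and SPARK_WORKER_CORES/SPARK_WORKER_MEMORY cap each worker at 1 core and 1 GB; raise those two values to match your hardware if the machines have more to spare.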
- scp -r /apps/scala/ slave1:/apps/
- scp -r /apps/spark/ slave1:/apps
- scp -r /apps/scala/ slave2:/apps/
- scp -r /apps/spark/ slave2:/apps/
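A quick check that the copies landed on both slaves:
- ssh slave1 ls /apps
- ssh slave2 ls /apps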
- Append the same environment variables on slave1 and slave2
- vim ~/.bashrc
- #scala
- export SCALA_HOME=/apps/scala
- export PATH=$SCALA_HOME/bin:$PATH
- #spark
- export SPARK_HOME=/apps/spark
- export PATH=$SPARK_HOME/bin:$PATH
- source ~/.bashrc
- cd /apps/hadoop/sbin
- ./start-all.sh
- cd /apps/spark/sbin
- ./start-all.sh
- Run jps on master, slave1, and slave2 to check that the Master and Worker processes are present
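As a final smoke test, open the master web UI at http://master:8080 (the SPARK_MASTER_WEBUI_PORT set above), or submit the bundled SparkPi example; the jar path below assumes the examples jar that ships in this Spark distribution's lib directory:
- /apps/spark/bin/spark-submit --master spark://master:7077 --class org.apache.spark.examples.SparkPi /apps/spark/lib/spark-examples-1.6.0-hadoop2.6.0.jar 10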