Big Data Cluster Configuration - Spark

Spark component configuration

0. Preparation

  1. Configure hostnames and IPs
# master: set the hostname, then check the IP
hostname master && bash
echo master > /etc/hostname
ip a
# slave1: set the hostname, then check the IP
hostname slave1 && bash
echo slave1 > /etc/hostname
ip a
# slave2: set the hostname, then check the IP
hostname slave2 && bash
echo slave2 > /etc/hostname
ip a
# Map IPs to hostnames
vi /etc/hosts
# Example entries; replace the IPs with your machines' actual addresses
192.168.10.3 master
192.168.10.4 slave1
192.168.10.5 slave2
# The hosts file must be configured on all three nodes
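
A quick check that the mappings resolve, assuming the hosts file above is in place on the node you run it from:
# each host should answer one ping
for h in master slave1 slave2; do ping -c 1 $h; done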
  2. Configure passwordless SSH
# Check that the ssh daemon is running
ps -ef | grep ssh
# master
# Generate a key pair: run ssh-keygen -t rsa and press Enter at every prompt
# Copy the key to each node: run ssh-copy-id <host>, answer yes, then enter the password
ssh-keygen -t rsa
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
# slave1
ssh-keygen -t rsa
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
# slave2
ssh-keygen -t rsa
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
  3. Test passwordless SSH
ssh slave1
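
Logging in one by one works, but a small loop checks all three hops at once; a minimal sketch, assuming the keys were copied as above:
# each line should print a hostname without asking for a password
for h in master slave1 slave2; do ssh $h hostname; done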
  4. Disable the firewall
# master slave1 slave2
systemctl stop firewalld
systemctl disable firewalld
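
To confirm the firewall is really off on each node:
# expected: "not running" and "inactive"
firewall-cmd --state
systemctl is-active firewalld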
Software   Package
scala      scala-2.11.8.tgz
spark      spark-2.0.0-bin-hadoop2.7.tgz
jdk        jdk-8u65-linux-x64.tar.gz
hadoop     hadoop-2.7.1.tar.gz
CentOS     CentOS 7.4 (minimal, English edition)

1. Install Hadoop

Test whether Hadoop is already installed:

#master
zkServer.sh start
#slave1
zkServer.sh start
#slave2
zkServer.sh start

#master
start-all.sh
jps
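
If Hadoop is healthy, jps output should look roughly like the following (exact process lists vary with the HA/ZooKeeper layout):
# master : NameNode, ResourceManager, Jps
# slave1 : DataNode, NodeManager, SecondaryNameNode, Jps
# slave2 : DataNode, NodeManager, Jps
# plus QuorumPeerMain on every node where ZooKeeper was started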

If Hadoop is already installed, skip this section and go straight to installing Scala and Spark.

  • A Hadoop environment is required beforehand; check that it is usable, take a screenshot, and save the result;

If HA is required by the assignment, follow an HA configuration guide; otherwise proceed with the standard configuration below.

[root@master software]# tar -zxvf /opt/software/hadoop-2.7.1.tar.gz -C /usr/local/src/
[root@master software]# tar -zxvf /opt/software/jdk-8u65-linux-x64.tar.gz -C /usr/local/src/
[root@master software]# cd /usr/local/src/
# Rename the extracted directories
[root@master src]# mv hadoop-2.7.1/ hadoop
[root@master src]# mv jdk1.8.0_65 java
[root@master src]# cd hadoop/etc/hadoop/
  • Edit the configuration files
[root@master hadoop]# vi core-site.xml 
<configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://master:9000</value>
        </property>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/usr/local/src/hadoop/tmp</value>
        </property>
</configuration>

# mapred-site.xml does not exist by default; create it from the template first
[root@master hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@master hadoop]# vi mapred-site.xml
<configuration>
        <property>
               <name>mapreduce.framework.name</name>
               <value>yarn</value>
        </property>
        <property>
               <name>mapreduce.jobhistory.address</name>
               <value>master:10020</value>
        </property>
        <property>
               <name>mapreduce.jobhistory.webapp.address</name>
               <value>master:19888</value>
        </property>
</configuration>

[root@master hadoop]# vi hdfs-site.xml 
<configuration>
        <property>
                <name>dfs.replication</name>
                <value>3</value>
        </property>
        <property>
                <name>dfs.namenode.secondary.http-address</name>
                <value>slave1:50090</value>
        </property>
        <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:/usr/local/src/hadoop/dfs/name</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:/usr/local/src/hadoop/dfs/data</value>
        </property>
</configuration>

[root@master hadoop]# vi yarn-site.xml 
<configuration>
<!-- Site specific YARN configuration properties -->
        <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>master</value>
        </property>
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
        <property>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>2048</value>
        </property>
        <property>
                <name>yarn.scheduler.minimum-allocation-mb</name>
                <value>2048</value>
        </property>
        <property>
                <name>yarn.nodemanager.resource.cpu-vcores</name>
                <value>1</value>
        </property>
        <property>
               <name>yarn.resourcemanager.address</name>
               <value>master:8032</value>
        </property>
        <property>
               <name>yarn.resourcemanager.scheduler.address</name>
               <value>master:8030</value>
        </property>
        <property>
               <name>yarn.resourcemanager.resource-tracker.address</name>
               <value>master:8031</value>
        </property>
        <property>
               <name>yarn.resourcemanager.admin.address</name>
               <value>master:8033</value>
        </property>
        <property>
               <name>yarn.resourcemanager.webapp.address</name>
               <value>master:8088</value>
        </property>
        <property>
               <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
               <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
</configuration>

[root@master hadoop]# vi hadoop-env.sh 
export JAVA_HOME=/usr/local/src/java
export HADOOP_PREFIX=/usr/local/src/hadoop
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib:$HADOOP_PREFIX/lib/native"

[root@master hadoop]# vi masters
master
[root@master hadoop]# vi slaves
slave1
slave2
  • Create the tmp, name, and data directories
[root@master hadoop]# mkdir /usr/local/src/hadoop/tmp
[root@master hadoop]# mkdir -p /usr/local/src/hadoop/dfs/{name,data}
[root@master hadoop]# chown -R root:root /usr/local/src/
  • Edit the environment variables
vi /root/.bashrc
export HADOOP_HOME=/usr/local/src/hadoop
export JAVA_HOME=/usr/local/src/java
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin
  • Distribute to the slaves
scp -r /usr/local/src/hadoop slave1:/usr/local/src
scp -r /usr/local/src/hadoop slave2:/usr/local/src
# Java must be distributed too, since .bashrc points JAVA_HOME at it
scp -r /usr/local/src/java slave1:/usr/local/src
scp -r /usr/local/src/java slave2:/usr/local/src

scp /root/.bashrc slave1:/root/.bashrc
scp /root/.bashrc slave2:/root/.bashrc
  • Apply the environment variables
#master
source /root/.bashrc
#slave1
source /root/.bashrc
#slave2
source /root/.bashrc
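
On a fresh install the NameNode must be formatted once before the first start (master only; re-formatting later would wipe HDFS metadata):
# master, first start only
hdfs namenode -format
start-all.sh
jps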

2. Install Scala

  • Extract the Scala package to /usr/local/src, rename the directory to scala, take a screenshot, and save the result;
  • /etc/profile takes effect for all users; /root/.bashrc (used here) takes effect only for the current user
tar -zxvf /opt/software/scala-2.11.8.tgz -C /usr/local/src
mv /usr/local/src/scala-2.11.8 /usr/local/src/scala
vi /root/.bashrc

3. Set the Scala environment variables

export SCALA_HOME=/usr/local/src/scala
export PATH=$PATH:$SCALA_HOME/bin
  • Make the environment variables take effect for the current user only, take a screenshot, and save the result;
source /root/.bashrc

4. Launch Scala and take a screenshot

scala
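
If you prefer a non-interactive check (exit the REPL itself with :quit):
# should print: Scala code runner version 2.11.8 ...
scala -version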

5. Install Spark

  • Extract the Spark package to /usr/local/src, rename the directory to spark, take a screenshot, and save the result;
tar -zxvf /opt/software/spark-2.0.0-bin-hadoop2.7.tgz -C /usr/local/src
mv /usr/local/src/spark-2.0.0-bin-hadoop2.7 /usr/local/src/spark

6. Set the Spark environment variables

vi /root/.bashrc
export SPARK_HOME=/usr/local/src/spark
export PATH=$PATH:$SPARK_HOME/bin
  • Make the environment variables take effect for the current user only, take a screenshot, and save the result;
source /root/.bashrc

7. Edit the Spark configuration files

  • Modify the Spark configuration and specify the Spark slave nodes; take a screenshot and save the result;
cd /usr/local/src/spark
mv conf/spark-env.sh.template conf/spark-env.sh
vi conf/spark-env.sh
export HADOOP_HOME=/usr/local/src/hadoop
export SCALA_HOME=/usr/local/src/scala
export JAVA_HOME=/usr/local/src/java
export SPARK_MASTER_IP=master
export SPARK_MASTER_PORT=7077
export SPARK_DIST_CLASSPATH=$(/usr/local/src/hadoop/bin/hadoop classpath)
export HADOOP_CONF_DIR=/usr/local/src/hadoop/etc/hadoop
export SPARK_YARN_USER_ENV="CLASSPATH=/usr/local/src/hadoop/etc/hadoop"
export YARN_CONF_DIR=/usr/local/src/hadoop/etc/hadoop
  • Specify the Spark slave nodes, take a screenshot, and save the result
mv conf/slaves.template conf/slaves
vi conf/slaves
master
slave1
slave2
  • Distribute Spark and Scala to slave1 and slave2
cd /usr/local/src
chown -R root:root spark
scp -r spark slave1:/usr/local/src
scp -r spark slave2:/usr/local/src
scp -r scala slave1:/usr/local/src
scp -r scala slave2:/usr/local/src
scp /root/.bashrc slave1:/root/
scp /root/.bashrc slave2:/root/
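
A quick sanity check that everything arrived on the slaves:
# both should list hadoop, java, scala, and spark
ssh slave1 ls /usr/local/src
ssh slave2 ls /usr/local/src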

# master
source /root/.bashrc
# slave1
source /root/.bashrc
# slave2
source /root/.bashrc

8. Start Spark

cd /usr/local/src/spark
./sbin/start-all.sh
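
To verify the daemons from the command line before opening the web UI:
# master should now also show a Master process (and a Worker, since master is in conf/slaves)
# slave1 and slave2 should each show a Worker
jps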
  • Check the Spark web UI, take a screenshot, and save the result
# Enter in the browser address bar:
master:8080
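
Beyond the web UI, submitting the bundled SparkPi example exercises the whole standalone cluster; a minimal sketch, assuming the stock Spark 2.0.0 layout and that you are still in /usr/local/src/spark:
# look for "Pi is roughly 3.14..." near the end of the output
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://master:7077 \
  examples/jars/spark-examples_2.11-2.0.0.jar 10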