I: Switch the CentOS 7 yum source to a domestic mirror
#Back up the stock repo file
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
#Install the Aliyun repo file
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
#Clear the yum cache
yum clean all
#Rebuild the cache
yum makecache
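To confirm the switch took effect, you can list the enabled repositories; the base/updates/extras entries should now point at mirrors.aliyun.com:
yum repolist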
II: Install basic tools
yum -y install vim
yum -y install wget
III: Configure a static IP
Steps:
cd /etc/sysconfig/network-scripts
vim ifcfg-ens33 #the file name may differ
The file already contains:
TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
Modify or add:
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="18f6f7f0-1630-4a2b-9a47-071673608ce6"
DEVICE="ens33"
ONBOOT="yes"
BROADCAST=192.168.137.255
IPADDR=192.168.137.103
NETMASK=255.255.255.0
GATEWAY=192.168.137.1
DNS1=192.168.137.1
Save and quit, then restart the network service:
service network restart
Note: with VirtualBox, configure the network connection as host-only and share the host NIC's connection with the adapter; set NETMASK/GATEWAY to match the host-only adapter's address and IPADDR to a free IP within that subnet.
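To verify the static address took effect (interface name ens33 assumed, as above):
ip addr show ens33 #should show 192.168.137.103/24
ping -c 3 192.168.137.1 #the gateway should answer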
IV: Change the hostname
vim /etc/hostname
Reboot to apply:
sync
reboot
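Alternatively, CentOS 7 can change the hostname without a reboot via hostnamectl; a minimal sketch, assuming the name node1.spark:
hostnamectl set-hostname node1.spark
hostname #verify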
V: Configure the hosts file
Run on all three machines:
#add host entries
vim /etc/hosts
192.168.137.101 node1.spark node1
192.168.137.102 node2.spark node2
192.168.137.103 node3.spark node3
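A quick sanity check that the new names resolve on each node:
ping -c 1 node1.spark
ping -c 1 node2.spark
ping -c 1 node3.spark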
#The steps below log in as root over SSH, so make sure root login is permitted (set this to no to disable it)
vim /etc/ssh/sshd_config
PermitRootLogin yes
VI: Configure passwordless SSH login
Run on node1:
#Generate an RSA key pair; just press Enter at every prompt. The keys are written to ~/.ssh
ssh-keygen -t rsa
cd ~/.ssh/
chmod 700 ~/.ssh
cat id_rsa.pub >> authorized_keys
chmod 600 authorized_keys #sshd rejects authorized_keys that is group/world writable
Run on node2 and node3:
ssh-copy-id -i /root/.ssh/id_rsa.pub root@node1
Run on node1 to share the authentication file:
scp /root/.ssh/authorized_keys root@node2:~/.ssh/
scp /root/.ssh/authorized_keys root@node3:~/.ssh/
Restart the sshd service:
service sshd restart
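To confirm passwordless login works from node1 to every node (the Hadoop start scripts rely on it), a small check loop, hostnames as configured above:
for h in node1 node2 node3; do
  ssh root@$h hostname #should print each hostname with no password prompt
done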
VII: Install the JDK
Install JDK 1.8 on CentOS 7.
1: Remove the bundled OpenJDK
List the OpenJDK packages that ship with CentOS: rpm -qa | grep java
Then remove each bundled package with rpm -e --nodeps followed by its name,
e.g.: rpm -e --nodeps java-1.8.0-openjdk-1.8.0.102-4.b14.el7.x86_64
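If several OpenJDK packages are installed, they can be removed in one pass; a sketch (review the grep output first, since --nodeps skips dependency checks):
rpm -qa | grep openjdk | xargs -r rpm -e --nodeps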
2: Create an install directory and download JDK 1.8
First download the JDK from the official site: http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
cd /usr/local/
mkdir -p tools/java
tar -zxvf jdk-8u11-linux-x64.tar.gz -C /usr/local/tools/java/
Edit /etc/profile to configure the environment variables:
vim /etc/profile
export JAVA_HOME=/usr/local/tools/java/jdk1.8.0_11
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
Apply /etc/profile:
source /etc/profile
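Verify the new JDK is picked up:
java -version #should report 1.8.0_11
echo $JAVA_HOME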
Copy to the other nodes:
scp -r /usr/local/tools/ root@node2:/usr/local/
scp -r /usr/local/tools/ root@node3:/usr/local/
scp /etc/profile root@node2:/etc
scp /etc/profile root@node3:/etc
After copying, remember to run source /etc/profile on each node.
VIII: Install Hadoop
cd /usr/local/tools && mkdir hadoop && cd hadoop
1: Download
wget http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.1.1/hadoop-3.1.1.tar.gz
Unpack:
tar -zxvf hadoop-3.1.1.tar.gz -C /usr/local/tools/hadoop/
cd /usr/local/tools/hadoop
2: Update the environment variables
vim /etc/profile
export HADOOP_HOME=/usr/local/tools/hadoop/hadoop-3.1.1
export PATH=${HADOOP_HOME}/bin:$PATH
Reload the configuration:
source /etc/profile
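Verify the Hadoop binaries are on the PATH:
hadoop version #should print Hadoop 3.1.1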
3: Edit the Hadoop configuration files
cd /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop
vim hadoop-env.sh
#The java implementation to use. By default, this environment
#variable is REQUIRED on ALL platforms except OS X!
#export JAVA_HOME=
export JAVA_HOME=/usr/local/tools/java/jdk1.8.0_11
vim core-site.xml
<configuration>
<!-- RPC address of the HDFS master (NameNode) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://node1:9000</value>
</property>
<!-- Where Hadoop stores its runtime temp files -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/tools/hadoop/data/tmp</value>
</property>
</configuration>
vim hdfs-site.xml
<configuration>
<!-- HTTP address of the NameNode -->
<property>
<name>dfs.namenode.http-address</name>
<value>node1:50070</value>
</property>
<!-- HTTP address of the SecondaryNameNode -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node2:50090</value>
</property>
<!-- NameNode metadata storage path -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/tools/hadoop/data/name</value>
</property>
<!-- Number of HDFS replicas -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- DataNode block storage path -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/tools/hadoop/data/datanode</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
vim mapred-site.xml
<configuration>
<!-- Tell the MapReduce framework to run on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/common/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/common/lib/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/hdfs/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/hdfs/lib/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/lib/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/yarn/*,
/usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/yarn/lib/*
</value>
</property>
</configuration>
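Instead of hand-maintaining this list, the classpath Hadoop computes for itself can be printed and pasted in; a possible shortcut, worth checking against the entries above:
hadoop classpath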
vim yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>node1:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>node1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>node1:8040</value>
</property>
</configuration>
touch /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/masters
vim /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/masters
Add: node2
Note: in Hadoop 3.x the worker list file is named workers, not slaves! (The masters file is not read by the Hadoop 3 start scripts; the SecondaryNameNode address comes from hdfs-site.xml, but keeping the file does no harm.)
touch /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/workers
vim /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/workers
Add:
node2
node3
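The two files can also be written non-interactively; an equivalent sketch using here-documents:
cat > /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/masters <<EOF
node2
EOF
cat > /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/workers <<EOF
node2
node3
EOF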
In /usr/local/tools/hadoop/hadoop-3.1.1/sbin:
Add the following lines at the top of both start-dfs.sh and stop-dfs.sh:
#!/usr/bin/env bash
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
Also add the following at the top of start-yarn.sh and stop-yarn.sh:
#!/usr/bin/env bash
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
Create the data directories:
mkdir -p /usr/local/tools/hadoop/data/tmp
mkdir -p /usr/local/tools/hadoop/data/name
mkdir -p /usr/local/tools/hadoop/data/datanode
Copy the Hadoop tree to the other hosts:
scp -r /usr/local/tools/hadoop node2:/usr/local/tools/
scp -r /usr/local/tools/hadoop node3:/usr/local/tools/
Update the environment variables on node2 and node3:
vim /etc/profile
export HADOOP_HOME=/usr/local/tools/hadoop/hadoop-3.1.1
export PATH=$PATH:$HADOOP_HOME/bin
Reload the configuration:
source /etc/profile
The NameNode must be formatted before the first start:
/usr/local/tools/hadoop/hadoop-3.1.1/bin/hdfs namenode -format
Start:
/usr/local/tools/hadoop/hadoop-3.1.1/sbin/start-all.sh
Stop:
/usr/local/tools/hadoop/hadoop-3.1.1/sbin/stop-all.sh
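After start-all.sh, check that the expected daemons are up on each node (NameNode and ResourceManager on node1, SecondaryNameNode on node2, DataNode and NodeManager on node2/node3):
for h in node1 node2 node3; do
  echo "== $h =="
  ssh root@$h jps
done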
Note: bind the web UI to 0.0.0.0 instead of a specific hostname (here mpi-1) or the loopback IP, so that port 8088 is reachable from outside the machine. For example, change the following in yarn-site.xml:
cd /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop
vim yarn-site.xml
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>mpi-1:8088</value>
</property>
to:
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>0.0.0.0:8088</value>
</property>
Test HDFS:
http://192.168.137.101:50070/dfshealth.html#tab-datanode
Test YARN:
http://192.168.137.101:8088/cluster
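If a page does not load from the browser, curl from a cluster node helps separate firewall problems from daemon problems; for example, the YARN REST API should return JSON while the ResourceManager is up:
curl -s http://node1:8088/ws/v1/cluster/info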
Point the daemon PID files at a persistent directory (by default they land in /tmp, where they may be cleaned up):
cd /usr/local/tools/hadoop/hadoop-3.1.1/sbin
vim hadoop-daemon.sh
HADOOP_PID_DIR=/root/hadoop/pid #line 25
vim yarn-daemon.sh
YARN_PID_DIR=/root/hadoop/pid
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/hadoop-daemon.sh node2:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/hadoop-daemon.sh node3:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/yarn-daemon.sh node2:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/yarn-daemon.sh node3:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
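The PID directory chosen above should exist on every node; the daemon scripts will usually create it themselves, but creating it explicitly is harmless:
for h in node1 node2 node3; do
  ssh root@$h mkdir -p /root/hadoop/pid
done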
#Run the following to stop and disable the firewall
[root@node1 ~]# systemctl stop firewalld && systemctl disable firewalld
[root@node1 ~]# setenforce 0
#Set SELINUX to disabled
[root@node1 ~]# vim /etc/selinux/config
SELINUX=disabled
#Reboot the server
[root@node1 ~]# reboot
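After the reboot, confirm both changes stuck:
getenforce #should print Disabled
systemctl is-enabled firewalld #should print disabled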
Reset the environment (this wipes all HDFS data):
rm -rf /var/lib/hadoop/
rm -rf /usr/local/tools/hadoop/hadoop-3.1.1/logs/*
rm -rf /usr/local/tools/hadoop/data/tmp
rm -rf /usr/local/tools/hadoop/data/name
rm -rf /usr/local/tools/hadoop/data/datanode
mkdir -p /usr/local/tools/hadoop/data/tmp
mkdir -p /usr/local/tools/hadoop/data/name
mkdir -p /usr/local/tools/hadoop/data/datanode
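The reset only takes effect cluster-wide if run on every node; a sketch driving it from node1 over SSH (paths as above), after which the NameNode must be re-formatted with hdfs namenode -format:
for h in node2 node3; do
  ssh root@$h 'rm -rf /usr/local/tools/hadoop/data/{tmp,name,datanode} && mkdir -p /usr/local/tools/hadoop/data/{tmp,name,datanode}'
done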