hadoop环境配置

一:centos7 yum源切换为国内

  1. 备份

    mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
    
  2. 安装

    curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
    
  3. 清空缓存

    yum clean all 
    
  4. 生成缓存

    yum makecache
    

二:安装工具

yum -y install vim 
yum -y install wget

三:配置固定IP

  1. 操作:

    cd /etc/sysconfig/network-scripts 
    vim ifcfg-ens33 #名字可能不一样
    内容为:
    TYPE="Ethernet"
    PROXY_METHOD="none"
    BROWSER_ONLY="no"
    修改或新增:
    BOOTPROTO="static"
    DEFROUTE="yes"
    IPV4_FAILURE_FATAL="no"
    IPV6INIT="yes"
    IPV6_AUTOCONF="yes"
    IPV6_DEFROUTE="yes"
    IPV6_FAILURE_FATAL="no"
    IPV6_ADDR_GEN_MODE="stable-privacy"
    NAME="ens33"
    UUID="18f6f7f0-1630-4a2b-9a47-071673608ce6"
    DEVICE="ens33"
    ONBOOT="yes"
    BROADCAST=192.168.137.255
    IPADDR=192.168.137.103
    NETMASK=255.255.255.0
    GATEWAY=192.168.137.1
    DNS1=192.168.137.1
    
  2. 保存退出后,重启网络服务:

    service network restart
    
    备注:VirtualBox网络连接配置
    使用仅主机(Host-Only)模式,并将宿主机网卡设置为共享网络
    NETMASK/GATEWAY  配置为Host-Only网卡的子网掩码/地址
    IPADDR	配置为该网段内的IP
    

四:修改主机名

	vim /etc/hostname
	重启:
	sync
	reboot

五:配置host文件

	分别在三台机器上执行:
	#添加host
	vim /etc/hosts
	192.168.137.101 node1.spark node1
	192.168.137.102 node2.spark node2
	192.168.137.103 node3.spark node3
	
	#允许root账户通过SSH登录(系统可能默认禁用),如果是用root用户登录请开启
	vim /etc/ssh/sshd_config
	PermitRootLogin yes

六:配置免密登录

	node1执行
	#生成密钥对,执行命令后一直按回车(Enter)即可。生成的密钥位于 ~/.ssh 文件夹下
	ssh-keygen -t rsa 
	cd .ssh/
	chmod 700 ~/.ssh
	cat id_rsa.pub >> authorized_keys
	
	node2,node3执行(需先在各自节点上执行 ssh-keygen -t rsa 生成密钥对)
	ssh-copy-id -i /root/.ssh/id_rsa.pub root@node1
	
	node1执行,共享认证文件
	scp /root/.ssh/authorized_keys root@node2:~/.ssh/
	scp /root/.ssh/authorized_keys root@node3:~/.ssh/
	
	重启SSHD服务:
	service sshd restart

七:安装JDK环境

centos7 安装 JDK1.8

1:清理自带jdk

	卸载centos原本自带的openjdk,运行命令:rpm -qa | grep java
	然后通过    rpm -e --nodeps   后面跟系统自带的jdk名    这个命令来删除系统自带的jdk,
	例如:rpm -e --nodeps java-1.8.0-openjdk-1.8.0.102-4.b14.el7.x86_64

2:创建自己的目录,并下载jdk1.8

首先去官网下载jdk:http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
cd /usr/local/
mkdir -p tools/java
tar -zxvf jdk-8u11-linux-x64.tar.gz -C /usr/local/tools/java/ 

编辑/etc/下的profile文件,配置环境变量
vim /etc/profile
export JAVA_HOME=/usr/local/tools/java/jdk1.8.0_11
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
使/etc/profile生效
source /etc/profile

拷贝到其他节点:
scp -r /usr/local/tools/ root@node2:/usr/local/
scp -r /usr/local/tools/ root@node3:/usr/local/
scp -r /etc/profile root@node2:/etc
scp -r /etc/profile root@node3:/etc

拷贝完记得在node2、node3上刷新profile(执行 source /etc/profile)

八:安装Hadoop

cd /usr/local/tools && mkdir hadoop && cd hadoop

1:下载

wget http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.1.1/hadoop-3.1.1.tar.gz
解压
tar -zxvf hadoop-3.1.1.tar.gz -C /usr/local/tools/hadoop/
cd /usr/local/tools/hadoop 

2:修改环境变量

vim /etc/profile

export HADOOP_HOME=/usr/local/tools/hadoop/hadoop-3.1.1
export PATH=${HADOOP_HOME}/bin:$PATH

刷新配置文件
source /etc/profile

3:修改配置文件

cd /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop

vim hadoop-env.sh
#The java implementation to use. By default, this environment
#variable is REQUIRED on ALL platforms except OS X!
#export JAVA_HOME=
export JAVA_HOME=/usr/local/tools/java/jdk1.8.0_11
vim core-site.xml
<configuration>
    <!-- 指定HDFS老大(namenode)的通信地址 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1:9000</value>
    </property>
    <!-- 指定hadoop运行时产生文件的存储路径 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/tools/hadoop/data/tmp</value>
    </property>
</configuration>
vim hdfs-site.xml
<configuration>
    <!-- 设置namenode的http通讯地址 -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>node1:50070</value>
    </property>
    <!-- 设置secondarynamenode的http通讯地址 -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node2:50090</value>
    </property>
    <!-- 设置namenode存放的路径 -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/tools/hadoop/data/name</value>
    </property>
    <!-- 设置hdfs副本数量 -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- 设置datanode存放的路径 -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/tools/hadoop/data/datanode</value>
    </property>
    
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
vim mapred-site.xml
<configuration>
    <!-- 通知框架MR使用YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>  
        <name>mapreduce.application.classpath</name>  
        <value>  
        /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/common/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/common/lib/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/hdfs/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/hdfs/lib/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/lib/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/yarn/*,  
        /usr/local/tools/hadoop/hadoop-3.1.1/share/hadoop/yarn/lib/*  
        </value>  
    </property>
</configuration>
vim yarn-site.xml
<configuration>
    <property>  
        <name>yarn.nodemanager.aux-services</name>  
        <value>mapreduce_shuffle</value>  
    </property>  
    <property>  
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>  
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.resource-tracker.address</name>  
        <value>node1:8025</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.scheduler.address</name>  
        <value>node1:8030</value>  
    </property>  
    <property>  
        <name>yarn.resourcemanager.address</name>  
        <value>node1:8040</value>  
    </property>  
</configuration>
touch /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/masters
vim /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/masters
添加 node2

注意:Hadoop 3.x中,从节点列表文件的名字为workers,不再是slaves!
touch /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/workers
vim /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop/workers
添加
node2
node3

在/usr/local/tools/hadoop/hadoop-3.1.1/sbin路径下: 

将start-dfs.sh,stop-dfs.sh两个文件顶部添加以下参数
#!/usr/bin/env bash
HDFS_DATANODE_USER=root  
HDFS_DATANODE_SECURE_USER=hdfs  
HDFS_NAMENODE_USER=root  
HDFS_SECONDARYNAMENODE_USER=root 

start-yarn.sh,stop-yarn.sh顶部也需添加以下:
#!/usr/bin/env bash
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root

创建文件夹
mkdir -p /usr/local/tools/hadoop/data/tmp
mkdir -p /usr/local/tools/hadoop/data/name
mkdir -p /usr/local/tools/hadoop/data/datanode

复制到其他主机
scp -r /usr/local/tools/hadoop node2:/usr/local/tools/
scp -r /usr/local/tools/hadoop node3:/usr/local/tools/

修改node2 node3 环境变量
vim /etc/profile

export HADOOP_HOME=/usr/local/tools/hadoop/hadoop-3.1.1
export PATH=$PATH:$HADOOP_HOME/bin

刷新配置文件
source /etc/profile

第一次启动前需要先格式化NameNode(只在node1上执行一次)
/usr/local/tools/hadoop/hadoop-3.1.1/bin/hdfs namenode -format
启动 
/usr/local/tools/hadoop/hadoop-3.1.1/sbin/start-all.sh
停止
/usr/local/tools/hadoop/hadoop-3.1.1/sbin/stop-all.sh

注:将绑定的IP或主机名(如下面示例中的mpi-1)改为0.0.0.0,而不是本地回环IP,这样就能从外部访问本机的8088端口了。比如这里需要修改yarn-site.xml中的如下配置:

cd /usr/local/tools/hadoop/hadoop-3.1.1/etc/hadoop

vim yarn-site.xml

<property>
       <name>yarn.resourcemanager.webapp.address</name>
       <value>mpi-1:8088</value>
 </property>
修改为:
 <property>
     <name>yarn.resourcemanager.webapp.address</name>
     <value>0.0.0.0:8088</value>
 </property>
测试:
http://192.168.137.101:50070/dfshealth.html#tab-datanode
测试YARN 
http://192.168.137.101:8088/cluster


cd /usr/local/tools/hadoop/hadoop-3.1.1/sbin
vim hadoop-daemon.sh
HADOOP_PID_DIR=/root/hadoop/pid #第25行
vim yarn-daemon.sh
YARN_PID_DIR=/root/hadoop/pid

scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/hadoop-daemon.sh  node2:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/hadoop-daemon.sh  node3:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/yarn-daemon.sh  node2:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/
scp -r /usr/local/tools/hadoop/hadoop-3.1.1/sbin/yarn-daemon.sh  node3:/usr/local/tools/hadoop/hadoop-3.1.1/sbin/

#执行以下命令关闭防火墙,并临时关闭SELinux
[root@node1 ~]# systemctl stop firewalld && systemctl disable firewalld
[root@node1 ~]# setenforce 0

#将SELINUX的值改成disabled(永久关闭SELinux)
[root@node1 ~]# vim /etc/selinux/config

SELINUX=disabled

#重启服务器
[root@node1 ~]# reboot

重置环境
rm -rf /var/lib/hadoop/

rm -rf /usr/local/tools/hadoop/hadoop-3.1.1/logs/*
rm -rf /usr/local/tools/hadoop/data/tmp
rm -rf /usr/local/tools/hadoop/data/name
rm -rf /usr/local/tools/hadoop/data/datanode

mkdir -p /usr/local/tools/hadoop/data/tmp
mkdir -p /usr/local/tools/hadoop/data/name
mkdir -p /usr/local/tools/hadoop/data/datanode
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值