Hadoop Cluster Setup

一、Virtual Machine Setup

1. Create the virtual machines

2. Set the hostname on each node (master, slave1, slave2)

3. Configure the network
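Steps 2 and 3 list no commands. A minimal sketch, assuming CentOS 6 (which the iptables/chkconfig commands below suggest); run the matching variant on each node:

# Step 2: set the hostname (example for master)
hostname master                      # takes effect immediately
vi /etc/sysconfig/network            # set HOSTNAME=master to persist across reboots

# Step 3: give each node the static IP it has in /etc/hosts below
vi /etc/sysconfig/network-scripts/ifcfg-eth0   # eth0 is an assumed interface name
# BOOTPROTO=static
# IPADDR=10.211.55.10
service network restart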

4. Configure /etc/hosts

vi /etc/hosts

10.211.55.10 master
10.211.55.9 slave1
10.211.55.8 slave2
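
A quick check that name resolution works (run on each node):

ping -c 1 slave1
ping -c 1 slave2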

5. Disable the firewall

service iptables stop

chkconfig iptables off

6. Disable SELinux

vi /etc/selinux/config
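
In that file, set SELinux to disabled; the change takes effect after a reboot, so setenforce can additionally drop it to permissive mode for the current session:

SELINUX=disabled

setenforce 0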

7. Configure passwordless SSH login (as the hadoop user wachoo)

ssh-keygen -t rsa

ssh-copy-id wachoo@master
ssh-copy-id wachoo@slave1
ssh-copy-id wachoo@slave2
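
To confirm the keys were copied correctly, each of these should print the remote hostname without asking for a password:

ssh wachoo@slave1 hostname
ssh wachoo@slave2 hostname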

二、Install JDK 1.8

1. Download the installation package

scp software/jdk-8u121-linux-x64.rpm wachoo@master:/home/wachoo/software

2. Install

sudo rpm -ivh jdk-8u121-linux-x64.rpm

3. Set the environment variables

vi ~/.bashrc    # your own file; no sudo needed

export JAVA_HOME=/usr/java/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
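
Reload the file and confirm the JDK resolves:

source ~/.bashrc
java -version    # should report version 1.8.0_121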

三、Install Hadoop

1. Download and extract the package

# Apache archive: Index of /dist/hadoop/core/hadoop-3.1.3

wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/core/hadoop-3.1.3/hadoop-3.1.3.tar.gz

sudo mkdir -p /usr/hadoop
sudo tar -xvf hadoop-3.1.3.tar.gz -C /usr/hadoop/
sudo chown -R wachoo:wachoo /usr/hadoop    # wachoo runs Hadoop, so it must own the tree

2. Configure the Hadoop environment variables

sudo vi /etc/profile
# Append the following at the end of the file
 
# HADOOP_HOME
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Reload the environment variables
source /etc/profile

# Verify the installation; should print Hadoop 3.1.3
hadoop version

3. Edit the configuration files

# Create working directories under /usr/hadoop

mkdir /usr/hadoop/tmp  /usr/hadoop/mr
mkdir -p /usr/hadoop/hdfs/namenode  /usr/hadoop/hdfs/datanode
mkdir -p /usr/hadoop/yarn/nodemanager  /usr/hadoop/yarn/logs
cd $HADOOP_HOME/etc/hadoop

# Files to edit:
# workers hadoop-env.sh
# core-site.xml hdfs-site.xml yarn-site.xml mapred-site.xml

#1. In workers, delete localhost and add the worker hostnames, for example:
vi workers

slave1
slave2
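
# start-dfs.sh / start-yarn.sh launch a DataNode and a NodeManager on every host listed here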

#2. In hadoop-env.sh, add the following below the commented-out JAVA_HOME line:
vi hadoop-env.sh

export JAVA_HOME=/usr/java/jdk1.8.0_121
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export HDFS_NAMENODE_USER=wachoo
export HDFS_DATANODE_USER=wachoo
export HDFS_SECONDARYNAMENODE_USER=wachoo
export YARN_RESOURCEMANAGER_USER=wachoo
export YARN_NODEMANAGER_USER=wachoo

#3. Edit core-site.xml
vi core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
        <description>NameNode address and port</description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/hadoop/tmp</value>
        <description>Base directory for temporary files</description>
    </property>
</configuration>

#4. Edit hdfs-site.xml
vi hdfs-site.xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/hadoop/hdfs/namenode</value>
        <description>
            Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.
        </description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/hadoop/hdfs/datanode</value>
        <description>
            Comma-separated list of paths on the local filesystem of a DataNode where it should store its blocks.
        </description>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:9001</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
        <description>Disable HDFS permission checking</description>
    </property>
</configuration>

#5. Edit yarn-site.xml
vi yarn-site.xml

<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:/usr/hadoop/yarn/nodemanager</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>file:/usr/hadoop/yarn/logs</value>
    </property>
</configuration>
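
One caveat from stock defaults: the MapReduce ApplicationMaster requests 1536 MB by default (yarn.app.mapreduce.am.resource.mb), which the 512 MB yarn.scheduler.maximum-allocation-mb above would reject. A minimal fix is to shrink the AM to fit by adding these two properties to mapred-site.xml in the next step:

<property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>512</value>
</property>
<property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx480M</value>
</property>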


#6. Edit mapred-site.xml
vi mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx480M</value>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx480M</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/hadoop/hadoop-3.1.3/etc/hadoop,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/common/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/common/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/lib/*
        </value>
        <description>MapReduce application classpath; jobs may fail to load their classes if it is not set</description>
    </property>
</configuration>
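
Everything above, the JDK included, must exist identically on slave1 and slave2. Assuming the same /usr/hadoop layout was prepared on each slave, the finished configuration can be pushed from master:

scp -r /usr/hadoop/hadoop-3.1.3/etc/hadoop wachoo@slave1:/usr/hadoop/hadoop-3.1.3/etc/
scp -r /usr/hadoop/hadoop-3.1.3/etc/hadoop wachoo@slave2:/usr/hadoop/hadoop-3.1.3/etc/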

4. Start the cluster (on master)

#1. Format the NameNode (first start only)

hdfs namenode -format

# cd  $HADOOP_HOME/sbin
#2. Start HDFS

start-dfs.sh

#3. Start YARN

start-yarn.sh

# Check the running Java processes
jps
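
# Expected on master: NameNode, SecondaryNameNode, ResourceManager
# Expected on each slave: DataNode, NodeManager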

5. Verify

hdfs dfs -ls /


cd $HADOOP_HOME/share/hadoop/mapreduce

hadoop jar hadoop-mapreduce-examples-3.1.3.jar pi 5 12
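
The job should finish by printing an estimated value of Pi. The web UIs give another check, on the Hadoop 3.x default ports: the NameNode at http://master:9870 and the ResourceManager at http://master:8088.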

6. Wipe and restart

# cd  $HADOOP_HOME/sbin
#1. Stop all daemons
stop-all.sh

#2. Clear the old data. The HDFS data directories must be wiped on every node as well,
#   or the reformatted NameNode and the old DataNodes end up with mismatched clusterIDs
rm -rf /usr/hadoop/tmp/*
rm -rf /usr/hadoop/hdfs/namenode/* /usr/hadoop/hdfs/datanode/*

#3. Reformat
hdfs namenode -format

start-all.sh
