Deploying a Hadoop Cluster in Production (DNS configuration, NFS sharing, awk-based distribution)

The main differences between deploying a Hadoop cluster in production and in a test environment are:

1. Use DNS instead of hosts files to resolve hostnames

2. Use NFS to share the SSH key file instead of copying it to each node by hand

3. Distribute the Hadoop installation with a batch script instead of copying it node by node


Part 1. Environment and node layout

1. Node planning

Hostname             IP address        Hadoop role    Other services
dns.hadoop.com       192.168.80.100    -              DNS server, NTP time server
master.hadoop.com    192.168.80.101    NameNode       -
slave1.hadoop.com    192.168.80.102    DataNode       -
slave2.hadoop.com    192.168.80.103    DataNode       -

2. Environment

Host machine: i3-3240 CPU, 8 GB RAM, 1 TB disk, Windows 7 64-bit Ultimate

Virtualization: VMware 10.0.2 build-1744117

Guest OS: CentOS 7.0 x64

Software: jdk1.8.0_20, hadoop 2.5.1, bind 9.10.1



Part 2. Install the operating system and software

1. Install CentOS 7.0 (the installation itself is not covered here; I installed the minimal/CLI version)

After installing the OS, remember to stop the firewall:

systemctl stop firewalld    (CentOS 7.0 uses firewalld by default, and the commands differ from earlier releases)

systemctl disable firewalld    (keep the firewall from starting at boot)

Disable SELinux:

vi /etc/selinux/config

Change SELINUX=enforcing to SELINUX=disabled
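If you prefer to make the change non-interactively, a minimal sketch (assuming the stock /etc/selinux/config layout):

sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0    (switches to permissive for the current session; the disabled setting takes full effect after a reboot)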

2. Install the JDK and Hadoop

I installed the JDK from the rpm package:

[root@Master hadoop]# rpm -ivh jdk-8u20-linux-x64.rpm

The default install directory is /usr/java/jdk1.8.0_20 (I did not change it).

[hadoop@Master ~]$ tar -xvzf hadoop-2.5.1.tar.gz    (extracts into the current directory by default)

3. Configure environment variables

[root@Master ~]# vi /etc/profile.d/java.sh

Add:

export JAVA_HOME=/usr/java/jdk1.8.0_20

export PATH=.:$PATH:$JAVA_HOME/bin

[root@Master ~]# vi /etc/profile.d/hadoop.sh    (this variable is not required; it just makes the commands easier to run)

Add:

export HADOOP_HOME=/home/hadoop/hadoop-2.5.1

export PATH=.:$PATH:$HADOOP_HOME/bin

[root@Master ~]# source /etc/profile
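A quick way to confirm the variables took effect (the reported versions should match what you installed):

[root@Master ~]# java -version
[root@Master ~]# hadoop version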



Part 3. Install DNS

1. Install the packages

[root@CLI ]# yum install bind bind-utils bind-libs

[root@CLI ]# yum install bind-chroot    (confines bind to a chroot directory)

[root@CLI ]# vi /etc/rsyslog.conf

Add:

$AddUnixListenSocket /var/named/chroot/dev/log
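rsyslog needs a restart to pick up the new socket for the chrooted named:

[root@CLI ]# systemctl restart rsyslog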

2. Edit /etc/named.conf

[root@CLI ]# vi /etc/named.conf

options {
        listen-on port 53 { 127.0.0.1; };    // change 127.0.0.1 to any
        listen-on-v6 port 53 { ::1; };
        directory       "/var/named";
        dump-file       "/var/named/data/cache_dump.db";
        statistics-file "/var/named/data/named_stats.txt";
        memstatistics-file "/var/named/data/named_mem_stats.txt";
        allow-query     { localhost; };    // change localhost to any

        /*
         - If you are building an AUTHORITATIVE DNS server, do NOT enable recursion.
         - If you are building a RECURSIVE (caching) DNS server, you need to enable
           recursion.
         - If your recursive DNS server has a public IP address, you MUST enable access
           control to limit queries to your legitimate users. Failing to do so will
           cause your server to become part of large scale DNS amplification
           attacks. Implementing BCP38 within your network would greatly
           reduce such attack surface
        */
        recursion yes;

        dnssec-enable yes;
        dnssec-validation yes;
        dnssec-lookaside auto;

        /* Path to ISC DLV key */
        bindkeys-file "/etc/named.iscdlv.key";

        managed-keys-directory "/var/named/dynamic";

        pid-file "/run/named/named.pid";
        session-keyfile "/run/named/session.key";
};

logging {
        channel default_debug {
                file "data/named.run";
                severity dynamic;
        };
};

zone "." IN {
        type hint;
        file "named.ca";
};

include "/etc/named.rfc1912.zones";
//include "/etc/named.root.key";    // comment this line out
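Before moving on, the syntax of the main configuration can be validated with named-checkconf (installed with the bind package):

[root@CLI ]# named-checkconf /etc/named.conf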

3. Configure named.rfc1912.zones

[root@CLI ]# vi /etc/named.rfc1912.zones

Modify it to include:

zone "hadoop.com" IN {
        type master;
        file "/var/named/chroot/var/named/hadoop.com.zone";  // originally I wrote just hadoop.com.zone, but named could not find the file at runtime, so the full path is required
        allow-update { none; };
};


zone "80.168.192.in-addr.arpa" IN {
        type master;
        file "/var/named/chroot/var/named/80.168.192.zone";   // originally I wrote just 80.168.192.zone, but named could not find the file at runtime, so the full path is required
        allow-update { none; };
};


4. Create the zone files

We need a forward zone file, hadoop.com.zone, and a reverse zone file, 80.168.192.zone.

Both should be created under /var/named/chroot/var/named.

[root@CLI named ]# vi 80.168.192.zone

$TTL 86400
@       IN   SOA     dns.hadoop.com. root.hadoop.com.  (
                                      1997022700 ; Serial
                                      28800      ; Refresh
                                      14400      ; Retry
                                      3600000    ; Expire
                                      86400 )    ; Minimum
@        IN      NS dns.hadoop.com.
100 IN  PTR dns.hadoop.com.
101 IN  PTR master.hadoop.com.
102 IN  PTR slave1.hadoop.com.
103 IN  PTR slave2.hadoop.com.

[root@CLI named ]# vi hadoop.com.zone

$TTL 1D
@       IN SOA  dns.hadoop.com.  root.hadoop.com. (
                                        20140221        ; serial
                                        1D      ; refresh
                                        1H      ; retry
                                        1W      ; expire
                                        3H )    ; minimum
@ IN NS dns.hadoop.com.
dns.hadoop.com.     IN A 192.168.80.100

master.hadoop.com.    IN A 192.168.80.101
slave1.hadoop.com.    IN A 192.168.80.102

slave2.hadoop.com.      IN A 192.168.80.103


[root@CLI named ]# chgrp named *


5. Edit /etc/resolv.conf on every node

nameserver 192.168.80.100
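Note that NetworkManager on CentOS 7 may overwrite /etc/resolv.conf at boot. One way to make the setting stick (a sketch; the interface file name ifcfg-eno16777736 is only an example and depends on your NIC) is to pin the DNS server in the interface configuration:

[root@master ~]# vi /etc/sysconfig/network-scripts/ifcfg-eno16777736

Add:

DNS1=192.168.80.100
PEERDNS=no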


6. Check the zone files


[root@dns ~]# named-checkzone dns.hadoop.com /var/named/chroot/var/named/hadoop.com.zone
/var/named/chroot/var/named/hadoop.com.zone:10: ignoring out-of-zone data (master.hadoop.com)
/var/named/chroot/var/named/hadoop.com.zone:11: ignoring out-of-zone data (slave1.hadoop.com)
zone dns.hadoop.com/IN: loaded serial 20140221
OK



[root@dns ~]# named-checkzone 192.168.80.100 /var/named/chroot/var/named/80.168.192.zone
zone 192.168.80.100/IN: loaded serial 1997022700
OK
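The zone name passed to named-checkzone should match the zone declared in named.rfc1912.zones; that is why the first check above reports "out-of-zone data" for the other hosts. Using the configured zone names avoids the warnings:

[root@dns ~]# named-checkzone hadoop.com /var/named/chroot/var/named/hadoop.com.zone
[root@dns ~]# named-checkzone 80.168.192.in-addr.arpa /var/named/chroot/var/named/80.168.192.zone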


7. Start the service

[root@dns ~]# systemctl restart named


Test name resolution:


[root@dns ~]# nslookup dns.hadoop.com
Server:         192.168.80.100
Address:        192.168.80.100#53

Name:   dns.hadoop.com
Address: 192.168.80.100

[root@dns ~]# nslookup master.hadoop.com
Server:         192.168.80.100
Address:        192.168.80.100#53

Name:   master.hadoop.com
Address: 192.168.80.101


[root@dns ~]# nslookup slave1.hadoop.com
Server:         192.168.80.100
Address:        192.168.80.100#53

Name:   slave1.hadoop.com
Address: 192.168.80.102

[root@dns ~]# nslookup slave2.hadoop.com
Server:         192.168.80.100
Address:        192.168.80.100#53

Name:   slave2.hadoop.com
Address: 192.168.80.103


8. Enable named at boot


[root@dns ~]# systemctl enable named


Part 4. Install and configure NFS


1. Install

[root@master ~]# yum install nfs-utils

2. Start the services

[root@master ~]# systemctl restart rpcbind
[root@master ~]# systemctl restart nfs
[root@master ~]# systemctl restart nfslock

3. Enable them at boot

[root@master ~]#  systemctl enable  nfs-server
[root@master ~]#  systemctl enable rpcbind
[root@master ~]# systemctl enable  nfs-lock


4. Set up the shared directory

[hadoop@master ~]$ mkdir share

[root@master ~]#  vi /etc/exports

Add:

/home/hadoop/share *(insecure,rw,async,no_root_squash)

Where:

/home/hadoop/share is the directory NFS exports

rw means read-write; ro would mean read-only
sync writes changes to disk immediately; async buffers them in memory first
no_root_squash gives the remote root user full access to the share (without this option the remote root user ends up read-only)
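After editing /etc/exports, the export table can be reloaded and inspected without restarting the services (exportfs ships with nfs-utils):

[root@master ~]# exportfs -rv
[root@master ~]# exportfs -v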

5. Mount the shared directory

List the exports visible locally, i.e. master's shared directory:

[root@master ~]# showmount -e localhost
Export list for localhost:
/home/hadoop/share *

[root@master etc]# mkdir /nfs_share/
[root@master etc]# mount -t nfs 192.168.80.101:/home/hadoop/share  /nfs_share/

Repeat the same steps on the other clients (slave1, slave2).
slave1:

[root@slave1 ~]# showmount -e 192.168.80.101    (list master's exported directories)
Export list for 192.168.80.101:
/home/hadoop/share *
[root@slave1 ~]# mkdir /nfs_share
[root@slave1 ~]# mount -t nfs 192.168.80.101:/home/hadoop/share /nfs_share/

6. Mount the NFS share automatically at boot

[root@master ~]# vi /etc/fstab

Add:

192.168.80.101:/home/hadoop/share  /nfs_share            nfs     defaults        1 1


#
# /etc/fstab
# Created by anaconda on Wed Oct 29 08:56:41 2014
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
UUID=f30c6793-da3c-47d5-9502-4f2d2765d082 /                       ext4    defaults        1 1
UUID=b6f26b57-4be7-41cc-961a-079c07834bc5 swap                    swap    defaults        0 0
192.168.80.101:/home/hadoop/share       /nfs_share                nfs     defaults        1 1

Set up the other nodes the same way.
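A quick way to test the fstab entry without rebooting (mount -a mounts everything in /etc/fstab that is not already mounted):

[root@slave1 ~]# umount /nfs_share
[root@slave1 ~]# mount -a
[root@slave1 ~]# df -h /nfs_share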


Part 5. Passwordless SSH login using NFS

1. Generate the key pair

[hadoop@master ~]$ ssh-keygen -t rsa

Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
5d:f8:0f:50:d3:61:56:34:62:1f:46:20:3f:f6:27:db hadoop@slave1.hadoop.com
The key's randomart image is:
+--[ RSA 2048]----+
|           .o=*Oo|
|           o++= o|
|          o .+ . |
|         . +. o  |
|        S . o ...|
|             o +.|
|              o E|
|                 |
|                 |
+-----------------+

Repeat this on the other nodes.

2. Merge the public keys into authorized_keys

[hadoop@master .ssh]$ cp ~/.ssh/id_rsa.pub authorized_keys
[hadoop@master .ssh]$ ll
total 12
-rw-r--r--  1 hadoop hadoop  395 Oct 30 11:43 authorized_keys
-rw-------. 1 hadoop hadoop 1679 Oct 29 12:57 id_rsa
-rw-r--r--. 1 hadoop hadoop  395 Oct 29 12:57 id_rsa.pub

[hadoop@master .ssh]$ ssh slave1.hadoop.com cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@master .ssh]$ ssh slave2.hadoop.com cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

[hadoop@master nfs_share]$ mkdir .ssh    (create a .ssh directory inside the shared directory)
[hadoop@master nfs_share]$ cp ~/.ssh/authorized_keys /nfs_share/.ssh/authorized_keys

3. Create symlinks to the shared authorized_keys

On slave1:

[hadoop@slave1 ~]$ ln -s /nfs_share/.ssh/authorized_keys  ~/.ssh/authorized_keys

On slave2:

[hadoop@slave2 ~]$ ln -s /nfs_share/.ssh/authorized_keys  ~/.ssh/authorized_keys


4. Verify

[hadoop@master ~]$ ssh slave1
Last login: Thu Oct 30 13:24:14 2014 from 192.168.80.101

The remaining connections can be verified the same way.

5. Troubleshooting

If passwordless login still does not work, pay particular attention to the following (the matching chmod commands are sketched below):

1. The permissions on the NFS shared directory /nfs_share on the master node: this directory should be 744, i.e. only the owner has write permission on it.

2. On the master node, authorized_keys should be 644 and the .ssh directory should be 700.
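For reference, the chmod commands matching the permissions listed above (run each one on the machine, and as the user, that owns the path in question):

chmod 744 /nfs_share
chmod 700 ~/.ssh
chmod 644 ~/.ssh/authorized_keys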


Part 6. Install Hadoop (install the JDK on every node first)

1. Extract Hadoop

2. Edit the configuration files

core-site.xml

    <configuration>  
        <property>  
            <name>hadoop.tmp.dir</name>  
            <value>/home/hadoop/tmp</value>  
            <description>Abase for other temporary directories.</description>  
        </property>  
        <property>  
            <name>fs.defaultFS</name>  
            <value>hdfs://master.hadoop.com:9000</value>  
        </property>  
        <property>  
            <name>io.file.buffer.size</name>  
            <value>4096</value>  
        </property>  
    </configuration>  

hdfs-site.xml

    <configuration>  
        <property>  
            <name>dfs.nameservices</name>  
            <value>hadoop-cluster1</value>  
        </property>  
        <property>  
            <name>dfs.namenode.secondary.http-address</name>  
            <value>master.hadoop.com:50090</value>  
        </property>  
        <property>  
            <name>dfs.namenode.name.dir</name>  
            <value>file:///home/hadoop/dfs/name</value>  
        </property>  
        <property>  
            <name>dfs.datanode.data.dir</name>  
            <value>file:///home/hadoop/dfs/data</value>  
        </property>  
        <property>  
            <name>dfs.replication</name>  
            <value>2</value>  
        </property>  
        <property>  
            <name>dfs.webhdfs.enabled</name>  
            <value>true</value>  
        </property>  
    </configuration>  

mapred-site.xml

<pre name="code" class="html"><span style="font-size:18px;"><configuration>  </span>

<property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobtracker.http.address</name> <value>master.hadoop.com:50030</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>master.hadoop.com:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>master.hadoop.com:19888</value> </property>
<span style="font-size:18px;"></configuration>  </span>

 

yarn-site.xml

    <configuration>  
      
    <!-- Site specific YARN configuration properties -->  
        <property>  
            <name>yarn.nodemanager.aux-services</name>  
            <value>mapreduce_shuffle</value>  
        </property>  
        <property>  
            <name>yarn.resourcemanager.address</name>  
            <value>master.hadoop.com:8032</value>  
        </property>  
        <property>  
            <name>yarn.resourcemanager.scheduler.address</name>  
            <value>master.hadoop.com:8030</value>  
        </property>  
        <property>  
            <name>yarn.resourcemanager.resource-tracker.address</name>  
            <value>master.hadoop.com:8031</value>  
        </property>  
        <property>  
            <name>yarn.resourcemanager.admin.address</name>  
            <value>master.hadoop.com:8033</value>  
        </property>  
        <property>  
            <name>yarn.resourcemanager.webapp.address</name>  
            <value>master.hadoop.com:8088</value>  
        </property>  
    </configuration>  

slaves

slave1.hadoop.com

slave2.hadoop.com


hadoop-env.sh

Change:

export  JAVA_HOME=/usr/java/jdk1.8.0_20


yarn-env.sh

Change (be sure to uncomment the line):

export  JAVA_HOME=/usr/java/jdk1.8.0_20


3. Generate a distribution script with awk

[hadoop@master hadoop]$ cat slaves | awk '{print "scp -rp hadoop-2.5.1 hadoop@"$1":/home/hadoop"}' > ~/cphadoop.sh
[hadoop@master ~]$ chmod 755 cphadoop.sh
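Given the slaves file above, the generated cphadoop.sh should contain one scp line per slave:

scp -rp hadoop-2.5.1 hadoop@slave1.hadoop.com:/home/hadoop
scp -rp hadoop-2.5.1 hadoop@slave2.hadoop.com:/home/hadoop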

4. Distribute

[hadoop@master ~]$  ./cphadoop.sh


5. Format the NameNode

hadoop namenode -format
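The hadoop namenode form still works in Hadoop 2.x but is reported as deprecated; the equivalent command is:

hdfs namenode -format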

6. Start Hadoop

start-dfs.sh

start-yarn.sh
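After the daemons come up, a simple sanity check is jps on each node plus the web UIs (the exact process list depends on your configuration):

[hadoop@master ~]$ jps       (expect NameNode, SecondaryNameNode, ResourceManager)
[hadoop@slave1 ~]$ jps       (expect DataNode, NodeManager)

HDFS web UI: http://master.hadoop.com:50070
YARN web UI: http://master.hadoop.com:8088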












































