- To set up a hadoop cluster, first create the hadoop user:
[root@server6 ~]# useradd -u 900 hadoop
[root@server6 ~]# id hadoop
uid=900(hadoop) gid=900(hadoop) groups=900(hadoop)
- Then extract the tarballs and create symbolic links:
[hadoop@server6 ~]$ tar -zxf hadoop-1.2.1.tar.gz
[hadoop@server6 ~]$ ln -sv hadoop-1.2.1 hadoop
`hadoop' -> `hadoop-1.2.1'
[hadoop@server6 ~]$ tar -zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server6 ~]$ ln -sv jdk1.7.0_79/ java
`java' -> `jdk1.7.0_79/'
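- Optional sanity check (not part of the original session, just a sketch): confirm both symlinks resolve and the JDK runs before pointing Hadoop at it.
ls -l hadoop java        # both links should point at the unpacked directories
java/bin/java -version   # should report java version "1.7.0_79"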
- Configure hadoop's environment variables (point JAVA_HOME at the unpacked JDK):
[hadoop@server6 hadoop]$ vim conf/hadoop-env.sh
export JAVA_HOME=/home/hadoop/java
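- With JAVA_HOME set, a quick check (a sketch, assuming the current directory is ~/hadoop) is to ask Hadoop for its version; it should print 1.2.1 if the JDK is picked up correctly.
bin/hadoop version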
- Following the official documentation, create an input directory, copy the configuration files into it, and run the example job:
[hadoop@server6 hadoop]$ mkdir input
[hadoop@server6 hadoop]$ cp conf/* input/
[hadoop@server6 hadoop]$ bin/hadoop jar hadoop-examples-1.2.1.jar \
grep input output 'dfs[a-z.]+'
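- In this standalone run the results are written to the local output/ directory; a minimal way to inspect them (as in the official single-node guide) is:
cat output/*    # prints each matched 'dfs...' string with its occurrence count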
- Then edit the configuration files as required:
[hadoop@server6 hadoop]$ vim conf/core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://172.25.23.6:9000</value>
</property>
</configuration>
[hadoop@server6 hadoop]$ vim conf/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
[hadoop@server6 hadoop]$ vim conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>172.25.23.6:9001</value>
</property>
</configuration>
- Change the localhost entry in conf/masters to 172.25.23.6; conf/slaves needs the same change:
[hadoop@server6 hadoop]$ cat conf/masters
172.25.23.6
[hadoop@server6 hadoop]$ cat conf/slaves
172.25.23.6
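- One way to make that edit in both files (a sketch, not from the original session) is with sed:
sed -i 's/localhost/172.25.23.6/' conf/masters conf/slaves   # replace the default localhost entry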
- Next, set a password for the hadoop user:
[root@server6 ~]# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: it is based on a dictionary word
BAD PASSWORD: is too simple
Retype new password:
passwd: all authentication tokens updated successfully.
- Then, as the hadoop user, generate an SSH key pair and distribute it for passwordless (trusted) login:
[hadoop@server6 hadoop]$ ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
0c:c5:02:62:12:de:21:d4:13:eb:0b:08:9c:89:f1:0b hadoop@server6.com
The key's randomart image is:
+--[ RSA 2048]----+
|=++oo. .. |
|+==+o ... |
|E=.o. .. |
|o... o |
|.... S |
| . . |
| . |
| |
| |
+-----------------+
[hadoop@server6 ~]$ ssh-copy-id 172.25.23.6
hadoop@172.25.23.6's password:
Now try logging into the machine, with "ssh '172.25.23.6'", and check in:
.ssh/authorized_keys
to make sure we haven't added extra keys that you weren't expecting.
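- A quick way to confirm the trust relationship (a sketch): the following should print the hostname without prompting for a password.
ssh 172.25.23.6 hostname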
- Then start the Hadoop Distributed File System (HDFS):
[hadoop@server6 bin]$ ./start-dfs.sh
starting namenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-namenode-server6.com.out
172.25.23.6: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-server6.com.out
172.25.23.6: starting secondarynamenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-secondarynamenode-server6.com.out
- Finally, check the running daemons with jps:
[hadoop@server6 ~]$ java/bin/jps
6329 SecondaryNameNode
6553 TaskTracker
6227 DataNode
6791 Jps
6446 JobTracker
- To stop all of the above services, run:
[hadoop@server6 hadoop]$ bin/stop-all.sh
- Next, format the NameNode:
[hadoop@server6 hadoop]$ ./bin/hadoop namenode -format
18/03/24 15:36:19 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = server6.com/172.25.23.6
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 1.2.1
STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
STARTUP_MSG: java = 1.7.0_79
************************************************************/
18/03/24 15:36:20 INFO util.GSet: Computing capacity for map BlocksMap
18/03/24 15:36:20 INFO util.GSet: VM type = 64-bit
18/03/24 15:36:20 INFO util.GSet: 2.0% max memory = 1013645312
18/03/24 15:36:20 INFO util.GSet: capacity = 2^21 = 2097152 entries
18/03/24 15:36:20 INFO util.GSet: recommended=2097152, actual=2097152
18/03/24 15:36:20 INFO namenode.FSNamesystem: fsOwner=hadoop
18/03/24 15:36:20 INFO namenode.FSNamesystem: supergroup=supergroup
18/03/24 15:36:20 INFO namenode.FSNamesystem: isPermissionEnabled=true
18/03/24 15:36:20 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
18/03/24 15:36:20 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
18/03/24 15:36:20 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
18/03/24 15:36:20 INFO namenode.NameNode: Caching file names occuring more than 10 times
18/03/24 15:36:21 INFO common.Storage: Image file /tmp/hadoop-hadoop/dfs/name/current/fsimage of size 112 bytes saved in 0 seconds.
18/03/24 15:36:21 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
18/03/24 15:36:21 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
18/03/24 15:36:22 INFO common.Storage: Storage directory /tmp/hadoop-hadoop/dfs/name has been successfully formatted.
18/03/24 15:36:22 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at server6.com/172.25.23.6
************************************************************/
- Then start all of the services again:
[hadoop@server6 hadoop]$ bin/start-all.sh
[hadoop@server6 hadoop]$ ~/java/bin/jps
7573 JobTracker
7690 TaskTracker
7242 NameNode
7805 Jps
7353 DataNode
7478 SecondaryNameNode
- Create two directories in the distributed file system and list them:
[hadoop@server6 hadoop]$ bin/hadoop fs -mkdir hadooptest
[hadoop@server6 hadoop]$ bin/hadoop fs -ls
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2018-03-24 15:40 /user/hadoop/hadooptest
[hadoop@server6 hadoop]$ bin/hadoop fs -mkdir hadooplinux
[hadoop@server6 hadoop]$ bin/hadoop fs -ls
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2018-03-24 15:41 /user/hadoop/hadooplinux
drwxr-xr-x - hadoop supergroup 0 2018-03-24 15:40 /user/hadoop/hadooptest
- Put files into the newly created directory:
[hadoop@server6 hadoop]$ bin/hadoop fs -put conf/* hadooptest
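- To confirm the upload (a sketch using standard HDFS shell commands, assuming the put above succeeded):
bin/hadoop fs -ls hadooptest                    # lists the copied configuration files
bin/hadoop fs -cat hadooptest/core-site.xml     # prints one of them back from HDFS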
- The NameNode web interface on port 50070 shows information about the hadoop nodes.
- The JobTracker administration interface is reachable on port 50030.
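- Both pages can also be probed from the shell with curl (a sketch, using the NameNode address configured above):
curl -s -o /dev/null -w '%{http_code}\n' http://172.25.23.6:50070/   # NameNode web UI
curl -s -o /dev/null -w '%{http_code}\n' http://172.25.23.6:50030/   # JobTracker web UI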
- The state of the DFS can also be checked from the command line:
[hadoop@server6 hadoop]$ bin/hadoop dfsadmin -report
Configured Capacity: 27058577408 (25.2 GB)
Present Capacity: 11108106240 (10.35 GB)
DFS Remaining: 11107684352 (10.34 GB)
DFS Used: 421888 (412 KB)
DFS Used%: 0%
Under replicated blocks: 1
Blocks with corrupt replicas: 0
Missing blocks: 0
-------------------------------------------------
Datanodes available: 1 (1 total, 0 dead)
Name: 172.25.23.6:50010
Decommission Status : Normal
Configured Capacity: 27058577408 (25.2 GB)
DFS Used: 421888 (412 KB)
Non DFS Used: 15950471168 (14.86 GB)
DFS Remaining: 11107684352(10.34 GB)
DFS Used%: 0%
DFS Remaining%: 41.05%
Last contact: Sat Mar 24 16:00:04 CST 2018
- Directories can also be removed:
[hadoop@server6 hadoop]$ bin/hadoop fs -rmr output
Deleted hdfs://172.25.23.6:9000/user/hadoop/output
[hadoop@server6 hadoop]$ bin/hadoop fs -ls
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2018-03-24 15:41 /user/hadoop/hadooplinux
drwxr-xr-x - hadoop supergroup 0 2018-03-24 15:43 /user/hadoop/hadooptest
- Next, two more machines are added to the hadoop cluster:
server7.com 172.25.23.7
server8.com 172.25.23.8
- First stop the services running on the existing node:
[hadoop@server6 hadoop]$ bin/stop-all.sh
stopping jobtracker
172.25.23.6: stopping tasktracker
stopping namenode
172.25.23.6: stopping datanode
172.25.23.6: stopping secondarynamenode
- Next, modify hdfs-site.xml to raise the replication factor to 2:
[hadoop@server6 hadoop]$ vim conf/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
- The slaves file also needs to be updated to list the IP addresses of the slave nodes:
[hadoop@server6 hadoop]$ vim conf/slaves
172.25.23.7
172.25.23.8
- Next, make sure rpcbind is running correctly on every node, because NFS will be used to share the hadoop home directory (a scripted version of this check is sketched below):
server6.com
rpcbind (pid 1055) is running...
server7.com
rpcbind (pid 975) is running...
server8.com
rpcbind (pid 1229) is running...
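- The check above can be scripted from server6 (a sketch; assumes passwordless ssh and RHEL 6-style init scripts):
for h in server6.com server7.com server8.com; do
    echo "== $h =="
    ssh "$h" /etc/init.d/rpcbind status
done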
- Edit the NFS exports file on server6:
[root@server6 ~]# cat /etc/exports
/home/hadoop 172.25.23.0/255.255.255.0(rw,anonuid=900,anongid=900)
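- The export still has to be activated and the NFS service started on server6 (a sketch for RHEL 6-style services; the exact service names are assumptions):
service nfs start          # start the NFS server
exportfs -rv               # re-export everything listed in /etc/exports
showmount -e 172.25.23.6   # should list /home/hadoop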
- Add the hadoop user on the remaining nodes as well, with the same UID=900 and GID=900:
[root@server7 ~]# useradd -u 900 hadoop
[root@server7 ~]# id hadoop
uid=900(hadoop) gid=900(hadoop) groups=900(hadoop)
[root@server8 ~]# useradd -u 900 hadoop
[root@server8 ~]# id hadoop
uid=900(hadoop) gid=900(hadoop) groups=900(hadoop)
- Then mount the shared directory on each new node and switch to the hadoop user:
[root@server7 ~]# mount 172.25.23.6:/home/hadoop /home/hadoop/
[root@server7 ~]# su - hadoop
[hadoop@server7 ~]$
[root@server8 ~]# mount 172.25.23.6:/home/hadoop /home/hadoop/
[root@server8 ~]# su - hadoop
[hadoop@server8 ~]$
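- To verify the mount and make it survive a reboot (a sketch; run as root on server7 and server8):
df -h /home/hadoop                                                            # should show 172.25.23.6:/home/hadoop
echo '172.25.23.6:/home/hadoop /home/hadoop nfs defaults 0 0' >> /etc/fstab   # persistent mount entry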
- Make sure the nodes trust each other over SSH:
[hadoop@server6 ~]$ ssh 172.25.23.7
The authenticity of host '172.25.23.7 (172.25.23.7)' can't be established.
RSA key fingerprint is 94:a2:20:ac:ab:30:b8:87:fc:8e:40:b1:d9:17:b5:97.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '172.25.23.7' (RSA) to the list of known hosts.
[hadoop@server6 ~]$ ssh 172.25.23.8
The authenticity of host '172.25.23.8 (172.25.23.8)' can't be established.
RSA key fingerprint is fe:43:ff:0c:00:b9:27:07:03:aa:25:d7:be:b8:bc:e3.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '172.25.23.8' (RSA) to the list of known hosts.
[hadoop@server8 ~]$ logout
Connection to 172.25.23.8 closed.
- Re-format the NameNode on server6.com:
[hadoop@server6 hadoop]$ bin/hadoop namenode -format
[hadoop@server6 hadoop]$ bin/start-dfs.sh
starting namenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-namenode-server6.com.out
172.25.23.8: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-server8.com.out
172.25.23.7: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-server7.com.out
[hadoop@server6 hadoop]$ ~/java/bin/jps
10985 Jps
10770 NameNode
10923 SecondaryNameNode
- This output is clearly different from before, because the new nodes server7.com and server8.com have joined the cluster.
- On server7.com and server8.com, check the running daemons:
[hadoop@server7 hadoop]$ ~/java/bin/jps
1764 Jps
1694 DataNode
[hadoop@server8 ~]$ ~/java/bin/jps
1579 Jps
1498 DataNode
- Continue on server6.com and start the MapReduce services:
[hadoop@server6 hadoop]$ bin/start-mapred.sh
starting jobtracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-jobtracker-server6.com.out
172.25.23.8: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-server8.com.out
172.25.23.7: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-server7.com.out
- Next, try running a larger computation job:
[hadoop@server6 hadoop]$ bin/hadoop jar hadoop-examples-1.2.1.jar grep input output 'dfs[a-z.]+'
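- When the job finishes, the results can be read back from HDFS (a sketch, same as in the official example):
bin/hadoop fs -cat output/*    # each matched 'dfs...' string with its occurrence count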
- The web interface can be viewed while the job runs.
- The pages for the newly added nodes can be checked there as well.
- A summary after the new job run can also be obtained from the command line:
[hadoop@server6 hadoop]$ bin/hadoop dfsadmin -report
Configured Capacity: 26649571328 (24.82 GB)
Present Capacity: 22909509806 (21.34 GB)
DFS Remaining: 22908887040 (21.34 GB)
DFS Used: 622766 (608.17 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)
Name: 172.25.23.8:50010
Decommission Status : Normal
Configured Capacity: 8042254336 (7.49 GB)
DFS Used: 311383 (304.08 KB)
Non DFS Used: 1654792105 (1.54 GB)
DFS Remaining: 6387150848(5.95 GB)
DFS Used%: 0%
DFS Remaining%: 79.42%
Last contact: Sat Mar 24 17:40:52 CST 2018
Name: 172.25.23.7:50010
Decommission Status : Normal
Configured Capacity: 18607316992 (17.33 GB)
DFS Used: 311383 (304.08 KB)
Non DFS Used: 2085269417 (1.94 GB)
DFS Remaining: 16521736192(15.39 GB)
DFS Used%: 0%
DFS Remaining%: 88.79%
Last contact: Sat Mar 24 17:40:52 CST 2018