Hadoop部署

一、基础配置

关闭防火墙及SELinux(所有服务器)

[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld   (禁止开机自启,否则重启后防火墙会恢复)
[root@master ~]# setenforce 0
setenforce: SELinux is disabled
[root@master ~]# vi /etc/hosts   (添加映射)
192.168.200.98 master
192.168.200.99 slave1
192.168.200.100 slave2
192.168.200.110 slave3

配置ssh免密

[root@master ~]# ssh-keygen -t rsa     (一路回车)
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): 
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /root/.ssh/id_rsa.
#####部分省略
[root@master ~]# ssh-copy-id master
[root@master ~]# ssh-copy-id slave1
[root@master ~]# ssh-copy-id slave2
[root@master ~]# ssh-copy-id slave3
[root@master ~]# scp /etc/hosts root@192.168.200.99:/etc/
[root@master ~]# scp /etc/hosts root@192.168.200.100:/etc/
[root@master ~]# scp /etc/hosts root@192.168.200.110:/etc/

二、安装服务过程(全分布)

安装JDK

[root@master opt]# ll
-rw-r--r-- 1 root root 212046774 6月 29 2019 hadoop-2.7.2.tar.gz
-rw-r--r-- 1 root root 185540433 6月 29 2019 jdk-8u131-linux-x64.tar.gz
[root@master opt]# tar xf jdk-8u131-linux-x64.tar.gz
[root@master opt]# vi /etc/profile
#JAVA
export JAVA_HOME=/opt/jdk1.8.0_131
export PATH=$PATH:$JAVA_HOME/bin
[root@master opt]# source !$
[root@master opt]# java -version
java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)

####相同步骤复制到各节点,部分内容省略
[root@master opt]# scp -r jdk1.8.0_131/ slave1:/opt/
[root@master opt]# scp /etc/profile slave1:/etc/

安装Hadoop

[root@master opt]# tar xf hadoop-2.7.2.tar.gz
[root@master opt]# cd hadoop-2.7.2/etc/hadoop/
[root@master hadoop]# vi core-site.xml 
<configuration>

        <property>
                 <name>fs.defaultFS</name>
                <value>hdfs://master:8020</value>
        </property>

        <property>
                <name>io.file.buffer.size</name>
                <value>4096</value>
        </property>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/opt/hadoop-2.7.2/data</value>

        </property>
        <property> 
                 <name>hadoop.http.staticuser.user</name>
                <value>root</value> 
        </property>
</configuration>

修改hdfs-site.xml

[root@master hadoop]# vi hdfs-site.xml
<configuration>
                <!-- 配置副本因子 -->
                <property>
                        <name>dfs.replication</name>
                        <value>3</value>
                </property>

                <!-- 配置块大小(128MB) -->
                <property>
                        <name>dfs.block.size</name>
                        <value>134217728</value>
                </property>

                <!-- 配置元数据的存储位置 -->
                <property>
                        <name>dfs.namenode.name.dir</name>
                        <value>file:///opt/data/dfs/name</value>
                </property>

                <!-- 配置datanode数据存放位置 -->
                <property>
                        <name>dfs.datanode.data.dir</name>
                        <value>/opt/data/dfs/data</value>
                </property>

                <!-- 配置fs检查点目录存放位置 -->
                <property>
                        <name>fs.checkpoint.dir</name>
                        <value>/opt/data/checkpoint/dfs/cname</value>
                </property>

                <!-- 配置hdfs的namenode的web ui地址 -->
                <property>
                        <name>dfs.http.address</name>
                        <value>master:50070</value>
                </property>

                <!-- 配置dfs的SecondaryNameNode的web ui地址 -->
                <property>
                        <name>dfs.secondary.http.address</name>
                        <value>slave1:50090</value>
                </property>

                <!-- 是否开启web操作dfs -->
                <property>
                        <name>dfs.webhdfs.enabled</name>
                        <value>true</value>
                </property>

                <!-- 是否启用hdfs的权限检查 -->
                <property>
                        <name>dfs.permissions</name>
                        <value>false</value>
                </property>
</configuration>

修改mapred-site.xml

[root@master hadoop]# vi mapred-site.xml

<configuration>

        <!-- 指定mapreduce运行框架 -->
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
                <final>true</final>
        </property>

        <!-- 历史服务的通信地址 -->
        <property>
                <name>mapreduce.jobhistory.address</name>
                <value>master:10020</value>
        </property>

        <!-- 历史服务的web ui通信地址 -->
        <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>master:19888</value>
        </property>

</configuration>

修改yarn-site.xml

[root@master hadoop]# vi yarn-site.xml

<configuration>

        <!-- 指定resourcemanager所在的主机名 -->
        <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>master</value>
        </property>

        <!-- 指定mapreduce的shuffle -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>

        <!-- 指定resourcemanager内部通信地址 -->
        <property>
                <name>yarn.resourcemanager.address</name>
                <value>master:8032</value>
        </property>

        <!-- 指定scheduler的内部通信地址 -->
        <property>
                <name>yarn.resourcemanager.scheduler.address</name>
                <value>master:8030</value>
        </property>

        <!-- 指定rm的resource-tracker的内部通信地址 -->
        <property>
                <name>yarn.resourcemanager.resource-tracker.address</name>
                <value>master:8031</value>
        </property>

        <!-- 指定rm的admin的内部通信地址 -->
        <property>
                <name>yarn.resourcemanager.admin.address</name>
                <value>master:8033</value>
        </property>

        <!-- 指定rm的web ui地址 -->
        <property>
                <name>yarn.resourcemanager.webapp.address</name>
                <value>master:8088</value>
        </property>

					<property>   <!-- 开启日志聚集功能 -->
    									<name>yarn.log-aggregation-enable</name>
									    <value>true</value>
					</property>

					<property>   <!-- 设置日志聚集服务器地址 -->
 								   <name>yarn.log.server.url</name>
   								 <value>http://master:19888/jobhistory/logs</value>
					</property>

					<property>   <!-- 设置日志保留时间为 7 天 -->
    									<name>yarn.log-aggregation.retain-seconds</name>
  								  <value>604800</value>
					</property>
</configuration>

修改hadoop-env.sh

[root@master hadoop]# vi hadoop-env.sh 
export JAVA_HOME=/opt/jdk1.8.0_131

添加节点信息,可写主机名

[root@master hadoop]# vi slaves 
192.168.200.98
192.168.200.99
192.168.200.100
192.168.200.110

设置Hadoop变量

[root@master hadoop]# vi /etc/profile
#HADOOP
export HADOOP_HOME=/opt/hadoop-2.7.2
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
[root@master hadoop]# source /etc/profile   (使变量立即生效;加入sbin后可直接调用各daemon脚本)

将修改完配置的Hadoop拷贝到Slave节点

[root@master opt]# scp -r hadoop-2.7.2 slave1:/opt/
[root@master opt]# scp -r hadoop-2.7.2 slave2:/opt/
[root@master opt]# scp -r hadoop-2.7.2 slave3:/opt/

格式化Namenode

[root@master hadoop]# cd /opt/hadoop-2.7.2/bin   (hadoop命令位于bin目录,配置目录下无此脚本)
[root@master bin]# ./hadoop namenode -format
#出现这条说明格式化成功
**INFO common.Storage: Storage directory /opt/data/dfs/name has been successfully formatted.**
部分省略.......

三、启动方式

1、全启动

[root@master sbin]# ./start-all.sh 
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [master]
master: starting namenode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-namenode-master.out
192.168.200.98: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-master.out
192.168.200.99: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave1.out
192.168.200.100: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave2.out
192.168.200.110: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave3.out
Starting secondary namenodes [slave1]
slave1: starting secondarynamenode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-secondarynamenode-slave1.out
starting yarn daemons
starting resourcemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-resourcemanager-master.out
192.168.200.98: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-master.out
192.168.200.99: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave1.out
192.168.200.110: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave3.out
192.168.200.100: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave2.out

2、模式启动

[root@master sbin]# ./start-dfs.sh 
[root@master sbin]# ./start-yarn.sh

3、进程启动

[root@master sbin]# hadoop-daemon.sh start namenode
[root@master sbin]# hadoop-daemons.sh start datanode
[root@master sbin]# yarn-daemon.sh start resourcemanager
[root@master sbin]# yarn-daemons.sh start nodemanager

四、测试

访问HDFS 50070页面
(HDFS NameNode Web页面截图,此处略)

访问YARN 8088 页面
(YARN ResourceManager Web页面截图,此处略)

五、命令行

1、HDFS

[root@master hadoop]# hadoop fs -mkdir /test    #HDFS创建目录
[root@master hadoop]# hadoop fs -ls /
Found 1 items
drwxr-xr-x   - root supergroup          0 2022-12-09 00:00 /test
[root@master hadoop]# hadoop fs -put /opt/hadoop-2.7.2/etc/hadoop/core-site.xml /test/  #上传文件至HDFS
[root@master hadoop]# hadoop fs -ls /test/
Found 1 items
-rw-r--r--   3 root supergroup        441 2022-12-09 00:01 /test/core-site.xml
[root@master hadoop]# hadoop fs -du -s -h  /test/core-site.xml    #查看文件占用
441  /test/core-site.xml

命令行操作可参照官方文档:https://hadoop.apache.org/docs/r1.0.4/cn/commands_manual.html (注意:该中文手册对应较旧的r1.0.4版本;与本文2.7.2匹配的英文文档见 https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/FileSystemShell.html )

2、YARN

[root@master hadoop]# yarn application -list   #查看任务
22/12/09 00:05:40 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.200.98:8032
Total number of applications (application-types: [] and states: [SUBMITTED, ACCEPTED, RUNNING]):0
                Application-Id	    Application-Name	    Application-Type	      User	     Queue	             State	       Final-State	       Progress	                       Tracking-URL
 
 [root@master hadoop]# yarn node -list -all    #查看节点状态
22/12/09 00:09:33 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.200.98:8032
Total Nodes:4
         Node-Id	     Node-State	Node-Http-Address	Number-of-Running-Containers
    slave3:36736	        RUNNING	      slave3:8042	                           0
    slave2:33586	        RUNNING	      slave2:8042	                           0
    master:35013	        RUNNING	      master:8042	                           0
    slave1:41883	        RUNNING	      slave1:8042	                           0

 **查看程序状态**   
 yarn application -list -appStates FINISHED
 根据Application状态过滤:yarn application -list -appStates (所有状态:ALL、NEW、NEW_SAVING、SUBMITTED、ACCEPTED、RUNNING、FINISHED、FAILED、KILLED)
 
**Kill掉Application**
yarn application -kill application_1612577921195_0001

**查看任务日志**
例:查询Application日志:yarn logs -applicationId <ApplicationId>
yarn logs -applicationId application_1612577921195_0001

**查看所有容器**
列出所有Container:yarn container -list <ApplicationAttemptId>
yarn container -list appattempt_1612577921195_0001_000001

**查看容器状态**
打印Container状态:    yarn container -status <ContainerId>
yarn container -status container_1612577921195_0001_01_000001
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值