#################################
# HADOOP 3.1.3 single1 安装配置 #
#################################
cd /opt/download
ls #=> hadoop-3.1.3.tar.gz
#解压到software文件夹里
tar -zxvf /opt/download/hadoop-3.1.3.tar.gz -C /opt/software/
chown -R root:root /opt/software/hadoop-3.1.3
cd /opt/software
ls #=> hadoop-3.1.3
#重命名
mv hadoop-3.1.3/ hadoop313
ls #=> hadoop313
cd hadoop313/
pwd #=> /opt/software/hadoop313
#环境变量并激活
vim /etc/profile.d/my.sh
#---------------------------------------------------------------------
#hadoop 3.1.3
export HADOOP_HOME=/opt/software/hadoop313
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
#---------------------------------------------------------------------
#激活配置文件
source /etc/profile
#创建数据临时目录
cd /opt/software/hadoop313
mkdir data
#配置内部环境变量
#切换到hadoop配置目录,配置以下文件
cd /opt/software/hadoop313/etc/hadoop
vim hadoop-env.sh
#-----------------------------------------------------------------
export JAVA_HOME=/opt/software/jdk8 #(前面#号去掉,后面jdk8为自己software下安装的jdk名)
#-----------------------------------------------------------------
#----------------------------------------------------------
vim core-site.xml
vim hdfs-site.xml
vim mapred-site.xml
vim yarn-site.xml
#----------------------------------------------------------
#配置核心配置文件
vim core-site.xml
#-----------------------------------------------------------------------------------
<configuration>
----------------------
<property>
<name>fs.defaultFS</name>
<value>hdfs://single01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadoop/kb16</value>
<description>namenode上本地的hadoop临时文件夹</description>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
<description>Size of read/write SequenceFiles buffer: 128K</description>
</property>
----------------------
</configuration>
#----------------------------------------------------------------------------------
#更改windows本地映射
C:\Windows\System32\drivers\etc\hosts里面最后加上
192.168.245.168 single01
#配置文件系统
vim hdfs-site.xml
#--------------------------------------------------
<configuration>
----------------------
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Hadoop中每个block的备份数</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/software/hadoop313/data/dfs/name</value>
<description>namenode上存储hdfs名字空间元数据 </description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/software/hadoop313/data/dfs/data</value>
<description>datanode上数据块的物理存储位置</description>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>single01:9869</value>
</property>
<!--权限设定避免因权限问题导致操作失败异常-->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>关闭权限验证</description>
</property>
----------------------
</configuration>
#-----------------------------------------------
#计算框架配置
vim mapred-site.xml
#-----------------------------------------------------------------------------
<configuration>
----------------------
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>job执行框架:local, classic or yarn.</description>
<final>true</final>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
<!--job history单节点配置即可-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>single01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>single01:19888</value>
</property>
<!--Container内存上限,由nodemanager读取并控制;实际使用超出时会被nodemanager kill(可能表现为 Connection reset by peer)-->
<property>
<name>mapreduce.map.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>2048</value>
</property>
----------------------
</configuration>
#-----------------------------------------------------------------------------
#调度框架配置
vim yarn-site.xml
#---------------------------------------------------------------
<configuration>
<!-- Site specific YARN configuration properties -->
----------------------
<!-- Resource Manager Configs -->
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<!-- Node Manager Configs-->
<property>
<description>Address where the localizer IPC is. ********* </description>
<name>yarn.nodemanager.localizer.address</name>
<value>single01:8040</value>
</property>
<property>
<description>The address of the container manager in the NM. ********* </description>
<name>yarn.nodemanager.address</name>
<value>single01:8050</value>
</property>
<property>
<description>NM Webapp address. ********* </description>
<name>yarn.nodemanager.webapp.address</name>
<value>single01:8042</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/tmp/hadoop/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/tmp/hadoop/yarn/log</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_HOME/etc/hadoop:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/yarn:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
----------------------
</configuration>
#---------------------------------------------------------------
首次开启要格式化
格式化:hdfs namenode -format
#以后如需重新格式化,格式化前要先清空下面两个目录
#删除data下面的文件
#删除/tmp/hadoop下面的文件(hadoop.tmp.dir=/tmp/hadoop/kb16 在此目录下)
cd /opt/software/hadoop313 #切换到hadoop313目录
rm -rf data/* #删除hadoop313/data/下面的文件
rm -rf /tmp/hadoop/* #删除/tmp/hadoop/下面的文件
namenode下的日志:/opt/software/hadoop313/data/dfs/name/current/fsimage...
secondarynamenode下的日志: /tmp/hadoop/kb16/dfs/namesecondary/current/fsimage...
当namenode里面的edits.log达到一定数量或一定时间时,会把edits.log和当前最新的fsimage传一份给secondarynamenode,由secondarynamenode合成出一份新的fsimage,再把新的fsimage同步到namenode里面;每次重启时会加载namenode里的fsimage,但还有部分最新的edits_...
#格式化:
hdfs namenode -format
#从第二次开始
#一次性启动和关闭不推荐
stop-all.sh
start-all.sh
#【推荐】启动和关闭hdfs和yarn方式
start-dfs.sh
NN=> load fsimage_0000000 目录,文件名称及依赖关系
load edits_inprogress_000-未来得及合并的日志记录
Tree
start-yarn.sh
stop-dfs.sh
stop-yarn.sh
#启动|关闭job historyserver方式
#用法: mapred --daemon start|stop historyserver
mapred --daemon start historyserver
mapred --daemon stop historyserver
#web访问hadoop状态(下面的IP请换成本机实际IP,或使用hosts中映射的主机名single01)
192.168.43.200:9870 hdfs(Hadoop 2.x 的端口为 50070)
192.168.43.200:19888 history server
192.168.43.200:8088 application monitoring