准备
事先安装好VMware虚拟机
安装JDK并且配置环境变量
下载好hadoop文件
安装步骤
建立一个sh文件,输入以下脚本并以Hadoop的tgz包作为参数,运行该脚本即可安装配置Hadoop,并验证Hadoop服务是否启动成功。
源代码
#1. Validate arg 1: must be a hadoop archive in tar.gz or tgz format.
pack=$1
if [[ ! "$pack" =~ ^.*hadoop.*\.(tar\.gz|tgz)$ ]];then
  # FIX: message said "COMPASS"; the intended word is "COMPRESS".
  echo "ERROR : ONLY SUPPORT tar.gz OR tgz HADOOP COMPRESS FORMAT"
  exit 1
fi
#2.1. Check for running Hadoop services; stop them if any are found.
# Count the known Hadoop daemons reported by jps.
sc=$(jps | awk 'BEGIN {c=0}/DataNode|SecondaryNameNode|NodeManager|ResourceManager|NameNode/{c++}END{print c}')
if [ "$sc" -gt 0 ];then
  # Try a graceful shutdown first; on failure, kill the daemons directly.
  # BUG FIX: the original closed the awk quote AFTER "kill -9", so
  # "|xargs kill -9" was part of the awk program (a syntax error) and the
  # fallback never killed anything.
  stop-all.sh 1>/dev/null 2>hadoop_err.log || jps | awk '/DataNode|SecondaryNameNode|NodeManager|ResourceManager|NameNode/{print $1}' | xargs kill -9
  if [ $? -ne 0 ];then
    echo "ERROR : FAIL TO STOP RUNNING HADOOP SERVICES"
    exit 1
  else
    echo "INFO : SUCCESS TO STOP OLD RUNNING HADOOP SERVICES"
  fi
fi
#2.2. Destination directory (arg 2): create it if missing, remove any old hadoop-* install.
dest=${2%/}   # strip one trailing slash
# Guard: with an empty dest, the "rm -rf $dest/$old" below could expand to "rm -rf /".
if [ -z "$dest" ];then
  echo "ERROR : DEST FOR ARG 2 MUST BE A DIRECTORY"
  exit 2
fi
echo "$dest"
# BUG FIX: the original listed $dest BEFORE creating it, so a fresh install
# into a new directory spuriously failed the ls. Create/verify first.
if [ ! -e "$dest" ];then
  mkdir -p "$dest"
  echo "INFO : DEST DIR NOT EXISTS BUT CREATED"
fi
if [ ! -d "$dest" ];then
  echo "ERROR : DEST FOR ARG 2 MUST BE A DIRECTORY"
  exit 2
fi
# Remove a previously extracted hadoop-* tree, if present.
old=$(ls "$dest" | grep ^hadoop)
if [ $? -eq 0 ];then
  rm -rf "${dest:?}/$old"
  echo "INFO : OLD HADOOP EDITION FOUND AND REMOVED"
fi
#3. Extract the archive into the destination directory.
if tar -zxf "$pack" -C "$dest"; then
  # FIX: the original printed "INFO : SUCCESS" with echo -n and then
  # "TO DECOMPRESS...", producing the fused word "SUCCESSTO"; the failure
  # path also exited before the message tail, leaving a partial line.
  echo "INFO : SUCCESS TO DECOMPRESS $pack TO $dest"
else
  echo "ERROR : FAIL TO DECOMPRESS $pack TO $dest"
  exit 3
fi
#4. Environment variables: remove any previous hadoop section, then append a new one.
env=/etc/profile.d/myenv.sh
if [ ! -e "$env" ];then
  touch "$env"
  echo "INFO : ENV FILE NOT EXIST BUT TOUCHED"
fi
# Locate the line range of an existing "#...hadoop" section: b = its first line,
# e = the line before the next hadoop marker (or EOF). "0,0" means not found.
old=$(cat "$env" | awk 'BEGIN{b=0; e=0; ln=0;}{ln++; if(b>0 && match($0,/^#.*hadoop/)) e=ln-1; if(match($0,/^#.*hadoop/)) b=ln}END{if(b>0 && e==0) e=ln; print b","e}')
if [ "$old" != "0,0" ];then
  sed -i "${old}d" "$env"
  # BUG FIX: the original interpolated $OLD (undefined; shell vars are
  # case-sensitive), so the removed line range never appeared in the log.
  echo "INFO : ENV VARIABLES FOR HADOOP FOUND BUT REMOVED LINE $old IN $env "
fi
# Resolve the freshly extracted hadoop-* directory; dest becomes HADOOP_HOME.
old=$(ls "$dest" | grep ^hadoop)
dest="$dest/$old"
# Append the hadoop environment section to the profile script.
# NOTE: everything between <<EOF and EOF is runtime content written to $env.
# Escaped \$ references (e.g. \$PATH, \$HADOOP_HOME) stay literal so they expand
# at login time, while $dest is expanded NOW to pin HADOOP_HOME to the
# directory extracted above. The *_USER=root lines let the start scripts run
# the daemons as root (Hadoop 3.x refuses otherwise).
cat >> $env <<EOF
# hadoop 3.1.3
export HADOOP_HOME=$dest
export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$HADOOP_HOME/lib
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HADOOP_MAPRED_HOME=\$HADOOP_HOME
export HADOOP_COMMON_HOME=\$HADOOP_HOME
export HADOOP_HDFS_HOME=\$HADOOP_HOME
export HADOOP_YARN_HOME=\$HADOOP_HOME
export HADOOP_INSTALL=\$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_HOME/lib/native
export HADOOP_LIBEXEC_DIR=\$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=\$HADOOP_HOME/lib/native:\$JAVA_LIBRARY_PATH
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
echo "INFO : HADOOP ENV VARIABLES APPEND TO $env"
#5. Activate the new environment variables in the current shell.
if source /etc/profile; then
  echo "INFO : SUCCESS TO ACTIVE ENV VARIABLES FOR HADOOP"
else
  # FIX: this failure path was logged at INFO level despite exiting with an error.
  echo "ERROR : FAIL TO ACTIVE ENV VARIABLES FOR HADOOP"
  exit 4
fi
#6. Remember the script's own directory, then switch to Hadoop's main config directory.
sdir=$(cd "$(dirname "$0")"; pwd)
cd "$dest/etc/hadoop" || { echo "ERROR : FAIL TO ENTER $dest/etc/hadoop"; exit 1; }
# BUG FIX: the original echoed $pwd — an unset variable that printed nothing.
echo "INFO : CURRENT DIRECTORY CHANGED TO $(pwd)"
#7. Point Hadoop's internal env file at the current JDK
# (slashes in JAVA_HOME are escaped so sed treats them literally).
sed -i "s/# export JAVA_HOME=/export JAVA_HOME=${JAVA_HOME//\//\\/}/" hadoop-env.sh
echo "INFO : SUCCESS TO FINISH hadoop-env.sh CONFIG"
#8.0. Ensure /etc/hosts maps this hostname to the machine's IP.
# BUG FIX (x2): the original redirected stderr with "2>$1", which TRUNCATED
# the hadoop package passed as argument 1; and it ran an echo between the
# grep and the "[ $? -ne 0 ]" test, so $? always reflected the echo and the
# grep result was never actually checked.
echo -n "INFO : IP &HOSTNAME MAP"
if ! grep "$HOSTNAME" /etc/hosts 1>/dev/null 2>&1;then
  ip=$(ip addr | grep -E 'inet.*ens33' | awk '{print $2}' | cut -d/ -f1)
  echo "$ip $HOSTNAME">>/etc/hosts
  echo "NOT FOUND BUT CREATED"
else
  echo "EXIST"
fi
#8.1. Drop the stale hadoop scratch directory left by a previous run, if any.
dir=/tmp/hadoop
if [ -e "$dir" ]; then
  rm -rf "$dir"
  echo "INFO : TEMP & YARN LOCAL & YARN LOG DIR ($dir) FOUND AND REMOVED"
fi
#8. Generate Hadoop's core-site.xml.
# The heredoc is written verbatim; $HOSTNAME and $dir are expanded NOW, pinning
# the NameNode URI (port 9000) and hadoop.tmp.dir to this host's values.
cat > core-site.xml <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://$HOSTNAME:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>$dir/$HOSTNAME</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>1048576</value>
</property>
</configuration>
EOF
echo "INFO : SUCCESS TO FINISH core-site.xml CONFIG"
#9. Generate Hadoop's hdfs-site.xml.
# Single-node setup: replication factor 1, NameNode/DataNode storage under the
# install directory ($dest, expanded now), permissions checking disabled.
cat > hdfs-site.xml<<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>${dest}/data/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>${dest}/data/dfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>$HOSTNAME:9869</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
EOF
echo "INFO : SUCCESS TO FINISH hdfs-site.xml CONFIG"
#10. mapred-site.xml configuration.
#10.1. Re-activate the profile, then capture the Hadoop classpath for the configs below.
if ! source /etc/profile; then
  echo "ERROR : FAIL TO ACTIVATE HADOOP ENV VARIABLES"
  exit 1
fi
# The assignment's exit status is that of "hadoop classpath" itself.
if ! hc=$(hadoop classpath); then
  echo "ERROR : FAIL TO FETCH HADOOP CLASSPATH"
  exit 1
fi
echo "INFO : HADOOP ENV VARIABLES ACTIVATED AND HADOOP CLASSPATH FETCHED"
#10.2. Generate mapred-site.xml.
# Runs MapReduce on YARN; $hc (the classpath fetched above) and $HOSTNAME are
# expanded now into the written file.
cat > mapred-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$hc</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>$HOSTNAME:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>$HOSTNAME:19888</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>256</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>512</value>
</property>
</configuration>
EOF
echo "INFO : SUCCESS TO FINISH mapred-site.xml CONFIG"
#11. Generate yarn-site.xml.
# NodeManager ports/dirs bound to this host; $dir (scratch dir) and $hc
# (classpath) are expanded now into the written file.
cat > yarn-site.xml <<EOF
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>$HOSTNAME:8040</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>$HOSTNAME:8050</value>
</property>
<property>
<name>yarn.nodemanager.webapp.address</name>
<value>$HOSTNAME:8042</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>$dir/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>$dir/yarn/log</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$hc</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
EOF
echo "INFO : SUCCESS TO FINISH yarn-site.xml CONFIG"
#12.0. Return to the directory the script was launched from.
echo "$sdir"
cd "$sdir"
#12. Format the NameNode (stderr appended to hadoop_err.log).
if hdfs namenode -format 1>/dev/null 2>>hadoop_err.log; then
  echo "INFO : SUCCESS TO FORMAT hdfs namenode"
else
  echo "ERROR : FAIL TO format hdfs namenode"
  exit 1
fi
#13. Start the Hadoop services and verify all five daemons came up.
if ! start-all.sh 1>/dev/null 2>>hadoop_err.log; then
  echo "ERROR : FAIL TO START HADOOP SERVICE"
  exit 1
fi
# Re-count the daemons; a healthy single node runs exactly five.
sc=$(jps | awk 'BEGIN{c=0}/DataNode|SecondaryNameNode|NodeManager|ResourceManager|NameNode/{c++}END{print c}')
if [ "$sc" -eq 5 ]; then
  echo "INFO : SUCCESS TO START HADOOP SERVICE"
else
  echo "WARN : FAIL TO START HADOOP SERVICE FOR NOT 5 SERVICES STARTED"
fi
# Tidy up: drop all working variables so nothing leaks into a sourcing shell.
unset hc dir sdir env sc old dest pack