Hadoop Cluster Configuration
Basic Configuration
- Environment:
- VM OS: CentOS 7
- Hostname: hadoop01 (the scripts below also target hadoop02 and hadoop03)
- User: hadoop
- Hadoop version: hadoop-3.3.1
- Java version: JDK 1.8
- Hadoop install mode: distributed mode
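Before touching the config files, it is worth confirming the environment matches the versions above; a minimal check, assuming Hadoop is unpacked under /kkb/install/hadoop-3.3.1:

java -version                                  # expect 1.8.x
/kkb/install/hadoop-3.3.1/bin/hadoop version   # expect Hadoop 3.3.1
hostname                                       # expect hadoop01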
core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop01:8020</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/kkb/install/hadoop-3.3.1/hadoopDatas/tempDatas</value>
    </property>
    <!-- I/O buffer size; in practice, tune to server capacity (default 4096) -->
    <property>
        <name>io.file.buffer.size</name>
        <value>4096</value>
    </property>
    <!-- Enable the HDFS trash so deleted data can be recovered; interval in minutes (default 0, disabled) -->
    <property>
        <name>fs.trash.interval</name>
        <value>10080</value>
    </property>
</configuration>
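With fs.trash.interval set to 10080 minutes (seven days), hdfs dfs -rm moves data into the user's trash instead of deleting it outright; a quick sketch of the recovery flow (the file path is illustrative):

hdfs dfs -rm /user/hadoop/demo.txt           # moved to trash, not removed
hdfs dfs -ls /user/hadoop/.Trash/Current     # deleted files land here under their original path
hdfs dfs -mv /user/hadoop/.Trash/Current/user/hadoop/demo.txt /user/hadoop/   # restore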
hdfs-site.xml
<configuration>
    <!-- Dynamic commissioning/decommissioning of cluster nodes
    <property>
        <name>dfs.hosts</name>
        <value>/kkb/install/hadoop-3.3.1/etc/hadoop/accept_host</value>
    </property>
    <property>
        <name>dfs.hosts.exclude</name>
        <value>/kkb/install/hadoop-3.3.1/etc/hadoop/deny_host</value>
    </property>
    -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>hadoop01:9868</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>hadoop01:9870</value>
    </property>
    <!-- Where the NameNode stores its metadata (fsimage); in practice, determine the disk mount points first, then separate multiple directories with commas -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///kkb/install/hadoop-3.3.1/hadoopDatas/namenodeDatas</value>
    </property>
    <!-- Where DataNodes store block data; in practice, determine the disk mount points first, then separate multiple directories with commas -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///kkb/install/hadoop-3.3.1/hadoopDatas/datanodeDatas</value>
    </property>
    <!-- Where the NameNode stores the edits log -->
    <property>
        <name>dfs.namenode.edits.dir</name>
        <value>file:///kkb/install/hadoop-3.3.1/hadoopDatas/dfs/nn/edits</value>
    </property>
    <!-- Where the SecondaryNameNode keeps the fsimage awaiting merge -->
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///kkb/install/hadoop-3.3.1/hadoopDatas/dfs/snn/name</value>
    </property>
    <!-- Where the SecondaryNameNode keeps the edits log awaiting merge -->
    <property>
        <name>dfs.namenode.checkpoint.edits.dir</name>
        <value>file:///kkb/install/hadoop-3.3.1/hadoopDatas/dfs/nn/snn/edits</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <!-- Disable HDFS permission checks (acceptable on a learning cluster, not in production) -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- Block size: 134217728 bytes = 128 MB -->
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
    </property>
</configuration>
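All of the file:// paths above must exist before the first start, and a fresh cluster needs a one-time NameNode format; a minimal sketch, run as the hadoop user on hadoop01:

# Create the storage directories declared above (the DataNode dirs are needed on every node)
mkdir -p /kkb/install/hadoop-3.3.1/hadoopDatas/{tempDatas,namenodeDatas,datanodeDatas}
mkdir -p /kkb/install/hadoop-3.3.1/hadoopDatas/dfs/nn/edits
mkdir -p /kkb/install/hadoop-3.3.1/hadoopDatas/dfs/snn/name
mkdir -p /kkb/install/hadoop-3.3.1/hadoopDatas/dfs/nn/snn/edits
# Format the NameNode once, before the first start only; reformatting wipes cluster metadata
/kkb/install/hadoop-3.3.1/bin/hdfs namenode -format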
mapred-site.xml
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Allow small jobs to run entirely inside the AM's JVM (uber mode) -->
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
    <!-- JobHistory server RPC and web addresses -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop01:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop01:19888</value>
    </property>
    <!-- Hadoop 3 requires HADOOP_MAPRED_HOME for the AM, map, and reduce environments -->
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
</configuration>
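Once the cluster is running, the examples jar that ships with Hadoop makes a convenient smoke test for this MapReduce setup; a sketch, assuming HADOOP_HOME points at /kkb/install/hadoop-3.3.1:

# Estimate pi with 10 mappers x 10 samples each; success confirms the YARN and JobHistory wiring above
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 10 10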
yarn-site.xml
<configuration>
    <!-- The host that runs the ResourceManager -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop01</value>
    </property>
    <!-- Auxiliary shuffle service required by MapReduce -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
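After startup, a quick way to confirm every NodeManager registered with the ResourceManager:

yarn node -list    # expect all NodeManagers in RUNNING state
# Web UIs: http://hadoop01:9870 (NameNode), http://hadoop01:8088 (ResourceManager), http://hadoop01:19888 (JobHistory)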
Cluster start/stop script
#!/bin/bash
case $1 in
"start"){
    source /etc/profile
    /kkb/install/hadoop-3.3.1/sbin/start-dfs.sh
    /kkb/install/hadoop-3.3.1/sbin/start-yarn.sh
    # mr-jobhistory-daemon.sh is deprecated in Hadoop 3; mapred --daemon is the replacement
    /kkb/install/hadoop-3.3.1/bin/mapred --daemon start historyserver
};;
"stop"){
    source /etc/profile
    /kkb/install/hadoop-3.3.1/sbin/stop-dfs.sh
    /kkb/install/hadoop-3.3.1/sbin/stop-yarn.sh
    /kkb/install/hadoop-3.3.1/bin/mapred --daemon stop historyserver
};;
esac
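Assuming the script is saved as hadoop.sh (the name is illustrative) on a node with passwordless SSH to the others, usage is:

chmod +x hadoop.sh
./hadoop.sh start    # brings up HDFS, YARN, and the JobHistory server
./hadoop.sh stop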
xsync script
#!/bin/bash
# Distribute a file (or directory) to the same path on hadoop01..hadoop03
pcount=$#
if ((pcount==0)); then
    echo no args
    exit
fi
p1=$1
fname=$(basename $p1)
echo $fname
pdir=$(cd -P $(dirname $p1); pwd)
echo $pdir
user=$(whoami)
for ((host=1; host<4; host++)); do
    echo ------------------- hadoop0$host --------------
    rsync -av $pdir/$fname $user@hadoop0$host:$pdir
done
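A typical use is pushing an edited config file to every node after a change, assuming the script is saved as xsync somewhere on the PATH:

xsync /kkb/install/hadoop-3.3.1/etc/hadoop/core-site.xml
xsync /kkb/install/hadoop-3.3.1/etc/hadoop/    # or distribute the whole config directory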
Process-check script
#!/bin/bash
params=$@
for ((i=1; i<=3; i++)); do
    echo ============= hadoop0$i $params =============
    ssh hadoop0$i "source /etc/profile; $params"
done
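Saved as, say, xcall (the name is illustrative), the script runs the same command on all three nodes; checking which daemons are up is the typical use:

xcall jps    # lists the Java daemons (NameNode, DataNode, NodeManager, ...) running on each node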