文章目录
Hadoop2.7.7 完全分布式配置
三台节点（node01、node02、node03），操作系统均为 CentOS 7
[root@node01 software]# cat /etc/hosts
192.168.8.11 node01
192.168.8.12 node02
192.168.8.13 node03
192.168.8.14 node04
解压 JDK 和 Hadoop，并配置二者的环境变量
[root@node01 software]# cat /etc/profile.d/bigdata.sh
# JAVA_HOME 指定jdk的路径,也就是解压的路径
export JAVA_HOME=/opt/module/jdk1.8.0_191
# 指定classpath
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin
# Hadoop-2.7.7环境变量
export HADOOP_HOME=/opt/module/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://node01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-2.7.7/data/tmp</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.http-address</name>
<value>node01:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node02:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/module/hadoop-2.7.7/data/dfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/module/hadoop-2.7.7/data/dfs/dn</value>
</property>
</configuration>
mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>node01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node01:19888</value>
</property>
</configuration>
配置yarn
修改 yarn-env.sh（位于 /opt/module/hadoop-2.7.7/etc/hadoop/ 目录下），设置 JAVA_HOME
vim yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_191
yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--指定yarn的resourcemanager的地址 -->
<property>
<name>yarn.resourcemanager.address</name>
<value>node01:8032</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>node01:8031</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>node01:8030</value>
</property>
<!-- pyspark配置：关闭NodeManager对容器的物理内存和虚拟内存检查 -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
编辑slaves文件
vim /opt/module/hadoop-2.7.7/etc/hadoop/slaves
node01
node02
node03
将配置好的 Hadoop 目录分发到另外两台节点（node02、node03）
scp -r /opt/module/hadoop-2.7.7/ root@node02:/opt/module/
scp -r /opt/module/hadoop-2.7.7/ root@node03:/opt/module/
文件系统初始化（格式化 NameNode，在 node01 上执行，仅首次启动集群前执行一次）
hdfs namenode -format
启动、关闭集群（在 node01 上执行）
start-all.sh
stop-all.sh
测试：运行 Hadoop 自带的 MapReduce 示例程序 pi
hadoop jar /opt/module/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 5 5