我这里3台虚拟机的配置为:
master:192.168.27.100
slave1:192.168.27.101
slave2:192.168.27.102
路径要与虚拟机文件路径对应(不需要自己创建,在运行hadoop的时候会自己创建)
具体hadoop配置教程可以参考 hadoop配置教程
1.配置core-site.xml
<configuration>
  <!-- Base directory for Hadoop's local data/temp files.
       Created automatically when Hadoop starts; no need to pre-create it. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/tmp/hadoop</value>
  </property>
  <!-- Default filesystem URI: the NameNode runs on host "master", RPC port 9000. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <!-- Disable HDFS permission checking (tutorial convenience only — do not
       do this in production). NOTE(review): this is an HDFS property and is
       conventionally placed in hdfs-site.xml. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
</configuration>
2.配置 hdfs-site.xml
<configuration>
  <!-- HTTP address of the NameNode web UI (Hadoop 2.x default port 50070). -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>master:50070</value>
  </property>
  <!-- Number of replicas kept for each HDFS block. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <!-- Which machine runs the Secondary NameNode (the checkpointing helper
       for the NameNode). Fixed: the current property name is
       dfs.namenode.secondary.http-address; the original used the
       long-deprecated 1.x key "dfs.secondary.http.address". -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>slave1:50090</value>
  </property>
  <!-- Local path where the NameNode stores its metadata (fsimage/edit logs). -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/tmp/hadoop/dfs/name</value>
  </property>
  <!-- Local path where each DataNode stores its block data. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/tmp/hadoop/dfs/data</value>
  </property>
  <!-- Clients connect to DataNodes by hostname instead of IP (useful when
       clients cannot reach the DataNodes' internal IPs). Fixed: the property
       is dfs.client.use.datanode.hostname — the original typo "dataname"
       would be silently ignored by Hadoop. -->
  <property>
    <name>dfs.client.use.datanode.hostname</name>
    <value>true</value>
  </property>
</configuration>
3.配置yarn-site.xml
<configuration>
  <!-- Auxiliary service on NodeManagers; must be mapreduce_shuffle
       for MapReduce jobs to run. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- ResourceManager web UI address — open in a browser to inspect
       cluster status. -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:8088</value>
  </property>
  <!-- Hostname of the machine that runs the ResourceManager; start YARN
       on this host. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <!-- Skip virtual-memory checks; useful when running inside VMs. -->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <!-- Classpath handed to YARN applications (Hadoop installed under /software/hadoop). -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/software/hadoop/etc/hadoop:/software/hadoop/share/hadoop/common/lib/*:/software/hadoop/share/hadoop/common/*:/software/hadoop/share/hadoop/hdfs:/software/hadoop/share/hadoop/hdfs/lib/*:/software/hadoop/share/hadoop/hdfs/*:/software/hadoop/share/hadoop/mapreduce/*:/software/hadoop/share/hadoop/yarn:/software/hadoop/share/hadoop/yarn/lib/*:/software/hadoop/share/hadoop/yarn/*</value>
  </property>
</configuration>
4.配置mapred-site.xml
<configuration>
  <!-- Run MapReduce jobs on YARN (instead of the local runner). -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
5.配置yarn-env.sh
# JDK installation directory used by the YARN daemons.
export JAVA_HOME='/software/java'
6.配置hadoop-env.sh
# Users permitted to start/stop each daemon when launching Hadoop as root.
# Fixed: HDFS_NAMENODE__USER and HDFS_SECONDARYNAMENODE__USER had a double
# underscore; Hadoop's start scripts read HDFS_NAMENODE_USER and
# HDFS_SECONDARYNAMENODE_USER, so with the typo start-dfs.sh refuses to
# start those daemons as root.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# JDK installation directory used by the Hadoop daemons.
export JAVA_HOME=/software/java
7.配置workers(hadoop3.0以下的版本为slaves)
master
slave1
slave2