搭建hadoop开源版本分布式集群(无高可用)
在搭建之前需要安装jdk,并设置环境变量。最低要求版本jdk1.8
1.修改各个节点名称
vim /etc/hostname
修改之后reboot重启生效
2.增加hadoop用户
useradd hadoop
passwd hadoop
3.各个节点配置免密登陆
ssh-keygen
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@10.10.81.166
4.时钟同步
ntpdate ntp.aliyun.com
5.解压hadoop的包
修改配置文件
hdfs-site.xml
<configuration>
<!-- nn web 端访问地址-->
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop01:9870</value>
</property>
<!-- 2nn web 端访问地址-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop02:9868</value>
</property>
<!-- 默认为3副本-->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- 创建相应的目录-->
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/metadata/dfs/name</value>
</property>
<property>
<name>dfs.permissions</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<!-- 创建相应的目录-->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///home/hadoop/data/dfs</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
</configuration>
core-site.xml
<configuration>
<!-- 指定 NameNode 的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop01:9000</value>
</property>
<!-- 指定 hadoop 数据的临时存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/data/tmp</value>
</property>
<!-- 配置 HDFS 网页登录使用的静态用户为 hadoop -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>fs.hdfs.impl.disable.cache</name>
<value>true</value>
</property>
mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.0</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.merge.percent</name>
<value>0.66</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.7</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/soft/hadoop-3.2.0</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/soft/hadoop-3.2.0</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/soft/hadoop-3.2.0</value>
</property>
<property>
<name>mapred.output.compress</name>
<value>false</value>
</property>
<property>
<name>mapred.output.compression.type</name>
<value>BLOCK</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop03:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop03:19888</value>
</property>
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>1</value>
</property>
<property>
<name>mapreduce.job.counters.limit</name>
<value>2000</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
workers
hadoop01
hadoop02
hadoop03
6.初始化hdfs 集群
bin/hdfs namenode -format
7.启动集群:
sbin/start-all.sh
8.jps查看进程