一.基础环境配置
1.vi /etc/sysconfig/network-scripts/ifcfg-ens33
1.创建目录
mkdir /opt/programs
2.解压jdk
tar -zxvf jdk-8u212-linux-x64.tar.gz -C /opt/programs/
3.配置jdk的环境变量
vi /etc/profile !!!注意jdk的型号: JAVA_HOME 必须与实际解压出的目录名一致 (jdk-8u212 解压后目录为 jdk1.8.0_212)
export JAVA_HOME=/opt/programs/jdk1.8.0_212/
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
4.重新读取环境变量 并 验证安装情况
source /etc/profile
java -version
5.关机 设置快照,克隆 ,改ip
service network restart
6.分别更改三台主机名
hostnamectl set-hostname hadoop01
hostnamectl set-hostname hadoop02
hostnamectl set-hostname hadoop03
重启 reboot
7.vi /etc/hosts 域名映射
192.168.165.46 hadoop01
192.168.165.47 hadoop02
192.168.165.48 hadoop03
8.拷贝给其他主机 (两台都要拷贝)
scp /etc/hosts root@192.168.165.47:/etc/
scp /etc/hosts root@192.168.165.48:/etc/
9.分别在三台主机上 免密登录
ssh-keygen
ssh-copy-id hadoop01
ssh-copy-id hadoop02
ssh-copy-id hadoop03
二.安装hadoop
1.安装hadoop
tar -zxvf hadoop-2.7.6.tar.gz -C /opt/programs/
2.配置文件
cd /opt/programs/hadoop-2.7.6/etc/hadoop/
(1)vi core-site.xml
<!--配置NameNode地址8020端口也可以,9000是RPC通信端口-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop01:9000</value>
</property>
<!--配置操作hdfs的缓存大小-->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<!--HDFS数据保存在Linux的哪个目录,默认值是Linux的tmp目录-->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/programs/hadoop-2.7.6/tmp</value>
</property>
(2)vi hdfs-site.xml
<!--副本数-->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!--块大小 hadoop2.x:128M,hadoop1.x:64M-->
<property>
<name>dfs.block.size</name>
<value>134217728</value>
</property>
<!--元数据存放的目录-->
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/programs/hadoop-2.7.6/namenode</value>
</property>
<!--数据存放的目录-->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/programs/hadoop-2.7.6/datanode</value>
</property>
<!--hdfs检测目录,可以不配置-->
<property>
<name>fs.checkpoint.dir</name>
<value>/opt/programs/hadoop-2.7.6/cname</value>
</property>
<!--hdfs的namenode的web ui地址-->
<property>
<name>dfs.http.address</name>
<value>hadoop01:50070</value>
</property>
<!--hdfs的secondary的web ui地址-->
<property>
<name>dfs.secondary.http.address</name>
<value>hadoop01:50090</value>
</property>
<!--是否开启web操作hdfs-->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!--是否启用hdfs权限(acl控制列表)-->
<property>
<name>dfs.permissions</name>
<value>true</value>
</property>
(3)vi yarn-site.xml
<!--指定resourceManager所启动的服务器主机名或者IP地址-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop01</value>
</property>
<!--指定mapreduce的shuffle-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--指定resourcemanager的内部通信地址-->
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop01:8032</value>
</property>
<!--指定resourcemanager的scheduler的内部通信地址-->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop01:8030</value>
</property>
<!--指定resourcemanager的resource-tracker的内部通信地址-->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop01:8031</value>
</property>
<!--指定resourcemanager的管理者的内部通信地址-->
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop01:8033</value>
</property>
<!--指定resourcemanager的web ui监控地址-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop01:8088</value>
</property>
(4)
ls
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
<!--指定mapreduce运行框架-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<final>true</final>
</property>
<!--历史服务的通信地址-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop01:10020</value>
</property>
<!--历史服务的web ui通信地址-->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop01:19888</value>
</property>
(5)vi slaves
hadoop01
hadoop02
hadoop03
三.配置环境变量
(1)vi hadoop-env.sh
export JAVA_HOME=/opt/programs/jdk1.8.0_212
(2)vi mapred-env.sh
export JAVA_HOME=/opt/programs/jdk1.8.0_212
(3)vi yarn-env.sh 在# some Java parameters 之后加入
export JAVA_HOME=/opt/programs/jdk1.8.0_212
(4)vi /etc/profile
export HADOOP_HOME=/opt/programs/hadoop-2.7.6
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
四.拷贝
scp -r /opt/programs/hadoop-2.7.6/ root@hadoop02:/opt/programs/
scp -r /opt/programs/hadoop-2.7.6/ root@hadoop03:/opt/programs/
scp -r /etc/profile root@hadoop02:/etc
scp -r /etc/profile root@hadoop03:/etc
分别在三台主机上执行: source /etc/profile
五.
(1)初始化
hdfs namenode -format
(2)启动HDFS
start-dfs.sh
(3).启动Yarn
start-yarn.sh
(4)
如果出现问题停止所有脚本,没有就跳过下一步
stop-all.sh
(5)重新启动
start-dfs.sh
start-yarn.sh
(6)
jps
systemctl status firewalld
(7)创建目录
hadoop fs -mkdir /dds
或者
hadoop fs -mkdir /opt/
上传到HDFS上
hadoop fs -put /root/anaconda-ks.cfg /opt/
(8)到HDFS上 访问
http://192.168.165.46:50070
http://192.168.165.46:8088
要截图的地方
测试响应
在hadoop02上 ping hadoop01
测试登录
ssh hadoop01
显示Java编辑器
javac
防火墙:systemctl status firewalld
Selinux状态
查询getenforce
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
启动start-all.sh
可能遇到的问题
localhost: Error: JAVA_HOME is not set and could not be found.
localhost: Error: JAVA_HOME is not set and could not be found.
Starting secondary namenodes [0.0.0.0] 0.0.0.0: Error: JAVA_HOME is not set and could not be found.
starting yarn daemons
解决方法
cd /opt/programs/hadoop-2.7.6/etc/hadoop
vi hadoop-env.sh
export JAVA_HOME=/opt/programs/jdk1.8.0_212
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!