ubuntu下的Hadoop搭建
# 1. Install net-tools (provides ifconfig)
apt install net-tools
# 2. Remove the minimal vim-common and install the full vim package
# fixed: original was "apt-get remove vim -common" — the stray space made apt
# remove the "vim" package and then fail on the unknown option "-common"
apt-get remove vim-common
apt-get install vim
# I. Configure a static IP via netplan
# fixed: netplan only reads files ending in ".yaml" (original said ".yamn");
# on Ubuntu desktop the default file is 01-network-manager-all.yaml
vi /etc/netplan/01-network-manager-all.yaml
# YAML is indentation-sensitive — two spaces per level. Add:
# network:
#   version: 2
#   renderer: NetworkManager
#   ethernets:
#     ens33:
#       addresses: [192.168.254.101/24]
#       gateway4: 192.168.254.2
#       nameservers:
#         addresses: [192.168.254.1,114.114.114.114]
# (original had everything flattened to column 0 and "renderer" dangling at
# the end; netplan would reject that file)
netplan apply   # apply the new IP configuration
# II. Give the regular user passwordless root (sudo) rights
su root
# fixed: the file must be made writable BEFORE editing it — the original ran
# "chmod u+w" only after vi, so the save would have failed
chmod u+w /etc/sudoers
vi /etc/sudoers
# add this line (safer alternative: use "visudo", which syntax-checks the
# file before saving — a broken sudoers can lock you out of sudo entirely):
# hadoop ALL=(ALL:ALL) NOPASSWD:ALL
chmod u-w /etc/sudoers   # restore the read-only permission
# III. Disable the firewall
# fixed: the original had Chinese labels on the same line as the commands;
# they would have been parsed as extra arguments and caused errors
ufw disable    # turn ufw off
ufw status     # check ufw state
iptables -F    # flush all iptables rules (note: not persistent across reboot)
iptables -L    # list the current rules
# IV. Host name mappings for the cluster nodes
# fixed: the file is /etc/hosts, not /etc/host
vi /etc/hosts
# add these entries:
# 192.168.254.101 linux1
# 192.168.254.102 linux2
# 192.168.254.103 linux3
# V. Set the machine's host name
# replace HOSTNAME with linux1 / linux2 / linux3 to match /etc/hosts
hostnamectl --static set-hostname HOSTNAME
# refresh the apt package index and install the SSH server
sudo apt-get update
sudo apt-get install openssh-server
# VI. Passwordless SSH between the nodes
ssh localhost   # first login creates ~/.ssh with the right permissions
exit
cd ~/.ssh
ssh-keygen -t rsa          # generate the key pair (press Enter for defaults)
ssh-copy-id linux1         # push the public key to each node
ssh-copy-id linux2
ssh-copy-id linux3
# also authorize the key for localhost itself:
cat ./id_rsa.pub >> ./authorized_keys
# VII. Environment variables
vi /etc/profile
# Add the exports below. fixed: shell assignments must NOT have a space
# around "=" — the original "export JAVA_HOME= /user/local/..." sets
# JAVA_HOME to the empty string and then tries to execute the path itself.
#JAVA_HOME
export JAVA_HOME=/user/local/jdk1.8.0_131
export PATH=$PATH:$JAVA_HOME/bin
#HADOOP_HOME
export HADOOP_HOME=/user/local/hadoop-2.7.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
#ZOOKEEPER_HOME
export ZOOKEEPER_HOME=/user/local/zookeeper-3.4.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin
#HIVE_HOME
export HIVE_HOME=/user/local/hive
export PATH=$PATH:$HIVE_HOME/bin
#HBASE_HOME
export HBASE_HOME=/user/local/hbase-1.3.1
export PATH=$PATH:$HBASE_HOME/bin
#KAFKA_HOME
export KAFKA_HOME=/user/local/kafka
export PATH=$PATH:$KAFKA_HOME/bin
# NOTE(review): "/user/local" is unusual — the conventional path is
# /usr/local; verify against where the archives are actually extracted.
source /etc/profile   # reload so the new variables take effect
# VIII. Install the JDK
# check whether a Java runtime is already present
java -version
# fixed: rpm is the Red Hat package tool and is not available on Ubuntu;
# use dpkg/apt instead of "rpm -qa | grep java" / "rpm -e --nodeps"
dpkg -l | grep -i jdk
# remove an old Java package if one was found:
# sudo apt-get remove <package-name>
# create the install directory
sudo mkdir -p /user/local/
# fixed: tar needs the full archive name and an extraction target;
# "tar -zxvf jdk1.8.0_131" referenced a directory, not an archive
tar -zxvf jdk1.8.0_131.tar.gz -C /user/local/
# IX. Install and configure Hadoop
# fixed: full archive name plus an extraction target directory
tar -zxvf hadoop-2.7.3.tar.gz -C /user/local/
# fixed: original "cd hadoop2.7.2/etc/hadoop/" did not match the extracted
# hadoop-2.7.3 directory (wrong version, missing dash)
cd /user/local/hadoop-2.7.3/etc/hadoop/
# NOTE(review): the values below use master/slave01/slave02 host names and an
# /opt/module path, while earlier steps used linux1/linux2/linux3 and
# /user/local — confirm and make them consistent before use.

vi core-site.xml
# (all <property> elements go inside the file's <configuration> element)
# <!-- Address of the HDFS NameNode -->
# <property>
#   <name>fs.defaultFS</name>
#   <value>hdfs://master:9000</value>
# </property>
# <!-- Working/temp directory for Hadoop runtime files. The default is
#      /tmp/hadoop-$user, which may be wiped on reboot, so set it explicitly. -->
# <property>
#   <name>hadoop.tmp.dir</name>
#   <value>/opt/module/hadoop-2.7.3/data/tmp</value>
# </property>

vi hdfs-site.xml
# <!-- HDFS replication factor (optional; the default is 3) -->
# <property>
#   <name>dfs.replication</name>
#   <value>1</value>
# </property>
# <!-- HTTP address of the SecondaryNameNode; without this it starts on the
#      same node as the NameNode -->
# <property>
#   <name>dfs.namenode.secondary.http-address</name>
#   <value>slave01:50090</value>
# </property>

vi slaves
# one worker host name per line:
# master
# slave01
# slave02

vi yarn-site.xml
# <!-- How reducers fetch map output -->
# <property>
#   <name>yarn.nodemanager.aux-services</name>
#   <value>mapreduce_shuffle</value>
# </property>
# <!-- Host that runs the YARN ResourceManager -->
# <property>
#   <name>yarn.resourcemanager.hostname</name>
#   <value>slave02</value>
# </property>

vi mapred-site.xml
# <!-- Run MapReduce jobs on YARN -->
# <property>
#   <name>mapreduce.framework.name</name>
#   <value>yarn</value>
# </property>
# 1. Distribute the software to the other cluster nodes
# fixed: the original ran the exact same scp twice (both targeting
# hadoop201); each worker needs its own copy. Version names also corrected
# to match the 2.7.3 / jdk1.8.0_131 used earlier.
# NOTE(review): host names hadoop201/hadoop202 do not match the
# linux1/linux2/linux3 naming from /etc/hosts — confirm the intended hosts.
scp -r hadoop-2.7.3 jdk1.8.0_131 hadoop@hadoop201:/opt/module/
scp -r hadoop-2.7.3 jdk1.8.0_131 hadoop@hadoop202:/opt/module/
# 2. Format the NameNode — run ONCE, only on the NameNode host
bin/hdfs namenode -format
# 3. Start the cluster
# on master  (NameNode):        start-dfs.sh
# on slave02 (ResourceManager): start-yarn.sh