It is recommended to use the same username on all three machines; use hadoop on each.
Master node: hadoop0
Worker nodes: hadoop1, hadoop2
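If the hadoop user does not exist yet on a machine, a minimal sketch for creating it (run as root; password policy is up to you):
useradd -m hadoop    # create the user with a home directory
passwd hadoop        # set its password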
1. Create the /usr/local/program directory
su -    # switch to the root user
cd /usr/local
mkdir program
chown -R hadoop:hadoop program/
2. Edit the hosts file
vi /etc/hosts
# add the following entries
192.168.159.100 hadoop0
192.168.159.101 hadoop1
192.168.159.102 hadoop2
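Each machine's own hostname should also match these entries; a quick way to set and verify it (a sketch, adjust the name per machine):
hostnamectl set-hostname hadoop0    # use hadoop1 / hadoop2 on the other nodes
ping -c 1 hadoop1                   # confirm the /etc/hosts mappings resolve
ping -c 1 hadoop2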
vi /etc/selinux/config
# change this line to
SELINUX=disabled
3. Disable the firewall, then reboot
systemctl disable firewalld
systemctl stop firewalld
reboot
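After the reboot, a quick sanity check that both settings stuck (expected output noted in the comments):
systemctl is-active firewalld    # should print: inactive
getenforce                       # should print: Disabled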
4. After the reboot, log in as the hadoop user
mkdir software    # create a software directory; upload external packages here first, unpack them, then move them to /usr/local/program
5. Upload hadoop-2.9.2.tar.gz and jdk-8u191-linux-x64.tar.gz to the software directory
cd software
# unpack
tar -zxf hadoop-2.9.2.tar.gz
tar -zxf jdk-8u191-linux-x64.tar.gz
# move and rename
mv hadoop-2.9.2 /usr/local/program/hadoop-2.9
mv jdk1.8.0_191 /usr/local/program/jdk-1.8
All of the steps above must be done on all three virtual machines: hadoop0, hadoop1, and hadoop2 (the archives can also be copied over instead of re-uploaded, as in the sketch below).
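A sketch for copying the archives from hadoop0 to the other nodes, assuming the same /home/hadoop/software directory already exists there and the hadoop password is known (passwordless SSH is not set up yet at this point):
scp ~/software/hadoop-2.9.2.tar.gz ~/software/jdk-8u191-linux-x64.tar.gz hadoop@hadoop1:~/software/
scp ~/software/hadoop-2.9.2.tar.gz ~/software/jdk-8u191-linux-x64.tar.gz hadoop@hadoop2:~/software/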
6. Configure passwordless SSH login
Every node generates its own key pair and copies its public key to all three nodes, so the master can log in to the workers (and itself) without a password.
# run the following on all three machines
cd
ssh-keygen    # on the first run, just press Enter at every prompt
ssh-copy-id hadoop0
ssh-copy-id hadoop1
ssh-copy-id hadoop2
# verify from hadoop0 with: ssh <hostname>
ssh hadoop1    # should log in without a password prompt; type exit to return
ssh hadoop2
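If a node still prompts for a password after ssh-copy-id, the usual cause is overly permissive .ssh permissions; a sketch for fixing them on the affected node:
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys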
7. Do the following configuration on the master node hadoop0
Add the environment variables
cd
vi .bashrc
# add the following lines
#JAVA
export JAVA_HOME=/usr/local/program/jdk-1.8
export PATH=$JAVA_HOME/bin:$PATH
#HADOOP
export HADOOP_HOME=/usr/local/program/hadoop-2.9
export PATH=$HADOOP_HOME/bin:$PATH
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=.:$PATH
source .bashrc    # make the configuration take effect
java -version    # verify the Java environment variables are set correctly
hadoop version    # verify Hadoop
cd $HADOOP_HOME
cd etc/hadoop
vi hadoop-env.sh
# change the following
# (if JAVA_HOME is already configured in .bashrc, this change can be skipped)
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/local/program/jdk-1.8
vi core-site.xml
# add the following inside the <configuration> tag
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop0:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/program/hadoop-2.9/hdfs/tmp</value>
</property>
vi hdfs-site.xml
# add
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/program/hadoop-2.9/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/program/hadoop-2.9/hdfs/data</value>
</property>
vi yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop0</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
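The notes above do not touch mapred-site.xml, so MapReduce jobs would fall back to the local runner; if you want the PI example in the verification step to run on YARN, a minimal sketch (still in $HADOOP_HOME/etc/hadoop; the file then gets copied along by the *-site.xml scp in step 8):
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
# add inside the <configuration> tag:
# <property>
#   <name>mapreduce.framework.name</name>
#   <value>yarn</value>
# </property>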
vi slaves
# replace the existing localhost entry with the following
hadoop0
hadoop1
hadoop2
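Once the environment variables are loaded, the merged configuration can be sanity-checked from the command line (expected values in the comments):
hdfs getconf -confKey fs.defaultFS      # hdfs://hadoop0:9000
hdfs getconf -confKey dfs.replication   # 1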
8. Copy the configuration on hadoop0 to the other two machines
Run on hadoop0:
scp /home/hadoop/.bashrc hadoop@hadoop1:/home/hadoop/
scp /home/hadoop/.bashrc hadoop@hadoop2:/home/hadoop/
scp /home/hadoop/.ssh/authorized_keys hadoop@hadoop1:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@hadoop2:/home/hadoop/.ssh/
scp /usr/local/program/hadoop-2.9/etc/hadoop/*-site.xml hadoop@hadoop1:/usr/local/program/hadoop-2.9/etc/hadoop/
scp /usr/local/program/hadoop-2.9/etc/hadoop/*-site.xml hadoop@hadoop2:/usr/local/program/hadoop-2.9/etc/hadoop/
scp /usr/local/program/hadoop-2.9/etc/hadoop/slaves hadoop@hadoop1:/usr/local/program/hadoop-2.9/etc/hadoop/
scp /usr/local/program/hadoop-2.9/etc/hadoop/slaves hadoop@hadoop2:/usr/local/program/hadoop-2.9/etc/hadoop/
Run on hadoop0:
cd $HADOOP_HOME
rm -rf hdfs
mkdir hdfs
cd hdfs
mkdir tmp
mkdir data
mkdir name
Run on hadoop1 and hadoop2:
source ~/.bashrc    # load the environment variables copied over in the previous step
cd $HADOOP_HOME
rm -rf hdfs
mkdir hdfs
cd hdfs
mkdir tmp
mkdir data
mkdir name
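The three mkdir calls can also be collapsed into a single command per node (an equivalent shorthand):
mkdir -p $HADOOP_HOME/hdfs/{tmp,name,data}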
Run on hadoop0:
hdfs namenode -format
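If the NameNode ever needs to be re-formatted later, clear the hdfs directories on every node first; otherwise the DataNodes keep the old clusterID and refuse to register (a sketch, only when wiping all HDFS data is acceptable):
rm -rf $HADOOP_HOME/hdfs/name/* $HADOOP_HOME/hdfs/data/* $HADOOP_HOME/hdfs/tmp/*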
Start the cluster
Run on hadoop0:
cd $HADOOP_HOME
sbin/start-all.sh
Stop the cluster
sbin/stop-all.sh
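After start-all.sh, running jps on each node is a quick way to confirm the daemons are up; with the slaves file above (which lists hadoop0 as a worker too), roughly the following is expected:
jps
# hadoop0: NameNode, SecondaryNameNode, ResourceManager, DataNode, NodeManager
# hadoop1 / hadoop2: DataNode, NodeManager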
9. Verification
- Access port 50070 on the master node in a browser (the NameNode web UI)
- Run the PI example
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar pi 10 100
- Create a directory in HDFS
hadoop fs -mkdir /input
- Create text.txt locally, then upload it to the /input directory in HDFS
# create the file
vi text.txt
# write some content and save
# upload to the /input directory; the target directory must not already contain a file with the same name
hadoop fs -put text.txt /input
# list the uploaded file
hadoop fs -ls /input
# view the file contents
hadoop fs -cat /input/text.txt
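As a fuller end-to-end check, the uploaded file can be fed to the bundled wordcount example before deleting it (a sketch; the /output directory must not exist yet):
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar wordcount /input /output
hadoop fs -cat /output/part-r-00000    # word counts for text.txt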
# delete the file
hadoop fs -rm /input/text.txt
# delete the directory
hadoop fs -rm -r /input