一、创建hadoop用户
# Create a 'hadoop' user with a home directory (-m) and bash as the login shell
sudo useradd -m hadoop -s /bin/bash
# Set the new user's password (interactive prompt)
sudo passwd hadoop
# Grant sudo privileges by adding the user to the 'sudo' group
sudo adduser hadoop sudo
二、配置SSH免密码登录
安装SSH软件
# Install the SSH server so localhost SSH login is possible
sudo apt-get install openssh-server
给hadoop用户配置密钥
# If ~/.ssh does not exist yet, run `ssh localhost` once and then `exit` to create it
cd ~/.ssh/
# Generate an RSA key pair (press Enter at every prompt for an empty passphrase)
ssh-keygen -t rsa
# Authorize the new public key for passwordless login to this machine
cat ./id_rsa.pub >> ./authorized_keys
# Test: this should now log in without asking for a password
ssh localhost
三、安装hadoop
- 解压安装
# Extract the Hadoop tarball into /opt/bigdata (directory must already exist)
sudo tar -zxvf ~/hadoop-2.9.2.tar.gz -C /opt/bigdata
# Hand ownership to the hadoop user so daemons can write logs/pids there
sudo chown -R hadoop:hadoop /opt/bigdata/hadoop-2.9.2
- 配置环境变量
#Hadoop
# Root of the Hadoop installation
export HADOOP_HOME=/opt/bigdata/hadoop-2.9.2
# Location of the native (JNI) libraries
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
# Put both bin (hdfs, hadoop, yarn) and sbin (start-dfs.sh, start-yarn.sh)
# on PATH so the start/stop scripts below work from any directory
export PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH
保存退出
# Reload /etc/profile so the new variables take effect in the current shell
source /etc/profile
- 修改hadoop-env.sh
# Fix: the original line read "export_JAVA_HOME=..." (underscore instead of a
# space), which defines nothing — Hadoop would then fail with
# "JAVA_HOME is not set". JAVA_HOME must point at the JDK install directory.
export JAVA_HOME=/home/hadoop/app/jdk1.8.0_91
- 修改core-site.xml ,指定hdfs的访问方式
<configuration>
<!-- Base directory for Hadoop's temporary/working files; the HDFS dirs below live under it -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/tmp</value>
</property>
<!-- Default filesystem URI: clients talk to the NameNode on localhost:9000 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
- 修改hdfs-site.xml ,指定namenode和datanode的数据存储位置
<configuration>
<!-- Single-node setup: keep only one replica of each block -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- Where the NameNode stores filesystem metadata (fsimage/edits) -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/tmp/dfs/name</value>
</property>
<!-- Where the DataNode stores the actual block data -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/tmp/dfs/data</value>
</property>
</configuration>
- 修改mapred-site.xml,配置mapreduce
<configuration>
<!-- Run MapReduce jobs on YARN (the standard mode in Hadoop 2.x) -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Note: the old mapred.job.tracker property was removed here. It configured
     the Hadoop 1.x JobTracker and is ignored when jobs run on YARN; its
     previous value ("http://localhost:9001") also used an invalid scheme. -->
</configuration>
- 修改yarn-site.xml ,配置yarn
<configuration>
<!-- Enable the shuffle auxiliary service that MapReduce needs on each NodeManager -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Disable the NodeManager virtual-memory limit check so containers are not
     killed for exceeding vmem on small single-node machines -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
- 启动hdfs
# Format the NameNode metadata directory (one-time, on first setup only;
# re-formatting erases all HDFS metadata)
hdfs namenode -format
# Start the HDFS daemons (NameNode, DataNode, SecondaryNameNode); run from $HADOOP_HOME
sbin/start-dfs.sh
# Stop the HDFS daemons
sbin/stop-dfs.sh
使用jps命令查看启动状态,成功如下所示
注:如果jps中没有DataNode进程,可删除之前配置的tmp目录(注意:这会清空HDFS上的全部数据),重新格式化NameNode后再启动。
- 启动yarn
# Start the YARN daemons (ResourceManager, NodeManager); run from $HADOOP_HOME
sbin/start-yarn.sh
# Stop the YARN daemons
sbin/stop-yarn.sh