Download
Download hadoop-2.7.7.tar.gz from the Apache Hadoop release archive.
Configure passwordless SSH login
# Test ssh localhost; by default it asks for a password
[root@node1 hadoop]# ssh localhost
The authenticity of host 'localhost (::1)' can't be established.
ECDSA key fingerprint is SHA256:Ii9RadytomW4X2LEvMQwRxoOTeGgxfNbOgwXrc/wwZI.
ECDSA key fingerprint is MD5:bc:b5:ef:93:e6:fd:7c:cd:a3:4f:a7:f6:4c:24:c7:a7.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
root@localhost's password:
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
# Test passwordless login; it now succeeds without a password
[root@node1 hadoop]# ssh localhost
Last failed login: Thu May 7 11:44:07 CST 2020 from localhost on ssh:notty
There was 1 failed login attempt since the last successful login.
Last login: Thu May 7 11:43:04 2020 from 192.168.41.1
[root@node1 ~]#
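If key-based login is ever rejected on another machine, the usual culprits are permissions (sshd ignores an authorized_keys file that is group- or world-writable) or, on newer systems with OpenSSH 7.0+, DSA keys being disabled by default; a quick fix under those assumptions:
# tighten the permissions sshd insists on
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
# if ssh-dss is disabled, generate an RSA key instead
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys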
Configure the Java environment
# the JDK has already been deployed
[root@node1 hadoop]# echo $JAVA_HOME
/usr/local/bin/jdk1.8.0_112
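For reference, deploying the JDK amounts to exporting JAVA_HOME and prepending its bin directory to PATH; a minimal sketch using the path above (adjust to wherever your JDK is unpacked):
export JAVA_HOME=/usr/local/bin/jdk1.8.0_112
export PATH=$JAVA_HOME/bin:$PATH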
Install Hadoop
Copy hadoop-2.7.7.tar.gz into the /root/hadoop directory and extract it.
[root@node1 hadoop]# pwd
/root/hadoop
[root@node1 hadoop]# tar -zxvf hadoop-2.7.7.tar.gz
Set the HADOOP_HOME environment variable in /etc/profile or ~/.bash_profile:
export HADOOP_HOME=/root/hadoop/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin
Make the configuration take effect:
source /etc/profile
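To confirm the variables took effect, the version command should now work from any directory and report 2.7.7:
hadoop version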
Configure Hadoop
Go to the $HADOOP_HOME/etc/hadoop directory and edit hadoop-env.sh and the rest. The configuration files involved are:
hadoop-2.7.7/etc/hadoop/hadoop-env.sh
hadoop-2.7.7/etc/hadoop/yarn-env.sh
hadoop-2.7.7/etc/hadoop/core-site.xml
hadoop-2.7.7/etc/hadoop/hdfs-site.xml
hadoop-2.7.7/etc/hadoop/mapred-site.xml
hadoop-2.7.7/etc/hadoop/yarn-site.xml
- Configure hadoop-env.sh
# The java implementation to use. Set an explicit path rather than relying on
# ${JAVA_HOME}, which may not be visible to daemons started over ssh.
export JAVA_HOME=/usr/local/bin/jdk1.8.0_112
- Configure yarn-env.sh: uncomment the JAVA_HOME line and point it at the actual JDK
export JAVA_HOME=/usr/local/bin/jdk1.8.0_112
- Configure core-site.xml
<configuration>
  <property>
    <!-- fs.defaultFS is the current name for the deprecated fs.default.name -->
    <name>fs.defaultFS</name>
    <value>hdfs://192.168.41.128:9000</value>
    <description>HDFS URI: filesystem://namenode-host:port</description>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/root/hadoop/tmp</value>
    <description>Local temporary directory for Hadoop on the NameNode host</description>
  </property>
</configuration>
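Once saved, the value can be read back with hdfs getconf, which parses the client configuration without starting any daemon (assumes $HADOOP_HOME/bin is already on PATH):
hdfs getconf -confKey fs.defaultFS
# expected output: hdfs://192.168.41.128:9000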
- Configure hdfs-site.xml
<configuration>
  <property>
    <!-- current name for the deprecated dfs.name.dir -->
    <name>dfs.namenode.name.dir</name>
    <value>/root/hadoop/name</value>
    <description>Where the NameNode stores HDFS namespace metadata</description>
  </property>
  <property>
    <!-- current name for the deprecated dfs.data.dir -->
    <name>dfs.datanode.data.dir</name>
    <value>/root/hadoop/data</value>
    <description>Physical location of data blocks on the DataNode</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Replication factor; the default is 3 and it must not exceed the number of DataNodes</description>
  </property>
</configuration>
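Hadoop creates these directories itself (the name dir at format time, the data dir when the DataNode first starts), but pre-creating them surfaces permission problems early; an optional step:
mkdir -p /root/hadoop/tmp /root/hadoop/name /root/hadoop/data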
- Configure mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
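Note that the 2.7.7 tarball ships only a template for this file, so if mapred-site.xml is missing, copy it into place before editing:
cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml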
- Configure yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <!-- moves the ResourceManager web UI from the default 8088 to 8099 -->
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8099</value>
  </property>
</configuration>
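Because the webapp address moves the UI to port 8099, the ResourceManager REST API answers there as well; once YARN is started below, a quick smoke test (assuming curl is available):
curl -s http://192.168.41.128:8099/ws/v1/cluster/info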
Start Hadoop
- Format the HDFS filesystem (first setup only; reformatting destroys existing metadata)
bin/hdfs namenode -format
- Start the NameNode
sbin/hadoop-daemon.sh start namenode
- Start the DataNode
sbin/hadoop-daemon.sh start datanode
- Start YARN
sbin/start-yarn.sh
- Verify
Check the logs/ directory for error logs, and list the running daemons with jps:
[root@node1 hadoop-2.7.7]# jps
17152 NodeManager
17920 Jps
16721 DataNode # data node
16866 ResourceManager
62190 HMaster # HBase master, from a separate HBase deployment
16623 NameNode # name node
- Web UIs
- Nodes of the cluster: http://192.168.41.128:8099/cluster/nodes
- Applications running on this node: http://192.168.41.128:8042/node/allApplications
- Browse Hdfs: http://192.168.41.128:50070/
- DataNode overview: http://192.168.41.128:50075/
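For completeness, the daemons started above can be shut down with the matching stop commands, in reverse order:
sbin/stop-yarn.sh
sbin/hadoop-daemon.sh stop datanode
sbin/hadoop-daemon.sh stop namenode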
Submit a MapReduce job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 2 100
Using HDFS
- Create a directory
hadoop fs -mkdir /test
- Upload a file to the directory
hadoop fs -put README.txt /test
or (moveFromLocal deletes the local copy after the upload)
hadoop fs -moveFromLocal README.txt /test
- List the files in the directory
[root@node1 hadoop-2.7.7]# hadoop fs -ls /test
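With README.txt now in /test, the bundled wordcount example exercises HDFS and YARN together; note that the output directory must not exist beforehand:
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /test /test-out
# inspect the result
hadoop fs -cat /test-out/part-r-00000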