安装配置hadoop
• 注意:hadoop的配置对内存有要求
• 配置hadoop、jdk(jdk辅助)
以hadoop用户操作
[root@server1 ~]# ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz
[root@server1 ~]# useradd -u 800 hadoop
[root@server1 ~]# mv * /home/hadoop/
[root@server1 ~]# su - hadoop
[hadoop@server1 ~]$ ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz
解压,配置软链接
[hadoop@server1 ~]$ tar zxf hadoop-2.7.3.tar.gz
[hadoop@server1 ~]$ tar zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ ln -s jdk1.7.0_79/ java
[hadoop@server1 ~]$ ln -s hadoop-2.7.3 hadoop
配置java,当jdk更新时,更改软链接即可
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/home/hadoop/java
测试hadoop
[hadoop@server1 hadoop]$ pwd
/home/hadoop/hadoop
[hadoop@server1 hadoop]$ mkdir input
[hadoop@server1 hadoop]$ cp etc/hadoop/* input/
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[hadoop@server1 hadoop]$ cat output/*
dfs.audit.logger
dfs.class
dfs.server.namenode.
dfs.period
dfs.audit.log.maxfilesize
dfs.audit.log.maxbackupindex
dfsmetrics.log
dfsadmin
dfs.servers
dfs.file
数据操作
1、配置hadoop
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://172.25.120.1:9000</value>
    </property>
</configuration>
[hadoop@server1 hadoop]$ vim slaves
172.25.120.1
[hadoop@server1 hadoop]$ vim hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
2、配置ssh
[hadoop@server1 hadoop]$ ssh-keygen
[hadoop@server1 hadoop]$ cd
[hadoop@server1 ~]$ cd .ssh/
[hadoop@server1 .ssh]$ cp id_rsa.pub authorized_keys
[hadoop@server1 .ssh]$ ssh localhost
[hadoop@server1 ~]$ logout
[hadoop@server1 .ssh]$ ssh 172.25.120.1
[hadoop@server1 ~]$ logout
[hadoop@server1 .ssh]$ ssh server1
[hadoop@server1 ~]$ logout
[hadoop@server1 .ssh]$ ssh 0.0.0.0
[hadoop@server1 ~]$ logout
3、启动dfs
格式化(文件存于/tmp)
[hadoop@server1 ~]$ pwd
/home/hadoop
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ ls /tmp/
hadoop-hadoop hsperfdata_hadoop
启动dfs
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
Starting namenodes on [server1]
server1: starting namenode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-namenode-server1.out
172.25.120.1: starting datanode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-datanode-server1.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-secondarynamenode-server1.out
配置环境变量
[hadoop@server1 ~]$ vim .bash_profile
PATH=$PATH:$HOME/bin:~/java/bin
[hadoop@server1 ~]$ logout
[root@server1 ~]# su - hadoop
[hadoop@server1 ~]$ jps
1896 SecondaryNameNode
1713 DataNode
1620 NameNode
2031 Jps
处理文件系统
[hadoop@server1 hadoop]$ pwd
/home/hadoop/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -usage ##查看用法
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -put input/
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount input output