Installing Hadoop requires a Java environment, and installing under an ordinary (non-root) user is recommended.
useradd -u 800 hadoop    # create the hadoop user with uid 800
mv * /home/hadoop/       # move the JDK and Hadoop tarballs into its home
su - hadoop
tar zxf jdk-7u79-linux-x64.tar.gz
tar zxf hadoop-2.7.3.tar.gz
ln -s jdk1.7.0_79/ java
ln -s hadoop-2.7.3 hadoop
Set the environment variables. In /home/hadoop/hadoop/etc/hadoop:
vim hadoop-env.sh
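The essential change is pointing JAVA_HOME at the JDK symlink created above; a minimal sketch, assuming the paths from this setup:
export JAVA_HOME=/home/hadoop/java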
A quick standalone-mode test, run from /home/hadoop/hadoop/:
mkdir input
cp etc/hadoop/*.xml input/
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
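In standalone mode the job reads and writes the local filesystem, so the matches can be checked directly:
cat output/*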
1. Pseudo-distributed mode (single machine)
Configure the slaves file (in /home/hadoop/hadoop/etc/hadoop), replacing localhost with the node's IP:
vim slaves
172.25.40.1
etc/hadoop/core-site.xml:
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://172.25.40.1:9000</value>
    </property>
</configuration>
etc/hadoop/hdfs-site.xml (replication is 1 since there is only one DataNode):
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
Set up passwordless ssh (the start/stop scripts log into every node listed in slaves):
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
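A quick check that key-based login works without prompting for a password:
ssh 172.25.40.1 hostname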
Format the NameNode:
bin/hdfs namenode -format
Start HDFS:
sbin/start-dfs.sh
Check the daemons with jps (first add the JDK's bin directory to PATH):
vim .bash_profile
PATH=$PATH:$HOME/bin:/home/hadoop/java/bin
source .bash_profile
jps
1509 DataNode
1692 SecondaryNameNode
1416 NameNode
1812 Jps
View cluster-wide status:
bin/hdfs dfsadmin -report
List files (with no path given, -ls looks in /user/<username>, which does not exist yet):
bin/hdfs dfs -ls
Create the user's storage directory in HDFS (relative paths then resolve under /user/hadoop):
bin/hdfs dfs -mkdir /user/
bin/hdfs dfs -mkdir /user/hadoop
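The new directory can be confirmed with:
bin/hdfs dfs -ls /user/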
The HDFS web UI is now available at 172.25.40.1:50070.
Upload data:
bin/hdfs dfs -put input/    # upload to /user/hadoop/input
bin/hdfs dfs -ls input/
Delete the local input and output directories left over from the standalone test, then run wordcount against HDFS:
rm -fr input/
rm -fr output/
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount input output
bin/hdfs dfs -cat output/*    # view the results in HDFS
bin/hdfs dfs -get output      # download the results locally
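Once downloaded, the counts are in the reducer's part files (the default MapReduce output naming):
cat output/part-r-00000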
2. Fully distributed mode (two DataNodes)
Stop HDFS, then raise the replication factor to 2:
sbin/stop-dfs.sh
vim etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>
Point slaves at the two DataNodes:
vim slaves
172.25.40.2
172.25.40.3
Configure NFS file sharing on server1, so that all nodes share the same /home/hadoop:
yum install -y nfs-utils rpcbind
/etc/init.d/rpcbind start
vim /etc/exports
/home/hadoop *(rw,anonuid=800,anongid=800)
/etc/init.d/nfs start
exportfs -v
Connect from server2 and server3:
yum install -y nfs-utils rpcbind
/etc/init.d/rpcbind start
[root@server2 ~]# showmount -e 172.25.40.1
Export list for 172.25.40.1:
/home/hadoop *
useradd -u 800 hadoop    # create the hadoop user with the same uid (800)
mount 172.25.40.1:/home/hadoop/ /home/hadoop/    # mount the shared home
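A quick check that the mount is in place:
df -h /home/hadoop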
This also solves the ssh problem in passing: since /home/hadoop (and therefore ~/.ssh) is shared over NFS, the key pair generated earlier gives passwordless login to server2 and server3 with no extra copying.
Hadoop keeps its temporary data under /tmp/; delete it, then re-format and restart (quick and dirty):
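A minimal cleanup sketch, assuming the default hadoop.tmp.dir (/tmp/hadoop-${user.name}) and run on every node:
rm -rf /tmp/hadoop-*    # remove old NameNode/DataNode data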
bin/hdfs namenode -format
sbin/start-dfs.sh
Create the storage directory:
bin/hdfs dfs -mkdir /user
bin/hdfs dfs -mkdir /user/hadoop
dd if=/dev/zero of=bigfile bs=1M count=500    # generate a 500 MB test file
bin/hdfs dfs -put bigfile
bin/hdfs dfsadmin -report
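To see how the 500 MB file was split into blocks and where the replicas landed, check it with hdfs fsck (the path assumes the upload above):
bin/hdfs fsck /user/hadoop/bigfile -files -blocks -locations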