Hadoop学习笔记01
大数据生态体系
Hadoop
分布式海量数据存储、处理技术系统
HBase
基于Hadoop的分布式海量数据库
Hive sql
基于Hadoop的数据仓库工具
Zookeeper
集群协调服务
Sqoop
数据导入导出工具
Flume
数据采集框架
Storm
实时流式计算框架
Hadoop的部署安装
虚拟机和Linux服务器
虚拟机安装配置
虚拟机使用VMware 12,配置虚拟网络
Linux安装配置
1、网络的配置
2、连接Xshell
3、主机名的修改和本地映射
[root@hadoop01 ~]# vi /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=hadoop01
[root@hadoop01 ~]# vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.101 hadoop01
192.168.1.102 hadoop02
192.168.1.103 hadoop03
192.168.1.104 hadoop04
4、配置防火墙(关闭)
[root@hadoop01 ~]# service iptables stop
[root@hadoop01 ~]# chkconfig iptables off
5、安装JDK
[root@hadoop01 opt]# tar -zxvf jdk-8u171-linux-x64.tar.gz
[root@hadoop01 opt]# vi /etc/profile
#JAVA_HOME
export JAVA_HOME=/opt/jdk1.8.0_171/
export PATH=$PATH:$JAVA_HOME/bin
[root@hadoop01 opt]# source /etc/profile
安装Hadoop
[root@hadoop01 opt]# tar -zxvf hadoop-2.6.5.tar.gz
[root@hadoop01 opt]# vi /etc/profile
#HADOOP_HOME
export HADOOP_HOME=/opt/hadoop-2.6.5/
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@hadoop01 opt]# source /etc/profile
[root@hadoop01 ~]# cd /opt/hadoop-2.6.5/etc/hadoop/
[root@hadoop01 hadoop]# vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.8.0_171/
[root@hadoop01 hadoop]# vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-2.6.5/tmp</value>
</property>
</configuration>
[root@hadoop01 hadoop]# vi hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
[root@hadoop01 hadoop]# mv mapred-site.xml.template mapred-site.xml
[root@hadoop01 hadoop]# vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
[root@hadoop01 hadoop]# vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop01</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
[root@hadoop01 hadoop]# vi slaves
hadoop01
启动Hadoop
格式化
[root@hadoop01 ~]# cd /opt/hadoop-2.6.5/bin/
[root@hadoop01 bin]# ./hadoop namenode -format
手动启动
[root@hadoop01 sbin]# ./hadoop-daemon.sh start namenode
[root@hadoop01 sbin]# ./hadoop-daemon.sh start datanode
[root@hadoop01 sbin]# ./hadoop-daemon.sh start secondarynamenode
[root@hadoop01 sbin]# jps
2418 SecondaryNameNode
2341 DataNode
2456 Jps
2236 NameNode
[root@hadoop01 sbin]# netstat -nltp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1397/rpcbind
tcp 0 0 0.0.0.0:50070 0.0.0.0:* LISTEN 2236/java
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1636/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1529/cupsd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 1716/master
tcp 0 0 0.0.0.0:50010 0.0.0.0:* LISTEN 2341/java
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 1965/sshd
tcp 0 0 0.0.0.0:50075 0.0.0.0:* LISTEN 2341/java
tcp 0 0 0.0.0.0:50020 0.0.0.0:* LISTEN 2341/java
tcp 0 0 0.0.0.0:43972 0.0.0.0:* LISTEN 1448/rpc.statd
tcp 0 0 192.168.1.101:9000 0.0.0.0:* LISTEN 2236/java
tcp 0 0 0.0.0.0:50090 0.0.0.0:* LISTEN 2418/java
tcp 0 0 :::111 :::* LISTEN 1397/rpcbind
tcp 0 0 :::33972 :::* LISTEN 1448/rpc.statd
tcp 0 0 :::22 :::* LISTEN 1636/sshd
tcp 0 0 ::1:631 :::* LISTEN 1529/cupsd
tcp 0 0 ::1:25 :::* LISTEN 1716/master
tcp 0 0 ::1:6010 :::* LISTEN 1965/sshd
[root@hadoop01 sbin]# ./yarn-daemon.sh start resourcemanager
[root@hadoop01 sbin]# ./yarn-daemon.sh start nodemanager
[root@hadoop01 sbin]# jps
2418 SecondaryNameNode
2789 Jps
2341 DataNode
2759 NodeManager
2236 NameNode
2510 ResourceManager
Shell批量启动
ssh免密
[root@hadoop01 ~]# ssh-keygen
[root@hadoop01 ~]# ssh-copy-id hadoop01
批量启动
[root@hadoop01 sbin]# ./start-all.sh
[root@hadoop01 sbin]# jps
28371 NodeManager
27875 NameNode
28404 Jps
28132 SecondaryNameNode
28279 ResourceManager
27965 DataNode
伪分布式拓展为全分布
创建新的虚拟机hadoop02,hadoop03
配置网络
配置主机名与本地映射
关闭防火墙
JDK的安装和配置
免密登录
要求hadoop01能免密登录hadoop02,hadoop03
[root@hadoop01 sbin]# ssh-copy-id hadoop02
[root@hadoop01 sbin]# ssh-copy-id hadoop03
拷贝Hadoop文件夹
[root@hadoop01 sbin]# scp -r /opt/hadoop-2.6.5/ hadoop02:/opt/
[root@hadoop01 sbin]# scp -r /opt/hadoop-2.6.5/ hadoop03:/opt/
发送配置文件
[root@hadoop01 hadoop]# scp /etc/profile hadoop02:/etc/
[root@hadoop01 hadoop]# scp /etc/profile hadoop03:/etc/
[root@hadoop02 hadoop-2.6.5]# source /etc/profile
[root@hadoop03 hadoop-2.6.5]# source /etc/profile
删除tmp
[root@hadoop02 hadoop-2.6.5]# rm -rf tmp/
[root@hadoop03 hadoop-2.6.5]# rm -rf tmp/
配置hadoop01的slaves文件
[root@hadoop01 ~]# cd /opt/hadoop-2.6.5/etc/hadoop/
[root@hadoop01 hadoop]# vi slaves
hadoop01
hadoop02
hadoop03