HADOOP安装和环境配置
#检查/etc/hosts中 ip hostname 映射
ip addr
hostname
#1、本机免密
cd ~
ssh-keygen -t rsa
ssh root@singlechen
logout
#公钥
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#此时~/.ssh目录下应包含 authorized_keys 和 known_hosts
#免密登录
ssh root@singlechen
#2、解压
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/software/hadoop
mv /opt/software/hadoop/hadoop-3.1.3 /opt/software/hadoop/hadoop313
#3、配置环境变量
vim /etc/profile.d/myenv.sh
export HADOOP_HOME=/opt/software/hadoop/hadoop313
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
#4、配置文件
source /etc/profile
mkdir -p /opt/software/hadoop/hadoop313/data
cd /opt/software/hadoop/hadoop313/etc/hadoop/
vim hadoop-env.sh
vim /opt/software/hadoop/hadoop313/etc/hadoop/core-site.xml
<configuration>
<!--配置namenode的地址-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.64.180:9820</value>
</property>
<!--配置数据存储目录-->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/software/hadoop/hadoop313/data</value>
</property>
<!--配置HDFS网页登录使用的静态用户root-->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<!--配置root超级用户允许通过代理访问主机节点-->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<!--配置root超级用户允许通过代理用户所属组-->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<!--配置root超级用户允许通过代理的用户-->
<property>
<name>hadoop.proxyuser.root.users</name>
<value>*</value>
</property>
</configuration>
vim /opt/software/hadoop/hadoop313/etc/hadoop/hdfs-site.xml
<configuration>
<!-- 配置namenode web访问地址-->
<property>
<name>dfs.namenode.http-address</name>
<value>192.168.64.180:9870</value>
</property>
<!-- 配置secondary namenode web访问地址-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>192.168.64.180:9868</value>
</property>
<!-- 配置hdfs副本数量-->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
vim /opt/software/hadoop/hadoop313/etc/hadoop/yarn-site.xml
<configuration>
<!--配置mr的执行方式-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--配置resourcemanager的地址-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>singlechen</value>
</property>
<!--配置环境变量的继承-->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!--配置yarn容器允许分配的最小内存-->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>256</value>
</property>
<!--配置yarn容器允许分配的最大内存-->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>1536</value>
</property>
<!--配置yarn容器允许管理的物理内存大小-->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>1536</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>256</value>
</property>
<!--配置关闭yarn对物理内存和虚拟内存的限制检查,
jdk运行于centos6以上版本会导致虚拟内存过大-->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!--配置关闭yarn对物理内存和虚拟内存的限制检查-->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!--开启日志聚集-->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!--配置日志聚集服务器地址-->
<property>
<name>yarn.log.server.url</name>
<value>http://192.168.64.180:19888/jobhistory/logs</value>
</property>
<!--配置日志保留时间为7天-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
vim /opt/software/hadoop/hadoop313/etc/hadoop/mapred-site.xml
<configuration>
<!--配置mapreduce运行于yarn上:默认为local,也可指定为spark阶段了解的mesos-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--配置历史服务器地址-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>192.168.64.180:10020</value>
</property>
<!--配置历史服务器web端地址-->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>192.168.64.180:19888</value>
</property>
</configuration>
#5、namenode格式化
cd /opt/software/hadoop/hadoop313/bin
./hdfs namenode -format
#6、启动服务
start-dfs.sh
start-yarn.sh
jps
[root@singlechen hadoop]# jps
4562 NodeManager
5490 Jps
4132 SecondaryNameNode
3881 DataNode
3739 NameNode
4412 ResourceManager
cd /opt/software/hadoop/hadoop313/
./sbin/mr-jobhistory-daemon.sh start historyserver
5579 JobHistoryServer
#关闭安全模式
hdfs dfsadmin -safemode leave
cd /opt/software/hadoop/hadoop313/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-3.1.3.jar wordcount /input/wc01.log /out
#java opr hadoop
#1 准备windows Hadoop环境
hadoop.dll -> C:\Windows\System32
#2 将Linux下hadoop打包
cd /opt/software
tar -zcvf hadoop313.tar.gz hadoop/hadoop313/
#通过xftp或sz将压缩包拷贝至windows
#将压缩包解压到Windows D:\Hadoop
#3 将winutils.exe拷贝至windows hadoop313/bin
winutils.exe -> hadoop313/bin
#4.配置windows
hadoop_home=D:\Hadoop\hadoop\hadoop313
path :%hadoop_home%\bin;%hadoop_home%\sbin;
hadoop_user_name=root
#5 在windows的hosts文件中配置linux主机的hostname和ip映射
C:\Windows\System32\drivers\etc\hosts
#关闭安全模式
hdfs dfsadmin -safemode leave