博客的质量有高有低,只能当做参考。在参考着博客和官方文档进行探索之后,终于把hadoop伪集群的生产环境安装好了;大家可以做参考,我的系统是win10,jdk为1.8;
本文只做参考,有什么问题欢迎留言
1、首先我下载了hadoop-2.7.7(官网下载,bin版本的,解压好)和对应的hadooponwindows(不知道其他人为什么要弄成用积分下载),hadooponwindows下载好之后解压,替代hadoop中的etc和bin目录,把winutils.exe和hadoop.dll放在C:/windows/system32目录下;(注意:下文配置示例中的路径以 H:/hadoop-2.9.2 为例,请按自己实际解压的版本和目录修改)
2、下载好之后,把winutils.exe和hadoop.dll放在C:/windows/system32 文件夹下(与上一步相同,确认已放置),并且配置好bin和sbin的环境变量;
3、配置好环境变量之后,配置xml文件,我把我的xml文件发出来供大家参考一下,xml作为程序与人的交流处,还是应该好好理解一下,推荐大家认真阅读一下xml文件的配置;(提示:若JDK安装在 Program Files 这类带空格的目录,配置JAVA_HOME时可用短路径 Progra~1 代替 Program Files,避免空格导致启动脚本报错)
core-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- HDFS 的访问入口(NameNode RPC 地址),客户端通过该地址访问文件系统 -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9633</value>
    <description>服务器IP地址,其实也可以使用主机名</description>
  </property>
  <!-- Hadoop 临时目录,其他存储目录默认在其之下派生 -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:///H:/hadoop-2.9.2/tmp</value>
    <description>Abase for other temporary directories.</description>
  </property>
  <!-- 读写缓冲区大小。注意:该属性单位是字节(byte),131072字节即128KB(hadoop默认值为4096字节) -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <description>该属性值单位为字节(byte),131072字节即为128KB(默认值为4096字节)</description>
  </property>
</configuration>
hdfs-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- 副本数:伪分布式只有一个DataNode,设为1 -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>分片数量,伪分布式将其配置成1即可</description>
  </property>
  <!-- NameNode 元数据(fsimage/edits)的本地存储目录 -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///H:/hadoop-2.9.2/tmp/data/shr/dfs</value>
    <description>命名空间和事务在本地文件系统永久存储的路径</description>
  </property>
  <!-- DataNode 数据块的本地存储目录 -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///H:/hadoop-2.9.2/tmp/data/shr/DataNode</value>
    <description>DataNode在本地文件系统中存放块的路径</description>
  </property>
  <!-- 以下为可选配置,按需放开 -->
  <!--
  <property>
    <name>dfs.namenode.hosts</name>
    <value>localhost</value>
    <description>对应DataNode所在服务器主机名,可以用逗号分隔</description>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
    <description>大文件系统HDFS块大小为256M,默认值为64M</description>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
    <description>更多的NameNode服务器线程处理来自DataNodes的RPCS</description>
  </property>
  -->
</configuration>
mapred-site.xml(将目录中的 mapred-site.xml.template 复制并重命名为 mapred-site.xml 后再编辑):
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!--
  注意:mapred-site.xml 配置的是 MapReduce 框架本身,
  此处不应重复 hdfs-site.xml 中的 dfs.* 属性;
  关键属性是 mapreduce.framework.name,缺少它 MapReduce 作业不会提交到 YARN 上运行。
-->
<configuration>
  <!-- 指定 MapReduce 运行在 YARN 之上(可选值:local、classic、yarn) -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>指定MapReduce作业运行在YARN框架之上,伪分布式必须配置</description>
  </property>
  <!-- 以下为可选的 JobHistory Server 配置,按需放开 -->
  <!--
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
    <description>MapReduce历史任务服务的RPC地址</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
    <description>MapReduce历史任务服务的Web界面地址</description>
  </property>
  -->
</configuration>
yarn-site.xml:
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Site specific YARN configuration properties -->
  <!-- ResourceManager 的RPC地址,格式为 主机:端口(默认端口8032),缺少端口会导致客户端连接异常 -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:8032</value>
    <description>指定resourcemanager的RPC地址,格式为主机:端口,localhost也可替换为ip地址</description>
  </property>
  <!-- NodeManager 上运行的辅助服务,MapReduce 需要 shuffle 服务 -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>NodeManager获取数据的方式是shuffle</description>
  </property>
  <!-- 本节点可供YARN调度的CPU核数 -->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>4</value>
  </property>
  <!-- 本节点可供YARN调度的内存(MB) -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>8192</value>
  </property>
  <!-- 开启日志聚合,便于通过Web界面查看已完成作业的日志 -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>开启yarn日志聚合功能</description>
  </property>
  <!-- 以下为可选的 ResourceManager 配置,按需放开 -->
  <!--
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:18082</value>
    <description>IP地址localhost也可替换为ip地址</description>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:18083</value>
    <description>IP地址localhost也可替换为ip地址</description>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>localhost:18084</value>
    <description>IP地址localhost也可替换为ip地址</description>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>localhost:18085</value>
    <description>IP地址localhost也可替换为ip地址</description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description>需填完整类名,常用类:CapacityScheduler、FairScheduler、FifoScheduler</description>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>100</value>
    <description>单个容器最小内存分配,单位:MB</description>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>256</value>
    <description>单个容器最大内存分配,单位:MB</description>
  </property>
  <property>
    <name>yarn.resourcemanager.nodes.include-path</name>
    <value>localhost</value>
    <description>该属性应指向一个包含允许接入节点列表的文件路径</description>
  </property>
  -->
  <!-- 配置NodeManager(可选)
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>256</value>
    <description>单位为MB</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
    <description>虚拟内存与物理内存的比率,默认值为2.1(注意:是比率而不是百分比)</description>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>file:///H:/hadoop-2.9.2/tmp/nodemanager</value>
    <description>列表用逗号分隔</description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>file:///H:/hadoop-2.9.2/tmp/nodemanager/logs</value>
    <description>列表用逗号分隔</description>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
    <description>单位为S</description>
  </property>
  -->
</configuration>
基本上xml中的各项配置都做了注解,大家可以根据自己的目录进行修正;
4、配置好环境的可以直接在cmd中输入 hdfs namenode -format,先将节点格式化,格式化完成后,start-all.cmd --> jps
(如果cmd命令窗口运行不成功的话,则在bin目录中打开cmd,输入 hadoop namenode -format,再进入sbin目录下,start-all.cmd --> jps) ,出现
6832 DataNode
14804 NameNode
4484 Jps
13512 ResourceManager
4652 NodeManager
并且保证启动成功;
5、浏览器输入 127.0.0.1:8088(YARN资源管理界面)和 127.0.0.1:50070(HDFS NameNode界面)查看hadoop相关状态