Steps to install Hadoop (non-production, local machine configuration):
1. Extract
cd /opt/software
# extract to /export/server, which every later path in these notes assumes
tar -zxvf hadoop-2.7.5.tar.gz -C /export/server/
cd /export/server/hadoop-2.7.5/etc/hadoop
Cluster role layout:

            node1           node2            node3
zookeeper   zk              zk               zk
HDFS        JournalNode     JournalNode      JournalNode
            NameNode        NameNode
            ZKFC            ZKFC
            DataNode        DataNode         DataNode
YARN                        ResourceManager  ResourceManager
            NodeManager     NodeManager      NodeManager
MapReduce                                    JobHistoryServer
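The hostnames node1/node2/node3 are used throughout. A minimal /etc/hosts sketch, run on all three machines, assuming the 192.168.88.x addresses that appear in the web UI URLs at the end of this page; adjust to your own network:

cat >> /etc/hosts <<'EOF'
192.168.88.161 node1
192.168.88.162 node2
192.168.88.163 node3
EOF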
Files to modify:

core-site.xml
  ha.zookeeper.quorum = node1:2181,node2:2181,node3:2181
  fs.defaultFS = hdfs://ns
  hadoop.tmp.dir = /export/server/hadoop-2.7.5/data/tmp
  fs.trash.interval = 10080   (trash retention in minutes; 10080 = 7 days)
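Each pair above becomes a <property> element (with <name> and <value> children) inside the file's <configuration> element; the commented-out yarn.resourcemanager.ha.id block near the end of this page shows the exact shape. Once the config is in place, a quick way to confirm Hadoop picks the values up, using the stock getconf tool (run from the installation root):

bin/hdfs getconf -confKey fs.defaultFS          # should print hdfs://ns
bin/hdfs getconf -confKey ha.zookeeper.quorum   # should print node1:2181,node2:2181,node3:2181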
hdfs-site.xml
  dfs.nameservices = ns
  dfs.ha.namenodes.ns = nn1,nn2
  dfs.namenode.rpc-address.ns.nn1 = node1:8020
  dfs.namenode.rpc-address.ns.nn2 = node2:8020
  dfs.namenode.servicerpc-address.ns.nn1 = node1:8022
  dfs.namenode.servicerpc-address.ns.nn2 = node2:8022
  dfs.namenode.http-address.ns.nn1 = node1:50070
  dfs.namenode.http-address.ns.nn2 = node2:50070
  dfs.namenode.shared.edits.dir = qjournal://node1:8485;node2:8485;node3:8485/ns
  dfs.client.failover.proxy.provider.ns = org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
  dfs.ha.fencing.methods = sshfence
  dfs.ha.fencing.ssh.private-key-files = /root/.ssh/id_rsa
  dfs.journalnode.edits.dir = /export/server/hadoop-2.7.5/data/dfs/jn
  dfs.ha.automatic-failover.enabled = true
  dfs.namenode.name.dir = file:///export/server/hadoop-2.7.5/data/dfs/nn/name
  dfs.namenode.edits.dir = file:///export/server/hadoop-2.7.5/data/dfs/nn/edits
  dfs.datanode.data.dir = file:///export/server/hadoop-2.7.5/data/dfs/dn
  dfs.permissions = false
  dfs.blocksize = 134217728   (128 MB)
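The point of the nameservice plus ConfiguredFailoverProxyProvider is that clients always address the logical name hdfs://ns and the proxy provider locates whichever NameNode is currently active. A sketch once HDFS is running (see the startup commands below):

# lists / regardless of which NameNode is active
bin/hdfs dfs -ls hdfs://ns/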
yarn-site.xml (note: unlike the other files, part of this one differs per node; see below)
  yarn.log-aggregation-enable = true
  yarn.resourcemanager.ha.enabled = true
  yarn.resourcemanager.cluster-id = mycluster
  yarn.resourcemanager.ha.rm-ids = rm1,rm2
  yarn.resourcemanager.hostname.rm1 = node2
  yarn.resourcemanager.hostname.rm2 = node3
  yarn.resourcemanager.address.rm1 = node2:8032
  yarn.resourcemanager.scheduler.address.rm1 = node2:8030
  yarn.resourcemanager.resource-tracker.address.rm1 = node2:8031
  yarn.resourcemanager.admin.address.rm1 = node2:8033
  yarn.resourcemanager.webapp.address.rm1 = node2:8088
  yarn.resourcemanager.address.rm2 = node3:8032
  yarn.resourcemanager.scheduler.address.rm2 = node3:8030
  yarn.resourcemanager.resource-tracker.address.rm2 = node3:8031
  yarn.resourcemanager.admin.address.rm2 = node3:8033
  yarn.resourcemanager.webapp.address.rm2 = node3:8088
  yarn.resourcemanager.recovery.enabled = true
  yarn.resourcemanager.ha.id = rm1
    (per-node value: rm1 on node2, rm2 on node3. "If we want to launch more than one RM in single node, we need this configuration")
  yarn.resourcemanager.store.class = org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
  yarn.resourcemanager.zk-address = node2:2181,node3:2181,node1:2181
    (for multiple zk services, separate them with commas)
  yarn.resourcemanager.ha.automatic-failover.enabled = true
    (enable automatic failover; by default, it is enabled only when HA is enabled)
  yarn.client.failover-proxy-provider = org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider
  yarn.nodemanager.resource.cpu-vcores = 2
  yarn.nodemanager.resource.memory-mb = 2048
  yarn.scheduler.minimum-allocation-mb = 1024
  yarn.scheduler.maximum-allocation-mb = 2048
  yarn.log-aggregation.retain-seconds = 2592000   (30 days)
  yarn.nodemanager.log.retain-seconds = 604800   (7 days)
  yarn.nodemanager.log-aggregation.compression-type = gz
  yarn.nodemanager.local-dirs = /export/server/hadoop-2.7.5/yarn/local
  yarn.resourcemanager.max-completed-applications = 1000
  yarn.nodemanager.aux-services = mapreduce_shuffle
  yarn.resourcemanager.connect.retry-interval.ms = 2000
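The memory settings hang together as follows (a rough sketch, assuming the default scheduler resource calculator, which rounds container requests up to multiples of yarn.scheduler.minimum-allocation-mb):

# - each NodeManager offers 2048 MB and 2 vcores
# - a 1024 MB map or reduce task fits twice per node
# - the 1536 MB AM request is rounded up to 2048 MB, so an AM fills a node by itself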
mapred-site.xml
  mapreduce.framework.name = yarn
  mapreduce.jobhistory.address = node3:10020
  mapreduce.jobhistory.webapp.address = node3:19888
  mapreduce.jobtracker.system.dir = /export/server/hadoop-2.7.5/data/system/jobtracker
  mapreduce.map.memory.mb = 1024
  mapreduce.reduce.memory.mb = 1024
  mapreduce.task.io.sort.mb = 100
  mapreduce.task.io.sort.factor = 10
  mapreduce.reduce.shuffle.parallelcopies = 15
  yarn.app.mapreduce.am.command-opts = -Xmx1024m
  yarn.app.mapreduce.am.resource.mb = 1536
  mapreduce.cluster.local.dir = /export/server/hadoop-2.7.5/data/system/local
slaves
  node1
  node2
  node3

hadoop-env.sh
  export JAVA_HOME=/export/server/jdk1.8.0_241
Startup commands:
# Run the following on the first machine (node1):
cd /export/server
scp -r hadoop-2.7.5/ node2:$PWD
scp -r hadoop-2.7.5/ node3:$PWD
# Alternatively, sending just the config files is enough (run from etc/hadoop):
scp -r * node2:$PWD
# Create the NameNode metadata directories (run on all three machines):
mkdir -p /export/server/hadoop-2.7.5/data/dfs/nn/name
mkdir -p /export/server/hadoop-2.7.5/data/dfs/nn/edits
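The other data directories referenced in the configs (JournalNode edits, DataNode blocks, hadoop.tmp.dir) are normally created by Hadoop on first start; pre-creating them is harmless. A sketch, on all three machines:

mkdir -p /export/server/hadoop-2.7.5/data/dfs/jn
mkdir -p /export/server/hadoop-2.7.5/data/dfs/dn
mkdir -p /export/server/hadoop-2.7.5/data/tmp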
# Change the RM HA id to rm2 on node3
# Run the following on node3:
vim yarn-site.xml
# Configure rm1 on node2 and rm2 on node3. Note: the configured files are usually
# copied wholesale to the other machines, so yarn.resourcemanager.ha.id must be
# changed on the second ResourceManager machine and left unset on machines that
# run no ResourceManager. We have two ResourceManagers: node2 is rm1 and node3
# is rm2; remember to make this change on node3:
#<property>
#  <name>yarn.resourcemanager.ha.id</name>
#  <value>rm2</value>
#  <description>If we want to launch more than one RM in single node, we need this configuration</description>
#</property>
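Editing with vim works; an equivalent hedged one-liner for node3, assuming the rm1 value sits on a line of its own exactly as in the snippet above:

sed -i 's#<value>rm1</value>#<value>rm2</value>#' /export/server/hadoop-2.7.5/etc/hadoop/yarn-site.xml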
# Start ZooKeeper (zk runs on all three machines):
/export/server/zookeeper-3.4.6/bin/zkServer.sh start
# Run the following on node1:
cd /export/server/hadoop-2.7.5
# create the HA state znode in ZooKeeper
bin/hdfs zkfc -formatZK
# start the JournalNodes on every machine listed in slaves
sbin/hadoop-daemons.sh start journalnode
# format the namespace on the first NameNode
bin/hdfs namenode -format
# push the freshly formatted edits to the JournalNodes
bin/hdfs namenode -initializeSharedEdits -force
sbin/start-dfs.sh
# Run the following on node2:
cd /export/server/hadoop-2.7.5
# copy the active NameNode's metadata over, then start the standby NameNode
bin/hdfs namenode -bootstrapStandby
sbin/hadoop-daemon.sh start namenode
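At this point both NameNodes are up; their HA roles can be checked with haadmin (each command prints active or standby):

bin/hdfs haadmin -getServiceState nn1
bin/hdfs haadmin -getServiceState nn2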
# Start YARN
# Run on node2:
cd /export/server/hadoop-2.7.5
sbin/start-yarn.sh
# Run on node3 as well (start-yarn.sh only starts the ResourceManager on the local machine):
cd /export/server/hadoop-2.7.5
sbin/start-yarn.sh
# Check the ResourceManager states
# Run on node2:
cd /export/server/hadoop-2.7.5
bin/yarn rmadmin -getServiceState rm1
# Run on node3:
cd /export/server/hadoop-2.7.5
bin/yarn rmadmin -getServiceState rm2
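Each command prints the queried ResourceManager's current role, for example:

# typical output with both RMs healthy (which one is active depends on
# the ZooKeeper election, so the roles may be swapped):
#   rm1: active
#   rm2: standby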
# Start the JobHistoryServer on node3:
cd /export/server/hadoop-2.7.5
sbin/mr-jobhistory-daemon.sh start historyserver
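With everything started, jps on each machine should roughly match the layout table at the top (QuorumPeerMain is the ZooKeeper process; the exact sets depend on which steps ran where):

jps
# node1: QuorumPeerMain, JournalNode, NameNode, DFSZKFailoverController, DataNode, NodeManager
# node2: QuorumPeerMain, JournalNode, NameNode, DFSZKFailoverController, DataNode, ResourceManager, NodeManager
# node3: QuorumPeerMain, JournalNode, DataNode, ResourceManager, NodeManager, JobHistoryServer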
# Check HDFS status
# HDFS web UI on node1:
http://192.168.88.161:50070/dfshealth.html#tab-overview
# HDFS web UI on node2:
http://192.168.88.162:50070/dfshealth.html#tab-overview
# YARN cluster web UI:
http://192.168.88.163:8088/cluster
# Job history web UI:
http://192.168.88.163:19888/jobhistory
# If automatic NameNode failover does not happen, install psmisc on every
# machine; the sshfence fencing method relies on the fuser command it provides:
yum -y install psmisc
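A minimal check that the fencing prerequisite is in place:

which fuser   # should print a path such as /usr/sbin/fuser once psmisc is installed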