高可用集群其中一个NameNode退出,另一个namenode状态为standby,查看执行日志报错如下:
2019-07-25 15:32:27,481 FATAL org.apache.hadoop.hdfs.server.namenode.FSEditLog: Error: starting log segment 7913 failed for required journal (JournalAndStream(mgr=QJM to [192.168.137.128:8485, 192.168.137.129:8485, 192.168.137.130:8485], stream=null))
java.io.IOException: Timed out waiting 20000ms for a quorum of nodes to respond.
at org.apache.hadoop.hdfs.qjournal.client.AsyncLoggerSet.waitForWriteQuorum(AsyncLoggerSet.java:137)
at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.startLogSegment(QuorumJournalManager.java:403)
at org.apache.hadoop.hdfs.server.namenode.JournalSet$JournalAndStream.startLogSegment(JournalSet.java:107)
at org.apache.hadoop.hdfs.server.namenode.JournalSet$3.apply(JournalSet.java:222)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.mapJournalsAndReportErrors(JournalSet.java:393)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.startLogSegment(JournalSet.java:219)
at org.apache.hadoop.hdfs.server.namenode.FSEditLog.startLogSegment(FSEditLog.java:1192)
at org.apache.hadoop.hdfs.server.namenode.FSEditLog.rollEditLog(FSEditLog.java:1161)
at org.apache.hadoop.hdfs.server.namenode.FSImage.rollEditLog(FSImage.java:1238)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.rollEditLog(FSNamesystem.java:6344)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.rollEditLog(NameNodeRpcServer.java:933)
at org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolServerSideTranslatorPB.rollEditLog(NamenodeProtocolServerSideTranslatorPB.java:139)
at org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos$NamenodeProtocolService$2.callBlockingMethod(NamenodeProtocolProtos.java:11214)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:619)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:962)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2039)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2035)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)
2019-07-25 15:32:27,507 INFO org.apache.hadoop.util.ExitUtil: Exiting with status 1
2019-07-25 15:32:27,520 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at bdmaster/192.168.137.128
************************************************************/
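问题:NameNode向JournalNode写入edit log时,等待多数JournalNode响应超时(默认20000ms),导致NameNode退出
解决问题:在hdfs-site.xml中增加如下配置,调大写JournalNode的超时时间(本例堆栈中超时发生在startLogSegment阶段,该阶段的超时由dfs.qjournal.start-segment.timeout.ms控制,默认同为20000ms,建议一并调大),修改后重启NameNode生效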
<property>
<name>dfs.qjournal.write-txns.timeout.ms</name>
<value>60000</value>
</property>