问题背景
某天跑 Spark SQL 任务的时候,遇到报错:
org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:198)
	at org.apache.spark.sql.hive.execution.SaveAsHiveFile$class.saveAsHiveFile(SaveAsHiveFile.scala:86)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:66)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:195)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:99)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:115)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:643)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 16.0 failed 4 times, most recent failure: Lost task 6.3 in stage 16.0 (TID 478, idc-sql-dms-13, executor 40): ExecutorLostFailure (executor 40 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding memory limits. 11.8 GB of 11 GB physical memory used. Consider boosting spark.yarn.executor.memoryOverhead or disabling yarn.nodemanager.vmem-check-enabled because of YARN-4714.
Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:167)
	... 35 more
org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:198)
	at org.apache.spark.sql.hive.execution.SaveAsHiveFile$class.saveAsHiveFile(SaveAsHiveFile.scala:86)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:66)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:195)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:99)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:115)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:643)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 16.0 failed 4 times, most recent failure: Lost task 6.3 in stage 16.0 (TID 478, idc-sql-dms-13, executor 40): ExecutorLostFailure (executor 40 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding memory limits. 11.8 GB of 11 GB physical memory used. Consider boosting spark.yarn.executor.memoryOverhead or disabling yarn.nodemanager.vmem-check-enabled because of YARN-4714.
Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:167)
	... 35 more
通过上面的日志,大概了解到任务失败的原因应该是内存超过限定。 “Container killed by YARN for exceeding memory limits. ”,解决问题的第一思路是 sql 能不能优化下,加内存属于下下策。
解决办法
先讲一下原来的 sql 思路:
SELECT a.name, a.age, b.alias
from a
left join (
    SELECT id, concat_ws(',', COLLECT_LIST(alias)) alias
    from bb
    group by id
) b
on a.id = b.id
这是一个很简单的逻辑,猜测问题应该出现在 COLLECT_LIST 聚合函数上:当 b 表根据 id 聚合的时候,如果某个 id 对应的大量数据(包括重复值)都被加载到 list(COLLECT_LIST)里面,将导致内存耗尽。
解决思路应该是先把重复数据去掉,再调用 concat_ws(',', COLLECT_LIST(alias)),优化后的 sql 如下:
SELECT a.name, a.age, b.alias
from a
left join (
    SELECT id, concat_ws(',', COLLECT_LIST(alias)) alias
    from (
        SELECT id, alias
        from bb
        group by id, alias
    ) t
    group by id
) b
on a.id = b.id
还有更简单的一种写法,就是使用 COLLECT_SET 代替 COLLECT_LIST:
SELECT a.name, a.age, b.alias
from a
left join (
    SELECT id, concat_ws(',', COLLECT_SET(alias)) alias
    from bb
    group by id
) b
on a.id = b.id
哈哈,问题解决!