CallSite Definition
CallSite represents a place in user code. It can have a short and a long form.
private[spark] case class CallSite(shortForm: String, longForm: String)
private[spark] object CallSite {
val SHORT_FORM = "callSite.short"
val LONG_FORM = "callSite.long"
val empty = CallSite("", "")
}
To create a CallSite object, just call Utils.getCallSiste().
private val creationSite: CallSite = Utils.getCallSite()
getCallSite
/**
* When called inside a class in the spark package, returns the name of the user code class
* (outside the spark package) that called into Spark, as well as which Spark method they called.
* This is used, for example, to tell users where in their code each RDD got created.
*
* @param skipClass Function that is used to exclude non-user-code classes.
*/
def getCallSite(skipClass: String => Boolean = sparkInternalExclusionFunction): CallSite = {
// Keep crawling up the stack trace until we find the first function not inside of the spark
// package. We track the last (shallowest) contiguous Spark method. This might be an RDD
// transformation, a SparkContext function (such as parallelize), or anything else that leads
// to instantiation of an RDD. We also track the first (deepest) user method, file, and line.
var lastSparkMethod = "<unknown>"
var firstUserFile = "<unknown>"
var firstUserLine = 0
var insideSpark = true
var callStack = new ArrayBuffer[String]() :+ "<unknown>"
Thread.currentThread.getStackTrace().foreach { ste: StackTraceElement =>
// When running under some profilers, the current stack trace might contain some bogus
// frames. This is intended to ensure that we don't crash in these situations by
// ignoring any frames that we can't examine.
if (ste != null && ste.getMethodName != null
&& !ste.getMethodName.contains("getStackTrace")) {
if (insideSpark) {
if (skipClass(ste.getClassName)) {
lastSparkMethod = if (ste.getMethodName == "<init>") {
// Spark method is a constructor; get its class name
ste.getClassName.substring(ste.getClassName.lastIndexOf('.') + 1)
} else {
ste.getMethodName
}
callStack(0) = ste.toString // Put last Spark method on top of the stack trace.
} else {
if (ste.getFileName != null) {
firstUserFile = ste.getFileName
if (ste.getLineNumber >= 0) {
firstUserLine = ste.getLineNumber
}
}
callStack += ste.toString
insideSpark = false
}
} else {
callStack += ste.toString
}
}
}
val callStackDepth = System.getProperty("spark.callstack.depth", "20").toInt
val shortForm =
if (firstUserFile == "HiveSessionImpl.java") {
// To be more user friendly, show a nicer string for queries submitted from the JDBC
// server.
"Spark JDBC Server Query"
} else {
s"$lastSparkMethod at $firstUserFile:$firstUserLine"
}
val longForm = callStack.take(callStackDepth).mkString("\n")
CallSite(shortForm, longForm)
}
sparkInternalExclusionFunction
/** Default filtering function for finding call sites using `getCallSite`. */
private def sparkInternalExclusionFunction(className: String): Boolean = {
// A regular expression to match classes of the internal Spark API's
// that we want to skip when finding the call site of a method.
val SPARK_CORE_CLASS_REGEX =
"""^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?(\.broadcast)?\.[A-Z]""".r
val SPARK_SQL_CLASS_REGEX = """^org\.apache\.spark\.sql.*""".r
val SCALA_CORE_CLASS_PREFIX = "scala"
val isSparkClass = SPARK_CORE_CLASS_REGEX.findFirstIn(className).isDefined ||
SPARK_SQL_CLASS_REGEX.findFirstIn(className).isDefined
val isScalaClass = className.startsWith(SCALA_CORE_CLASS_PREFIX)
// If the class is a Spark internal class or a Scala class, then exclude.
isSparkClass || isScalaClass
}
Thread.getStackTrace()
/**
* Returns an array of stack trace elements representing the stack dump
* of this thread. This method will return a zero-length array if
* this thread has not started, has started but has not yet been
* scheduled to run by the system, or has terminated.
* If the returned array is of non-zero length then the first element of
* the array represents the top of the stack, which is the most recent
* method invocation in the sequence. The last element of the array
* represents the bottom of the stack, which is the least recent method
* invocation in the sequence.
*
* <p>If there is a security manager, and this thread is not
* the current thread, then the security manager's
* <tt>checkPermission</tt> method is called with a
* <tt>RuntimePermission("getStackTrace")</tt> permission
* to see if it's ok to get the stack trace.
*
* <p>Some virtual machines may, under some circumstances, omit one
* or more stack frames from the stack trace. In the extreme case,
* a virtual machine that has no stack trace information concerning
* this thread is permitted to return a zero-length array from this
* method.
*
* @return an array of <tt>StackTraceElement</tt>,
* each represents one stack frame.
*
* @throws SecurityException
* if a security manager exists and its
* <tt>checkPermission</tt> method doesn't allow
* getting the stack trace of thread.
* @see SecurityManager#checkPermission
* @see RuntimePermission
* @see Throwable#getStackTrace
*
* @since 1.5
*/
public StackTraceElement[] getStackTrace() {
if (this != Thread.currentThread()) {
// check for getStackTrace permission
SecurityManager security = System.getSecurityManager();
if (security != null) {
security.checkPermission(
SecurityConstants.GET_STACK_TRACE_PERMISSION);
}
// optimization so we do not call into the vm for threads that
// have not yet started or have terminated
if (!isAlive()) {
return EMPTY_STACK_TRACE;
}
StackTraceElement[][] stackTraceArray = dumpThreads(new Thread[] {this});
StackTraceElement[] stackTrace = stackTraceArray[0];
// a thread that was alive during the previous isAlive call may have
// since terminated, therefore not having a stacktrace.
if (stackTrace == null) {
stackTrace = EMPTY_STACK_TRACE;
}
return stackTrace;
} else {
// Don't need JVM help for current thread
return (new Exception()).getStackTrace();
}
}