1. Spark-shell 启动选择hive 还是in-memory?
在使用spark-shell时,进一步使用dataframe进行sql处理,
报错:HiveMetaStoreClient:Failed to connect to the MetaStore Server
spark-shell在默认启动的时候会选择Hive做为SqlContext的默认SessionCatalog,所谓catalog就是spark中对表资源进行管理的标准api集合。
如果想使用in-memory的方式 ,可以在spark-shell启动时指定参数
spark-shell --conf spark.sql.catalogImplementation=in-memory
顺便研究一下启动过程:
在SparkSession中
//from StaticSqlConf.scala
val CATALOG_IMPLEMENTATION = buildStaticConf("spark.sql.catalogImplementation")
.internal()
.stringConf
.checkValues(Set("hive", "in-memory"))
.createWithDefault("in-memory")
// from SparkSession.scala
private def sessionStateClassName(conf: SparkConf): String = {
conf.get(CATALOG_IMPLEMENTATION) match {
case "hive" => HIVE_SESSION_STATE_BUILDER_CLASS_NAME
case "in-memory" => classOf[SessionStateBuilder].getCanonicalName
}
}
/**
* Helper method to create an instance of `SessionState` based on `className` from conf.
* The result is either `SessionState` or a Hive based `SessionState`.
*/
private def instantiateSessionState(
className: String,
sparkSession: SparkSession): SessionState = {
try {
// invoke `new [Hive]SessionStateBuilder(SparkSession, Option[SessionState])`
val clazz = Utils.classForName(className)
val ctor = clazz.getConstructors.head
ctor.newInstance(sparkSession, None).asInstanceOf[BaseSessionStateBuilder].build()
} catch {
case NonFatal(e) =>
throw new IllegalArgumentException(s"Error while instantiating '$className':", e)
}
}
/**
* State shared across sessions, including the `SparkContext`, cached data, listener,
* and a catalog that interacts with external systems.
*
* This is internal to Spark and there is no guarantee on interface stability.
*
* @since 2.2.0
*/
@InterfaceStability.Unstable
@transient
lazy val sharedState: SharedState = {
existingSharedState.getOrElse(new Sha