在流式计算的主流程中,通常会用到很多映射数据,其中比较常见的是 Text 文档。但文档读入之后还需要匹配相应的 Schema,因此本文通过自定义 TextSource 数据源,实现自动读取默认的 Schema。
DefaultSource.scala
package com.wxx.bigdata.sql_custome_source
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
class DefaultSource extends RelationProvider with SchemaRelationProvider{
/**
 * Creates the relation for this data source using a caller-supplied schema.
 *
 * @param sqlContext context handed on to the created relation
 * @param parameters data source options; the "path" entry is mandatory
 * @param schema     schema passed through unchanged to the relation
 * @return a TextDatasourceRelation built from the context, the path and the schema
 * @throws IllegalArgumentException if `parameters` has no "path" entry
 */
def createRelation(sqlContext: SQLContext,
                   parameters: Map[String, String],
                   schema: StructType): BaseRelation = {
  // The default of getOrElse is by-name, so the exception is raised only when "path" is absent.
  val location = parameters.getOrElse("path", throw new IllegalArgumentException("path is required"))
  new TextDatasourceRelation(sqlContext, location, schema)
}
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) :Ba