实现 Spark DataSourceV2 的几个环节

继承 DataSourceV2
// Entry point of the outline: a DataSourceV2 that mixes in both ReadSupport and WriteSupport.
// The two overrides are listed by name only; return types and bodies are omitted in this sketch.
class SimpleWritableDataSource extends DataSourceV2 with ReadSupport with WriteSupport {

	// Read-side hook (outline only, no return type or body shown).
	override def createReader()

	// Write-side hook (outline only, no return type or body shown).
	override def createWriter()

}
构造 DataSourceReader
/**
 * Outline of the DataSourceReader for this source, constructed from a path and a Configuration.
 * Method bodies are intentionally omitted in this sketch.
 */
class Reader(path: String, conf: Configuration) extends DataSourceReader
{
	/**
	 * Returns the actual schema of this data source reader, which may be different from the physical
	 * schema of the underlying storage, as column pruning or other optimizations may happen.
	 */
	override def readSchema()

	/**
	 * Returns a list of reader factories. Each factory is responsible for creating a data reader to
	 * output data for one RDD partition. That means the number of factories returned here is same as
	 * the number of RDD partitions this scan outputs.
	 */
	override def createDataReaderFactories()
}

构造 DataReaderFactory、DataReader
/**
 * Outline of a class that acts as both the DataReaderFactory[Row] and the DataReader[Row],
 * constructed from a path and a SerializableConfiguration. Method bodies are omitted in this sketch.
 */
class SimpleCSVDataReaderFactory(path: String, conf: SerializableConfiguration)
  extends DataReaderFactory[Row] with DataReader[Row]
{
	/**
	 * Returns a data reader to do the actual reading work.
	 */
	override def createDataReader(): DataReader[Row]
}

构造 DataSourceWriter
/**
 * Outline of the DataSourceWriter for this source, constructed from a job id, a path and a
 * Configuration. Method bodies are intentionally omitted in this sketch.
 */
class Writer(jobId: String, path: String, conf: Configuration) extends DataSourceWriter {
	/**
	 * Creates a writer factory which will be serialized and sent to executors.
	 */
	override def createWriterFactory(): DataWriterFactory[Row]

	override def commit(messages: Array[WriterCommitMessage]): Unit

	override def abort(messages: Array[WriterCommitMessage]): Unit
}
构造 DataWriterFactory
/**
 * Outline of the DataWriterFactory[Row] for this source, constructed from a path, a job id and a
 * SerializableConfiguration.
 */
class SimpleCSVDataWriterFactory(path: String, jobId: String, conf: SerializableConfiguration)
  extends DataWriterFactory[Row] {
  /**
   * Returns a data writer to do the actual writing work.
   */
  override def createDataWriter(partitionId: Int, attemptNumber: Int): DataWriter[Row] = {
    // Explicit placeholder: an empty block evaluates to Unit, which does not conform to
    // DataWriter[Row]. The real construction of the writer is not shown in this outline.
    ???
  }
}
构造 DataWriter
// Outline of the DataWriter[Row] implementation; it takes a FileSystem and a target Path.
// The class body is left empty in this sketch.
class SimpleCSVDataWriter(fs: FileSystem, file: Path) extends DataWriter[Row] {


}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值