1、对应源码位置
在特质（trait）FileScan的partitions方法中
org.apache.spark.sql.execution.datasources.v2.FileScan#partitions
2、生效原理
2.1、关键方法之 partitions
org.apache.spark.sql.execution.datasources.v2.FileScan#partitions:
protected def partitions: Seq[FilePartition] = {
val selectedPartitions = fileIndex.listFiles(partitionFilters, dataFilters)
// 下面这行代码底层会读取spark.sql.files.maxPartitionBytes参数配置
val maxSplitBytes = FilePartition.maxSplitBytes(sparkSession, selectedPartitions)
val partitionAttributes = fileIndex.partitionSchema.toAttributes
val attributeMap = partitionAttributes.map(a => normalizeName(a.name) -> a).toMap
val readPartitionAttributes = readPartitionSchema.map {
readField =>
attributeMap.getOrElse(normalizeName(readField.name),
throw QueryCompilationErrors.cannotFindPartitionColumnInPartitionSchemaError(
readField, fileIndex.partitionSchema)
)
}
lazy val partitionValueProject =
GenerateUnsafeProjection