HadoopRDD's getPartitions returns the input splits. In other words:
if a file is smaller than the default block size, it contributes 1 split;
if a file is larger than the default block size, it contributes file_size / default_block_size + ((file_size % default_block_size == 0) ? 0 : 1) splits.
At least minPartitions (default 2) partitions are returned: if there is a single file smaller than the default block size, minPartitions partitions are returned.
/**
 * Computes the partitions of this RDD from the Hadoop `InputSplit`s.
 *
 * The split count is decided by the configured `InputFormat.getSplits`,
 * with `minPartitions` passed as a lower-bound hint.
 *
 * @return one `HadoopPartition` per input split, indexed in split order
 */
override def getPartitions: Array[Partition] = {
  val jobConf = getJobConf()
  // Add the credentials here as this can be called before SparkContext is initialized.
  SparkHadoopUtil.get.addCredentials(jobConf)
  val inputFormat = getInputFormat(jobConf)
  val inputSplits = inputFormat.getSplits(jobConf, minPartitions)
  // Array.tabulate replaces the manual index loop over a pre-allocated array.
  Array.tabulate[Partition](inputSplits.size) { i =>
    new HadoopPartition(id, i, inputSplits(i))
  }
}