用 mapPartitions 函数将数据封装成 Text（Hadoop 的数据类型），返回的是 MapPartitionsRDD，再调用 saveAsHadoopFile
/**
 * Save this RDD as a compressed text file, using string representations of elements.
 *
 * Each element is rendered with `toString`, wrapped into a Hadoop `Text` value keyed by
 * `NullWritable`, and the resulting pair RDD is written out via `saveAsHadoopFile`
 * using `TextOutputFormat` and the supplied compression codec.
 */
def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit = withScope {
  this.mapPartitions { elements =>
    // NOTE: a single Text buffer is deliberately reused for every element of the
    // partition; the iterator is consumed record-by-record by the Hadoop writer,
    // so each value is serialized before the buffer is overwritten.
    val record = new Text()
    elements.map { element =>
      // Null rows cannot be represented in a text file.
      require(element != null, "text files do not allow null rows")
      record.set(element.toString)
      (NullWritable.get(), record)
    }
  }.saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec)
}
在 PairRDDFunctions 类中设置 key、value 类型以及压缩格式等
/**
 * Output the RDD to any Hadoop-supported file system, using a Hadoop `OutputFormat` class
 * supporting the key and value types K and V in this RDD. Compress the result with the
 * supplied codec.
 */
def saveAsHadoopFile[F <: OutputFormat[K, V]](
    path: String,
    codec: Class[_ <: CompressionCodec])(implicit fm: ClassTag[F]): Unit = self.withScope {
  // Recover the concrete OutputFormat class from the implicit ClassTag and delegate
  // to the fully-specified overload (path, key/value classes, format class, codec).
  saveAsHadoopFile(path, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]], codec)
}
/**
 * Output the RDD to any Hadoop-supported file system, using a Hadoop `OutputFormat` class
 * supporting the key and value types K and V in this RDD. Compress with the supplied codec.
 */
def saveAsHadoopFile(
    path: String,
    keyClass: Class[_],
    valueClass: Class[_],
    outputFormatClass: Class[_ <: OutputFormat[_, _]],
    codec: Class[_ <: CompressionCodec]): Unit = self.withScope {
  // Build a JobConf from this context's Hadoop configuration and delegate;
  // Option(codec) maps a null codec reference to None.
  val jobConf = new JobConf(self.context.hadoopConfiguration)
  saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, jobConf, Option(codec))
}
/**
*