// Spark中保存文件为各种压缩类型。
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.io.compress.SnappyCodec
import org.apache.hadoop.io.compress.BZip2Codec
/**
 * Example job that writes the same text RDD with several Hadoop compression
 * codecs (gzip, Snappy and bzip2).
 *
 * @author training
 */
object Test {

  /**
   * Reads `file:/tmp/sparktest/123.txt` and saves its contents three times,
   * once per compression codec, under `file:/tmp/sparktest/`.
   *
   * The Spark context is always stopped before returning, even if one of the
   * save operations throws.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // The second argument is the minimum number of partitions requested.
      val rdd = sc.textFile("file:/tmp/sparktest/123.txt", 1)

      // NOTE(review): per the Spark API, each target path is written as an
      // output directory of part files rather than a single file, and the
      // save is expected to fail if the path already exists -- confirm
      // against the Spark/Hadoop versions in use.
      rdd.saveAsTextFile("file:/tmp/sparktest/123.gz", classOf[GzipCodec])
      // NOTE(review): SnappyCodec may need Hadoop native libraries to be
      // available at runtime, depending on the Hadoop version -- confirm.
      rdd.saveAsTextFile("file:/tmp/sparktest/123.snappy", classOf[SnappyCodec])
      rdd.saveAsTextFile("file:/tmp/sparktest/123.bz2", classOf[BZip2Codec])
    } finally {
      // Release the context's resources regardless of success or failure.
      sc.stop()
    }
  }
}