实现的功能:将记录按 key 分别写入以该 key 命名的不同文件。
其中 data 为键值对(key-value)类型的 RDD。
// Hash-partition the pair RDD into 4 partitions, then save it with String key/value
// classes through RDDMultipleTextOutputFormat, which picks the output file per record.
data
  .partitionBy(new HashPartitioner(4))
  .saveAsHadoopFile(
    outputPath,
    classOf[String],
    classOf[String],
    classOf[RDDMultipleTextOutputFormat]
  )
自定义类 RDDMultipleTextOutputFormat 的实现如下:
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat
import org.apache.hadoop.io.{BytesWritable, NullWritable, Text}
/**
 * Output format that chooses the output file of each record from the record's key.
 *
 * The key is only used to pick the file name; it is swapped for `NullWritable`
 * before writing so that the key is kept out of the file content.
 */
class RDDMultipleTextOutputFormat extends MultipleTextOutputFormat[Any, Any] {

  /**
   * Substitutes the `NullWritable` singleton for every key so the key is not
   * emitted into the file content.
   *
   * @param key   original record key (ignored)
   * @param value record value (ignored)
   * @return the `NullWritable` singleton
   */
  override def generateActualKey(key: Any, value: Any): NullWritable = {
    NullWritable.get()
  }

  /**
   * Uses the record's key, cast to `String`, as the output file name.
   *
   * The original also carried a disabled alternative that placed each key in
   * its own sub-directory ("/r_" + key + "/" + key); it remains disabled here.
   *
   * @param key   record key; must be a `String` at runtime, otherwise the cast fails
   * @param value record value (ignored)
   * @param name  file name supplied by the framework (ignored)
   * @return the key as the file name
   */
  override def generateFileNameForKeyValue(key: Any, value: Any, name: String): String = {
    val fileNameFromKey = key.asInstanceOf[String]
    fileNameFromKey
  }
}