一、MultipleTextOutputFormat设置一
使用DataSet的key作为文件名称,将DataSet按key输出到多个文件中(文件内容中只写入value,不写入key)。
1.自定义MultipleTextOutputFormat
package code.book.batch.outputformat.scala
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat
/**
 * A `MultipleTextOutputFormat` that routes every record to a file named after the
 * record's key and keeps only the record's value as the file content.
 *
 * @tparam K key type of the records being written
 * @tparam V value type of the records being written
 */
class MultipleTextOutputFormat001[K, V] extends MultipleTextOutputFormat[K, V] {

  /**
   * Produces the output file name for a record; the key's string form is used as-is.
   *
   * `toString` is used instead of a cast to `String` so that non-String key types
   * do not fail with a `ClassCastException`; for `String` keys the result is unchanged.
   *
   * @param key   key of the record
   * @param value value of the record (not used)
   * @param name  default leaf file name proposed by the framework (not used)
   * @return the file name for this record
   */
  override def generateFileNameForKeyValue(key: K, value: V, name: String): String =
    key.toString

  /**
   * Produces the key that is handed to the underlying record writer.
   *
   * The original key is replaced by the `NullWritable` singleton. The cast to `K` is
   * unchecked and only succeeds because of type erasure.
   * NOTE(review): this presumably makes the text writer emit only the value on each
   * line -- confirm against the Hadoop version in use.
   *
   * @param key   key of the record (not used)
   * @param value value of the record (not used)
   * @return the `NullWritable` singleton, typed as `K`
   */
  override def generateActualKey(key: K, value: V): K =
    NullWritable.get().asInstanceOf[K]

  /**
   * Produces the value that is handed to the underlying record writer; the record's
   * value is passed through unchanged.
   *
   * @param key   key of the record (not used)
   * @param value value of the record
   * @return the value to write
   */
  override def generateActualValue(key: K, value: V): V = value
}
2.自定义MultipleTextOutputFormat测试入口
package code.book.batch.outputformat.scala
import org.apache.flink.api.scala.hadoop.mapred.HadoopOutputFormat
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.{FileOutputFormat, JobConf}
/**
 * Test entry point for `MultipleTextOutputFormat001`: writes a small DataSet of
 * (key, value) pairs through a Hadoop output format wrapped for Flink.
 *
 * Inspect one of the resulting files with, for example:
 * hadoop fs -text /output/flink/MultipleTextOutputFormat/scala/001/lisi
 */
object MultipleTextOutputFormat001Test {

  /** Output directory used when no path is given on the command line. */
  private val DefaultOutputPath =
    "hdfs://qingcheng11:9000/output/flink/MultipleTextOutputFormat/scala/001"

  /**
   * Builds the sample DataSet, attaches the custom output format and runs the job.
   *
   * @param args optional; `args(0)` overrides the output directory, otherwise
   *             `DefaultOutputPath` is used
   */
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Sample (key, value) records; keys repeat so several records share a file name.
    val data1 = env.fromCollection(List(("zhangsan", "120"), ("lisi", "123"),
      ("zhangsan", "309"), ("lisi", "207"), ("wangwu", "315")))

    val multipleTextOutputFormat = new MultipleTextOutputFormat001[String, String]()

    // The Hadoop job configuration carries the output directory for the format.
    val jobConf = new JobConf()
    val filePath = args.headOption.getOrElse(DefaultOutputPath)
    FileOutputFormat.setOutputPath(jobConf, new Path(filePath))

    // Wrap the Hadoop output format so it can be used as a Flink DataSet sink.
    val format = new HadoopOutputFormat[String, String](multipleTextOutputFormat, jobConf)
    data1.output(format)

    env.execute()
  }
}
3.自定义MultipleTextOutputFormat执行效果
4.查看hdfs文件