org.apache.spark.repl.Main.interp.command("""
class MySchemaRDD(rdd:org.apache.spark.sql.SchemaRDD) extends java.io.Serializable {
def go() = {
  // Print every collected row to stdout: fields joined by FIELD_SEPERATOR,
  // each record terminated by RECORD_SEPERATOR (no prefix before the first field).
  // NOTE(review): rdd.collect materializes the whole RDD on the driver — fine
  // for small result sets, may exhaust driver memory on large ones; confirm
  // expected sizes with callers before relying on this for big tables.
  val startstr = ""              // was `var`, never reassigned
  val endstr = RECORD_SEPERATOR  // was `var`, never reassigned
  rdd.collect.foreach { row =>
    print(row.mkString(startstr, FIELD_SEPERATOR, endstr))
  }
}
def saveto(output: String) = {
import org.apache.hadoop.io.{NullWritable,Text}
var startstr = ""
var endstr = RECORD_SEPERATOR
if(output.startsWith("hdfs:")) {
val outputpath = AutoFileUtil.regularFile(output)
FileUtil.deletePath(outputpath)
rdd.map(x =>
(NullWritable.get(), new Text(x.mkString(FIELD_SEPERATOR)))
).saveAsHadoopFile[
org.apache.hadoop.mapred.TextOutputFormat[NullWritable, Text]
spark定制之三:MySchemaRDD
最新推荐文章于 2023-07-29 09:51:56 发布