def main(args: Array[String]): Unit = {
  // Local Spark session for testing; Kryo serializer configured for faster shuffles.
  val spark: SparkSession = SparkSession
    .builder()
    .master("local[*]")
    .appName("Test")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    // .enableHiveSupport()
    .getOrCreate()
  import spark.implicits._

  // Both frames are never reassigned — use val, and drop the redundant second .toDF() on user2.
  val user1 = Seq(("A1", "10", "张三", "上海"), ("A2", "20", "李四", "北京"), ("A3", "30", "王五", "南京"))
    .toDF("id", "age", "name", "address")
  val user2 = Seq(("A2", "张飞"), ("A3", "李逵")).toDF("id", "name")

  // Left join duplicates the "id" and "name" columns; renameDataFrame dedupes them
  // by appending numeric suffixes so the result can be selected by column name.
  val joined: DataFrame = user1.join(user2, user1("id") === user2("id"), "left")
  renameDataFrame(joined, spark).show()

  // Release the session's resources once we are done.
  spark.stop()
}
/**
 * Returns a copy of `df` whose duplicate column names are disambiguated.
 *
 * The first occurrence of a name is kept as-is; the n-th duplicate is renamed
 * to `name_n` (second occurrence -> `name_1`, third -> `name_2`, ...).
 * All fields are emitted as nullable, matching the original implementation
 * (a left join can introduce nulls regardless of the source nullability).
 *
 * @param df    DataFrame possibly containing duplicate column names (e.g. after a join)
 * @param spark active session used to rebuild the DataFrame with the new schema
 * @return DataFrame with the same rows and uniquely named columns
 */
def renameDataFrame(df: DataFrame, spark: SparkSession): DataFrame = {
  // Count of occurrences seen so far for each column name.
  // Single pass replaces the original's redundant isEmpty/contains branches:
  // getOrElse(0) handles both the empty-map and first-occurrence cases uniformly.
  val seen = scala.collection.mutable.Map[String, Int]()
  val fields = df.schema.fields.map { field =>
    val n = seen.getOrElse(field.name, 0)
    seen(field.name) = n + 1
    val newName = if (n == 0) field.name else s"${field.name}_$n"
    StructField(newName, field.dataType, nullable = true)
  }
  // Rebuild with the deduplicated schema; row data is untouched.
  spark.createDataFrame(df.rdd, StructType(fields))
}