前言
将两个具有相同列名的表进行合并，采用 unionByName：合并时按照列名对齐，而不是按列的位置。
程序
import org.apache.spark.sql.SparkSession
object test_unionbyname {

  // Run Spark in local mode (master = local[*]) instead of on a cluster.
  val isLocal = true

  /** Demo: merge two DataFrames that share the same column names using
    * `unionByName`, which aligns columns by NAME rather than by position
    * (unlike `union`, which is purely positional).
    */
  def main(args: Array[String]): Unit = {
    // getSimpleName on a Scala object class carries a trailing '$'
    // (e.g. "test_unionbyname$"); strip it for a clean Spark UI app name.
    val spark = apply(this.getClass.getSimpleName.stripSuffix("$"), isLocal)
    import spark.implicits._

    val df_table1 = spark.createDataFrame(List(
      ("nian", 23),
      ("li", 24)))
      .toDF("name", "age")
    df_table1.show()

    val df_table2 = spark.createDataFrame(List(
      ("ni", 32),
      ("dang", 42)))
      .toDF("name", "age")
    df_table2.show()

    // unionByName matches columns by name; here both inputs happen to have
    // the same layout, so the rows of table2 are simply appended to table1.
    val df_res = df_table1.unionByName(df_table2)
    // show() is side-effecting and 0-arity: keep the parentheses.
    df_res.show()
  }

  /** Build (or reuse) a SparkSession.
    *
    * @param appName application name shown in the Spark UI (default "SPARK")
    * @param isLocal when true, force master = local[*]; otherwise the master
    *                is taken from the submit environment (e.g. yarn)
    * @return a Hive-enabled SparkSession with log level set to ERROR
    */
  def apply(appName: String = "SPARK", isLocal: Boolean = true): SparkSession = {
    val sparkBuilder = SparkSession.builder().appName(appName)
    if (isLocal) {
      sparkBuilder.master("local[*]")
    }
    val spark = sparkBuilder
      .config("spark.yarn.maxAppAttempts", 1)
      .config("spark.sql.sources.partitionOverwriteMode", "dynamic")
      .config("spark.sql.hive.convertMetastoreParquet", "false")
      // non-strict mode: allow all partition columns to be dynamic
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .enableHiveSupport()
      .getOrCreate()
    // Quiet the console: only ERROR and above (value is case-insensitive).
    spark.sparkContext.setLogLevel("Error")
    spark
  }
}
结果展示
+----+---+
|name|age|
+----+---+
|nian| 23|
| li| 24|
+----+---+
+----+---+
|name|age|
+----+---+
| ni| 32|
|dang| 42|
+----+---+
+----+---+
|name|age|
+----+---+
|nian| 23|
| li| 24|
| ni| 32|
|dang| 42|
+----+---+