/**
 * Small Spark demo of typed `Dataset` de-duplication.
 *
 * Reads a JSON file into a DataFrame, converts it to a `Dataset[Employee]`,
 * and prints three views of the data: the full dataset, the result of
 * `distinct()`, and the result of `dropDuplicates(Seq("name"))`.
 */
object TypedOperation {

  /**
   * Row type the JSON input is converted to via `as[Employee]`.
   *
   * NOTE(review): the numeric fields are `Long`, presumably because Spark's
   * JSON schema inference yields bigint for integral values — confirm against
   * the actual input file.
   */
  case class Employee(name: String, age: Long, depId: Long, gender: String, salary: Long)

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "d://employee.json"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)`, when present, is the path of the JSON
   *             file to read. Without arguments the default path is used.
   */
  def main(args: Array[String]): Unit = {
    println("astron")

    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val spark = SparkSession
      .builder()
      .master("local")
      .appName("star")
      .getOrCreate()

    // Always release the session, even when reading or showing the data fails.
    try {
      import spark.implicits._

      // DF -> DS
      val demoDf = spark.read.json(inputPath)
      println(demoDf)
      // demoDf.printSchema()

      val demoDs = demoDf.as[Employee]
      demoDs.show()

      // distinct(): de-duplicates on the whole row.
      val distinctDemoDs = demoDs.distinct()
      distinctDemoDs.show()

      // dropDuplicates(Seq("name")): de-duplicates on the "name" column only.
      val dropDuplicatesDemoDs = demoDs.dropDuplicates(Seq("name"))
      dropDuplicatesDemoDs.show()
    } finally {
      spark.stop()
    }
  }
}
// Topic: Spark SQL dropDuplicates vs. distinct
// (Page footer: latest recommended article published on 2023-10-23 21:36:24)