// Show, for every column of `data`, how many rows hold a null in that column.
data
  .select(data.columns.map { name => sum(col(name).isNull.cast("int")).alias(name) }: _*)
  .show()
// Show the number of rows in each `cate1` group (at most 30 groups are displayed).
data
  .groupBy("cate1")
  .count()
  .show(30)
// Print the total number of rows (3046 was observed in one run).
println(data.count())
/**
 * Keeps the highest-ranked rows of every `cate1` category.
 *
 * Rows are numbered inside each `cate1` partition by descending `play_vv_end`
 * using `row_number`, and only rows whose number is at most 10 are kept. The
 * number is left in the result as the extra column `topn`.
 *
 * The result is persisted with `StorageLevel.MEMORY_AND_DISK_SER_2` and a
 * `count()` is run on it before it is returned. Nothing in this method
 * unpersists it, so releasing the cache is left to the caller.
 *
 * @param df input frame; the columns `cate1` and `play_vv_end` are referenced
 * @return the rows of `df` ranked 1 to 10 within their category, plus `topn`
 */
def SortedCate(df: DataFrame): DataFrame = {
  // Maximum number of rows kept per category.
  val topN = 10
  // NOTE(review): rows with equal `play_vv_end` have no secondary sort key here,
  // so their relative rank is presumably not stable between runs; confirm if it matters.
  val byCategory = Window.partitionBy(col("cate1")).orderBy(col("play_vv_end").desc)
  val topRowsPerCate = df
    .withColumn("topn", row_number().over(byCategory))
    .where(col("topn") <= topN)
    .persist(StorageLevel.MEMORY_AND_DISK_SER_2)
  // count() is an action, so it triggers computation of the persisted frame;
  // 560 rows were observed in one run. A single interpolated string is printed
  // instead of relying on two arguments being auto-tupled by the compiler.
  println(s"top${topN} rows per cate1: ${topRowsPerCate.count()}")
  topRowsPerCate
}