随机划分
// randomSplit takes an array of weights plus a seed and returns an array of splits,
// one per weight.
df.randomSplit(Array(0.8, 0.2), 1115)
// Row count of the first split (the original line was missing the closing parenthesis
// of the randomSplit argument list).
df.randomSplit(Array(0.8, 0.2), 1115)(0).count()
// Convert the first split to a DataFrame and display it; the (0) index is required
// because the call returns an array of splits, not a single DataFrame.
df.randomSplit(Array(0.8, 0.2), 1115)(0).toDF().show()
// More than two splits are allowed, and the weights need not sum to 1: the final split
// follows the ratio between the weights, e.g. Array(6, 2, 2) gives three parts of
// roughly 0.6, 0.2 and 0.2.
df.randomSplit(Array(0.01, 0.9, 0.09), 1115)(0).toDF().count()
随机采样
// Sampling parameters; each declaration is on its own line (they were fused together
// in the original, which does not parse).
val seed = 1115
val withreplacement = false // whether to sample with replacement
val fraction = 0.2 // sampling fraction
// Display the sample, then count its rows.
df.sample(withreplacement, fraction, seed).show()
df.sample(withreplacement, fraction, seed).count()
// A much smaller sample: display it, then list the distinct "bat_batch" values in it.
df.sample(false, 0.001, seed).show()
df.sample(false, 0.001, seed).select("bat_batch").distinct().show()
重新洗牌
// Dataset/DataFrame has no `shuffle` method; ordering by a random column reorders the
// rows randomly. Pass a seed to rand(...) if a reproducible order is needed.
import org.apache.spark.sql.functions.rand
df.orderBy(rand())