import org.apache.spark.sql.SparkSession
/*
 * Author: supeihuang
 * Time: 2019/4/19 15:59
 */
object test {
  // Local SparkSession shared by sortedTopOne and main
  val spark = SparkSession.builder().master("local").appName("test").getOrCreate()
  // Splits a comma-separated string into words, counts each word with an RDD,
  // and returns the most frequent key, skipping the "-1" placeholder.
  def sortedTopOne(data: String): String = {
    println(data)
    val wordCount = data.split(",").map(word => (word, 1))
    val rdd = spark.sparkContext.parallelize(wordCount)
    val countByKey = rdd.reduceByKey(_ + _)
    // Sort by count in descending order and keep only the keys
    val result = countByKey.sortBy(_._2, ascending = false).keys.collect().toList
    println("---" + result)
    if (result.head == "-1") result(1) else result.head
  }
  def main(args: Array[String]): Unit = {
    val list = "332684,60070,306590,57608,-1,60070,60070,-1,302720,60070,-1,120537,63672,60848,99634,60070,60070,60795,56737,60070,41737,57085,304525,120516,-1,-1,-1,60070,-1,-1,-1,-1,-1,-1,-1,-1,-1,63673,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1"
    val result = sortedTopOne(list)
    println("xxx" + result)
    spark.stop()
  }
}
Converting a String to an RDD[String] and extracting the top-ranked keys
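For a single short string like this, spinning up a Spark job is arguably overkill. As a point of comparison, here is a minimal pure-Scala sketch of the same "top key, skipping the -1 placeholder" logic (the helper name topKeyLocal is hypothetical, not part of the original code):

// Hypothetical local equivalent of sortedTopOne, no Spark required.
def topKeyLocal(data: String): String = {
  val ranked = data.split(",")
    .groupBy(identity)                                // word -> all occurrences
    .map { case (word, occs) => (word, occs.length) } // word -> count
    .toList
    .sortBy(-_._2)                                    // descending by count
    .map(_._1)
  if (ranked.head == "-1") ranked(1) else ranked.head
}

Either way, for the sample list in main the "-1" placeholder dominates the counts, so the function should fall back to the next most frequent key, 60070, and print "xxx60070".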