代码
/**
 * Spark SQL demo: for every page (`web`), list the users with the highest visit counts
 * (rank <= 3).
 *
 * The input is read as a header-less, pipe-delimited file whose two columns are named
 * `user` and `web`.
 */
object SparkTop3Demo {

  /**
   * Runs the demo on a local Spark session and prints both result tables to stdout.
   *
   * @param args optional; `args(0)` overrides the input file path. When absent, the
   *             original hard-coded sample path is used.
   */
  def main(args: Array[String]): Unit = {
    val inputPath: String =
      args.headOption.getOrElse("G:\\MySparl-Learn\\SparkSQL-learn\\inputData\\userVisit.csv")

    val spark: SparkSession =
      SparkSession.builder().appName("top3").master("local[*]").getOrCreate()

    // try/finally guarantees the session is released even if reading or querying fails.
    try {
      val visits: DataFrame =
        spark.read.option("delimiter", "|").csv(inputPath).toDF("user", "web")
      visits.createOrReplaceTempView("userVisit")

      // Visits per (user, web) pair; reused below as the innermost sub-query.
      // Single-line on purpose: it is spliced into a stripMargin'd string.
      val countSql = "select user,web,count(*) ct from userVisit group by user,web"

      val visitCounts: DataFrame = spark.sql(countSql)
      visitCounts.show()

      // rank() gives tied counts the same rank, so a page can return more than three rows.
      val topUsersPerWeb: DataFrame = spark.sql(
        s"""
          |select
          |user,
          |web,
          |ct,
          |rk
          |from
          |(
          |select
          |user,
          |web,
          |ct,
          |rank() over(partition by web order by ct desc ) rk
          |from
          |(
          |$countSql
          |) t1
          |)t2
          |where rk <=3
          |""".stripMargin)
      topUsersPerWeb.show()

      // Keep the application alive (presumably so the local Spark UI can be inspected)
      // by blocking on stdin instead of spinning a CPU core in an empty loop.
      // NOTE: if stdin is closed/EOF, readLine returns immediately and the app exits.
      println("Press ENTER to stop the Spark session and exit.")
      scala.io.StdIn.readLine()
    } finally {
      spark.close()
    }
  }
}
数据
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user1|/spark
user2|/spark
user2|/spark
user2|/spark
user2|/spark
user2|/spark
user2|/spark
user3|/spark
user3|/spark
user4|/spark
user4|/spark
user4|/spark
user4|/spark
user5|/spark
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user1|/flink
user2|/flink
user2|/flink
user2|/flink
user2|/flink
user2|/flink
user2|/flink
user3|/flink
user3|/flink
user4|/flink
user4|/flink
user4|/flink
user4|/flink
user5|/flink
user5|/flink
user5|/flink
user5|/flink
user5|/flink
user5|/flink
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user1|/java
user2|/java
user2|/java
user2|/java
user2|/java
user2|/java
user2|/java
user3|/java
user3|/java
user4|/java
user4|/java
user4|/java
user4|/java
user5|/java
user5|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java
user6|/java