import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
object SparkSQL01 {
  /**
   * Word count over a text file, implemented three equivalent ways:
   *  1. SQL against a temp view,
   *  2. DataFrame DSL with `groupBy().count()`,
   *  3. DataFrame DSL with `agg(count(...))`.
   *
   * @param args optional; args(0) overrides the default input path.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .appName(this.getClass.getName)
      .master("local[*]")
      .getOrCreate()

    // Input path is now parameterized; the original hard-coded path remains the default.
    val inputPath: String =
      if (args.nonEmpty) args(0) else "C:\\Users\\LEMMONT\\Desktop\\testdata\\word"
    val lines: Dataset[String] = spark.read.textFile(inputPath)

    import spark.implicits._
    val words: Dataset[String] = lines.flatMap(_.split(" "))

    // SQL style.
    // createOrReplaceTempView (instead of createTempView) avoids an
    // AnalysisException when the view already exists in this session.
    words.createOrReplaceTempView("t_words")
    val res: DataFrame = spark.sql("SELECT value,count(1) cou FROM t_words GROUP BY value ORDER BY cou DESC")
    res.show()

    // DSL style.
    val dslRow: Dataset[Row] = words.groupBy($"value".as("word")).count()
      .withColumnRenamed("count", "counts") // alias the generated count column
      .orderBy($"counts".desc)              // .desc — avoids deprecated postfix operator syntax
    dslRow.show()

    // DSL with agg(): aggregate functions must be imported before use.
    import org.apache.spark.sql.functions._
    val dslRow2: Dataset[Row] = words.groupBy($"value".as("word")).agg(count("*").as("counts"))
      .orderBy($"counts".desc)
    dslRow2.show()

    spark.stop()
  }
}
// Output: