import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
/**
 * Word count implemented two ways on the same input:
 *  1. Spark SQL — words wrapped in Rows, counted via `DataFrame.groupBy(...).count()`.
 *  2. Spark core — classic `map((_, 1)).reduceByKey(_ + _)` on the RDD.
 *
 * The input path may be passed as the first command-line argument; it
 * defaults to the original hard-coded path when no argument is given.
 */
object SparkSqlWC {
  def main(args: Array[String]): Unit = {
    // Generalized: take the input file from args(0) if provided, keep the old default otherwise.
    val inputPath: String = args.headOption.getOrElse("i://test.txt")

    val spark: SparkSession = SparkSession.builder().appName("WC").master("local[2]").getOrCreate()
    val sc = spark.sparkContext

    // Single nullable string column holding one word per row.
    val schema: StructType = new StructType(Array(StructField("field", StringType, true)))

    try {
      // Each input line is comma-separated; flatten into one word per record.
      val words: RDD[String] = sc.textFile(inputPath).flatMap(_.split(","))

      // --- Word count via Spark SQL ---
      val rowRdd: RDD[Row] = words.map(word => Row(word))
      val df: DataFrame = spark.createDataFrame(rowRdd, schema)
      df.groupBy("field").count().show()

      // --- Word count via Spark core (RDD API) ---
      println("spark core实现wc")
      println(words.map((_, 1)).reduceByKey(_ + _).collect().toBuffer)
    } finally {
      // Ensure the session is released even when a Spark job fails.
      spark.stop()
    }
  }
}
// SparkSQL实现WC (word count implemented with Spark SQL)
// 最新推荐文章于 2022-06-23 17:44:17 发布 (blog publication metadata; commented out so the file compiles)