// --- RDD approach ---------------------------------------------------------
// Classic word count using low-level RDD transformations:
// split lines into words, pair each word with 1, then sum the counts.
// Fixes vs original notes: added the required `val` keywords, replaced the
// smart quotes on the textFile path (they are not valid string delimiters),
// and supplied a concrete input path instead of an empty string.
val conf = new SparkConf().setAppName("WordCount").setMaster("local")
val sc = new SparkContext(conf)

// NOTE(review): path assumed to match the Dataset example below — confirm.
val lines = sc.textFile("src/main/resources/data.txt")

lines.flatMap(_.split(" "))   // line -> words
  .map((_, 1))                // word -> (word, 1)
  .reduceByKey(_ + _)         // sum counts per word
  .collect()                  // bring results to the driver (small data only)
  .foreach(println)

sc.stop()                     // release cluster resources
// --- Dataset (DataFrame) approach -----------------------------------------
// Typed word count: read the file as Dataset[String], split into words,
// group case-insensitively, and count each group.
// Fixes vs original notes: added the required `val` keywords (bare
// `name = expr` is not valid Scala) and documented the implicits import
// that `.as[String]` and the flatMap encoder depend on.
// Requires: import sparkSession.implicits._ (for the String encoder).
val data = sparkSession.read.text("src/main/resources/data.txt").as[String]

// Split on runs of whitespace; note a line with leading whitespace yields
// an empty first token — filter if that matters for your data.
val words = data.flatMap(line => line.split("\\s+"))

// Group by lowercased word so "Spark" and "spark" count together.
val groupedWords = words.groupByKey(_.toLowerCase)

val counts = groupedWords.count() // Dataset[(String, Long)]
counts.show()
# --- SparkSQL approach -----------------------------------------------------
# Word count expressed as a SQL query over a temp view.
# Fixes vs original notes:
#   * `SparkSession.builder` is a property, not a method — no parentheses.
#   * The chained builder calls across lines need enclosing parentheses.
#   * `load(' '),format('text')` had a stray comma and smart quotes;
#     format is a keyword argument of load().
#   * `Data` vs `data` case mismatch between assignment and use.
#   * The SQL had `group byword` (missing space) and selected a `word`
#     column that does not exist: a text source exposes a single `value`
#     column holding whole lines, so the query must split lines into words.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("WordCount")
    .master("local")
    .getOrCreate()
)

# NOTE(review): path assumed to match the other examples — confirm.
data = spark.read.load("src/main/resources/data.txt", format="text")
data.createOrReplaceTempView("data")

# explode(split(...)) turns each line into one row per word before grouping.
result = spark.sql(
    """
    SELECT word, COUNT(*) AS count
    FROM (SELECT explode(split(value, ' ')) AS word FROM data)
    GROUP BY word
    """
)
result.show()

spark.stop()  # release resources, consistent with sc.stop() in the RDD notes