Java WordCount
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Iterator;

public class JavaWordCount {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local[4]").setAppName("test");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Read the input file; each element is one line of text.
        JavaRDD<String> text = sc.textFile("./score");

        // Split every line on commas, producing one element per word.
        JavaRDD<String> words = text.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String s) throws Exception {
                return Arrays.asList(s.split(",")).iterator();
            }
        });

        // Pair each word with an initial count of 1.
        JavaPairRDD<String, Integer> pairRDD = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<>(s, 1);
            }
        });

        // Sum the counts per word.
        JavaPairRDD<String, Integer> reduceRDD = pairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        // Sort by word and print each (word, count) pair.
        reduceRDD.sortByKey().foreach(new VoidFunction<Tuple2<String, Integer>>() {
            @Override
            public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                System.out.println(stringIntegerTuple2._1 + " appears " + stringIntegerTuple2._2 + " times.");
            }
        });

        sc.stop();
    }
}
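With Java 8 lambdas the same pipeline can be written far more compactly. A minimal sketch, assuming Spark 2.x (where the function passed to flatMap returns an Iterator) and Java 8+; it is behaviorally equivalent to the anonymous-class version above:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class JavaWordCountLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[4]").setAppName("test");
        JavaSparkContext sc = new JavaSparkContext(conf);

        sc.textFile("./score")
          .flatMap(s -> Arrays.asList(s.split(",")).iterator()) // one element per word
          .mapToPair(s -> new Tuple2<>(s, 1))                   // (word, 1)
          .reduceByKey(Integer::sum)                            // sum counts per word
          .sortByKey()
          .foreach(t -> System.out.println(t._1 + " appears " + t._2 + " times."));

        sc.stop();
    }
}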
Scala WordCount
import org.apache.spark.{SparkConf, SparkContext}

object ScalaWC {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[4]").setAppName("wc")
    val sc = new SparkContext(conf)

    sc.textFile("./score")       // read the input file
      .flatMap(_.split(","))     // split each line on commas
      .map((_, 1))               // pair each word with a count of 1
      .reduceByKey(_ + _)        // sum the counts per word
      .sortByKey()               // sort by word
      .foreach { case (word, count) =>
        println(s"$word appears $count times")
      }

    sc.stop()
  }
}
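The contents of ./score are not shown in the original post. For reference, a hypothetical input consistent with the counts below would be three comma-separated records, each appearing twice:

zhangsan,1,100
lisi,2,200
liming,3,300
zhangsan,1,100
lisi,2,200
liming,3,300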
Output:
300 appears 2 times
1 appears 2 times
100 appears 2 times
liming appears 2 times
2 appears 2 times
lisi appears 2 times
200 appears 2 times
zhangsan appears 2 times
3 appears 2 times
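Note that the lines are not printed in key order even though sortByKey() was called: foreach runs on the executors, and with local[4] the partitions print concurrently. To see the sorted result on the driver, collect first; a minimal sketch reusing reduceRDD from the Java example above:

// collect() brings the sorted pairs back to the driver, so the printed
// order matches the sort (safe here because the result set is tiny).
for (Tuple2<String, Integer> t : reduceRDD.sortByKey().collect()) {
    System.out.println(t._1 + " appears " + t._2 + " times.");
}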