import java.util.Arrays; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; /** * @author 作者 E-mail: * @version 创建时间:2017年8月30日 上午10:02:51 * 类说明 */ public class WordCountSotr { public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("sortAction").setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> tf = sc.textFile("G://121.txt"); //去掉空格 JavaRDD<String> flatMap = tf.flatMap(new FlatMapFunction<String, String>() { private static final long serialVersionUID = 1L; @Override public Iterable<String> call(String paramT) throws Exception { return Arrays.asList(paramT.split(" ")) ; } }); //将单词拆分成(words, 1) JavaPairRDD<String, Integer> words = flatMap.mapToPair(new PairFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String paramT) throws Exception { return new Tuple2<String, Integer>(paramT, 1); } }); // 每个单词出现的次数 JavaPairRDD<String, Integer> reduceByKey = words.reduceByKey(new Function2<Integer, Integer, Integer>() { private static final long serialVersionUID = 1L; @Override public Integer call(Integer paramT1, Integer paramT2) throws Exception { return paramT1 + paramT2; } }); // reduceByKey 数据格式(word,2), (hellon, 3) //进行key,value反转 JavaPairRDD<Integer, String> sortByKey = reduceByKey.mapToPair(new PairFunction<Tuple2<String,Integer>, Integer, String>() { private static final long serialVersionUID = 1L; @Override public Tuple2<Integer, String> call(Tuple2<String, Integer> paramT) throws Exception { return new Tuple2<Integer, String>(paramT._2, paramT._1); } }); JavaPairRDD<Integer, String> reduceByKey2 = sortByKey.reduceByKey(new Function2<String, String, String>() { private static final long serialVersionUID = 1L; @Override public String call(String paramT1, String paramT2) throws Exception { return paramT1 + paramT2; } }); JavaPairRDD<Integer, String> sortByKey2 = reduceByKey2.sortByKey(false); System.out.println("..."+sortByKey2); //再次將value-key 反轉 JavaPairRDD<String, Integer> mapToPair = sortByKey2.mapToPair(new PairFunction<Tuple2<Integer,String>, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(Tuple2<Integer, String> paramT) throws Exception { return new Tuple2<String, Integer>(paramT._2, paramT._1); } }); mapToPair.foreach(new VoidFunction<Tuple2<String,Integer>>() { private static final long serialVersionUID = 1L; @Override public void call(Tuple2<String, Integer> paramT) throws Exception { System.out.println("sort by key....."+ paramT._1 + "word..."+paramT._2); } }); sc.close(); } }