Spark Operator Examples in Java (take, countByKey, saveAsTextFile)

All three operators below are Spark actions: take(n) and countByKey() bring results back to the driver, while saveAsTextFile(path) writes the RDD out to storage.

package day06;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import scala.Tuple2;
import java.util.*;
// take: return the first few elements of an RDD
public class myTake {
    public static void myTake(JavaSparkContext jsc){
        JavaRDD<String> RDD1 = jsc.parallelize(Arrays.asList("aa", "aa", "bb", "cc", "dd"));
        List<String> take = RDD1.take(3);
        System.out.println(take);
    }
    // countByKey: count the number of elements for each key
    public static void myCountByKey(JavaSparkContext jsc){
        List<Tuple2<String, String>> tuple2s = Arrays.asList(
                new Tuple2<String, String>("class2", "liao"),
                new Tuple2<String, String>("class2", "ao"),
                new Tuple2<String, String>("class2", "li"),
                new Tuple2<String, String>("class1", "lao"));
        JavaPairRDD<String, String> javaPairRDD = jsc.parallelizePairs(tuple2s);
        Map<String, Long> map = javaPairRDD.countByKey();
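        // countByKey is an action: it returns the per-key element counts to the driver as a java.util.Map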
        for (Map.Entry<String, Long> entry : map.entrySet()) {
            System.out.println("k=" + entry.getKey() + ", v=" + entry.getValue());
        }

    }
    // saveAsTextFile: save the RDD as a text file on HDFS
    public static void mySaveAsTextFile(JavaSparkContext jsc){
        List<String> list =
                Arrays.asList("Hello World", "Hello scala", "Hello Java");
        JavaRDD<String> parallelizeRDD = jsc.parallelize(list);
        // split each line into words
        JavaRDD<String> flatMapRDD = parallelizeRDD.flatMap(
                new FlatMapFunction<String, String>() {
                    @Override
                    public Iterator<String> call(String line) throws Exception {
                        return Arrays.asList(line.split(" ")).iterator();
                    }
                });
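        // Note: saveAsTextFile creates the target directory and writes one part-* file per
        // partition; the job fails if the directory already exists.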
        flatMapRDD.saveAsTextFile("hdfs://hadoop-1707-001:9000/save/test001");
        System.out.println("saved successfully");
    }
    public static void main(String[] args){
        SparkConf conf = new SparkConf()
                .setMaster("local").setAppName("MyAction_Java");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        // mySaveAsTextFile(jsc);  // uncomment to run the HDFS save example
        myCountByKey(jsc);
        jsc.stop();
    }
}
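
For reference, here is a minimal sketch (not from the original post) of the same three actions written with Java 8 lambdas, which the Spark 2.x Java API accepts and which avoids the anonymous FlatMapFunction boilerplate. The class name MyActionLambda and the local output path file:///tmp/save/test001 are illustrative assumptions; adjust the path to your own HDFS or local directory.

package day06;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Map;

public class MyActionLambda {   // illustrative class name
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("MyActionLambda");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        // take: pull the first 3 elements back to the driver as a List
        JavaRDD<String> rdd1 = jsc.parallelize(Arrays.asList("aa", "aa", "bb", "cc", "dd"));
        System.out.println(rdd1.take(3));

        // countByKey: count elements per key and return the result as a Map
        JavaPairRDD<String, String> pairs = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>("class2", "liao"),
                new Tuple2<>("class2", "ao"),
                new Tuple2<>("class2", "li"),
                new Tuple2<>("class1", "lao")));
        Map<String, Long> counts = pairs.countByKey();
        counts.forEach((k, v) -> System.out.println("k=" + k + ", v=" + v));

        // flatMap + saveAsTextFile: split lines into words and write them out
        JavaRDD<String> words = jsc
                .parallelize(Arrays.asList("Hello World", "Hello scala", "Hello Java"))
                .flatMap(line -> Arrays.asList(line.split(" ")).iterator());
        words.saveAsTextFile("file:///tmp/save/test001");   // illustrative output path

        jsc.stop();
    }
}

For the sample pairs above, countByKey should report class2 -> 3 and class1 -> 1, though the iteration order of the returned Map may vary.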