Java Spark 模板

// Java Spark template: read a comma-separated student file, register it as a
// temp view, run an aggregate SQL query over it, then walk through a few
// pair-RDD operations (flatMapToPair, reduceByKey, groupByKey, countByKey).
SparkSession sc = SparkSession
        .builder()
        .master("local[*]")
        .appName("testjob")
        .getOrCreate();

// Java-friendly wrapper around the session's underlying SparkContext.
JavaSparkContext scs = new JavaSparkContext(sc.sparkContext());

JavaRDD<String> sturow = scs.textFile("E:\\sparkgbase_onhibe\\src\\main\\resources\\student.txt");

// Convert each text line into a Kudu_Student bean.
// Expected column order: id, name, age, ads, birthday, hobday, gzads, source.
JavaRDD<Kudu_Student> stumap = sturow.map(row -> {
    // Split once per row instead of once per column.
    String[] fields = row.split(",");

    // NOTE(review): assumes Kudu_Student is a JavaBean with a public no-arg
    // constructor and setters named after the columns below -- confirm against
    // the class definition. The original lambda returned null here, which left
    // the RDD full of nulls and made the DataFrame below unusable.
    Kudu_Student student = new Kudu_Student();
    student.setId(Integer.parseInt(fields[0]));
    student.setName(fields[1]);
    student.setAge(Integer.parseInt(fields[2]));
    student.setAds(fields[3]);
    student.setBirthday(fields[4]);
    student.setHobday(fields[5]);
    student.setGzads(fields[6]);
    student.setSource(Double.parseDouble(fields[7]));
    return student;
});

// Build a DataFrame from the bean RDD and expose it to SQL as "kudu_student".
Dataset<Row> studf = sc.createDataFrame(stumap, Kudu_Student.class);
studf.createOrReplaceTempView("kudu_student");
sc.sql("select * from kudu_student").show();

// The same grouped count is unioned with itself, so every (ads, gzads, birthday)
// group appears twice in the result; coalesce(1) puts the rows in one partition.
JavaRDD<Row> stuRowRDD = sc.sql("select ads,gzads,birthday,count(1)" +
        "  from kudu_student" +
        "  group by " +
        "  ads,gzads,birthday" +
        "  union all " +
        "  select ads,gzads,birthday,count(1) " +
        "  from kudu_student " +
        "  group by " +
        "  ads,gzads,birthday").toJavaRDD().coalesce(1);

// Key each result row by its four columns joined with '|', with a count of 1.
JavaPairRDD<String, Integer> stukvrdd = stuRowRDD.flatMapToPair(x -> {
    List<Tuple2<String, Integer>> listTuple = new ArrayList<>();
    listTuple.add(new Tuple2<>(x.get(0) + "|" + x.get(1) + "|" + x.get(2) + "|" + x.get(3), 1));
    return listTuple.iterator();
});

// Sum the counts per key, then group the (already reduced) values per key.
JavaPairRDD<String, Integer> stuReduceKey = stukvrdd.reduceByKey((x, y) -> x + y);
JavaPairRDD<String, Iterable<Integer>> stuGroupKey = stuReduceKey.groupByKey();
stuGroupKey.foreach(x -> {
    System.out.println("key:" + x._1);
    for (Integer value : x._2) {
        System.out.println(value);
    }
});

// Number of entries per key in the reduced RDD, collected to the driver.
Map<String, Long> stringLongMap = stuReduceKey.countByKey();
System.out.println(stringLongMap);

scs.close();
sc.close();
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值