Spark Series 1: Hands-On Development with Action Operations

Unlike transformations, which are lazy, actions such as reduce, collect, count, take, saveAsTextFile, and countByKey trigger actual job execution and either return a result to the driver or write data out to storage.

Example code:

1. Java version:
package cn.spark.study.core;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

import scala.Tuple2;

public class ActionDemo {

    public static void main(String[] args) {
        // Uncomment the action to run:
        // reduce();
        // collect();
        // count();
        // take();
        // saveAsTextFile();
        // countByKey();
    }

    // reduce: aggregates all elements of the RDD with a binary function
    public static void reduce() {
        SparkConf conf = new SparkConf()
                .setAppName("reduce")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbersRDD = sc.parallelize(numbers);

        int sum = numbersRDD.reduce(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        System.out.println(sum);

        sc.close();
    }

    // collect: pulls the whole RDD back to the driver as a local List;
    // use with care on large datasets, since everything must fit in driver memory
    public static void collect() {
        SparkConf conf = new SparkConf()
                .setAppName("collect")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbersRDD = sc.parallelize(numbers);

        List<Integer> collected = numbersRDD.collect();
        System.out.println(collected);

        sc.close();
    }

    // count: returns the number of elements in the RDD
    public static void count() {
        SparkConf conf = new SparkConf()
                .setAppName("count")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbersRDD = sc.parallelize(numbers);

        long total = numbersRDD.count();
        System.out.println(total);

        sc.close();
    }

    // take: returns the first n elements of the RDD to the driver
    public static void take() {
        SparkConf conf = new SparkConf()
                .setAppName("take")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbersRDD = sc.parallelize(numbers);

        List<Integer> firstThree = numbersRDD.take(3);
        System.out.println(firstThree);

        sc.close();
    }

    // saveAsTextFile: writes the RDD to a directory (here on HDFS),
    // one part file per partition. setMaster is deliberately not set:
    // this method is meant to be submitted to a cluster with spark-submit.
    public static void saveAsTextFile() {
        SparkConf conf = new SparkConf()
                .setAppName("saveAsTextFile");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbersRDD = sc.parallelize(numbers);

        JavaRDD<Integer> doubledRDD = numbersRDD.map(new Function<Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v1) throws Exception {
                return v1 * 2;
            }
        });
        doubledRDD.saveAsTextFile("hdfs://master:9000/savefile");

        sc.close();
    }

    // countByKey: counts how many elements exist for each key of a pair RDD
    public static void countByKey() {
        SparkConf conf = new SparkConf()
                .setAppName("countByKey")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Tuple2<String, Integer>> classScores = Arrays.asList(
                new Tuple2<String, Integer>("class1", 80),
                new Tuple2<String, Integer>("class2", 75),
                new Tuple2<String, Integer>("class1", 50),
                new Tuple2<String, Integer>("class2", 75));
        JavaPairRDD<String, Integer> scores = sc.parallelizePairs(classScores);

        // note: recent Spark versions type the result as Map<String, Long>
        Map<String, Object> counts = scores.countByKey();
        System.out.println(counts);

        sc.close();
    }
}
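
On Java 8 and later, the anonymous inner classes above can be replaced with lambdas, because Spark's Function and Function2 are single-method interfaces. A minimal sketch of the reduce example in that style (the class name ActionDemoLambda is just for illustration; it assumes the same Spark dependencies as above):

package cn.spark.study.core;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class ActionDemoLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("reduceLambda")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // the lambda replaces the anonymous Function2<Integer, Integer, Integer>
        int sum = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
                .reduce((v1, v2) -> v1 + v2);
        System.out.println(sum); // 55

        sc.close();
    }
}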

2. Scala version:
package com.spark.study.core

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object ActionDemo {

  def main(args: Array[String]) {
    // Uncomment the action to run:
    // reduce()
    // collect()
    // count()
    // take()
    // saveAsTextFile()
    countByKey()
  }

  // reduce: aggregates all elements with a binary function
  def reduce() {
    val conf = new SparkConf()
      .setAppName("reduce")
      .setMaster("local")
    val sc = new SparkContext(conf)

    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numbersRDD = sc.parallelize(numbers, 5)
    val sum = numbersRDD.reduce(_ + _)
    println(sum)

    sc.stop()
  }

  // collect: brings the whole RDD back to the driver as a local collection
  def collect() {
    val conf = new SparkConf()
      .setAppName("collect")
      .setMaster("local")
    val sc = new SparkContext(conf)

    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numbersRDD = sc.parallelize(numbers, 5)
    val doubled = numbersRDD.map(_ * 2).collect()
    for (n <- doubled)
      println(n)

    sc.stop()
  }

  // count: returns the number of elements in the RDD
  def count() {
    val conf = new SparkConf()
      .setAppName("count")
      .setMaster("local")
    val sc = new SparkContext(conf)

    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numbersRDD = sc.parallelize(numbers, 5)
    val total = numbersRDD.map(_ * 2).count()
    println(total)

    sc.stop()
  }

  // take: returns the first n elements to the driver
  def take() {
    val conf = new SparkConf()
      .setAppName("take")
      .setMaster("local")
    val sc = new SparkContext(conf)

    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numbersRDD = sc.parallelize(numbers, 5)
    val firstThree = numbersRDD.map(_ * 2).take(3)
    for (n <- firstThree)
      println(n)

    sc.stop()
  }

  // saveAsTextFile: writes the RDD to HDFS, one part file per partition;
  // setMaster is not set, since this is meant to run on a cluster
  def saveAsTextFile() {
    val conf = new SparkConf()
      .setAppName("saveAsTextFile")
    val sc = new SparkContext(conf)

    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numbersRDD = sc.parallelize(numbers, 5)
    numbersRDD.map(_ * 2).saveAsTextFile("hdfs://master:9000/sa")

    sc.stop()
  }

  // countByKey: counts how many elements exist for each key
  def countByKey() {
    val conf = new SparkConf()
      .setAppName("countByKey")
      .setMaster("local")
    val sc = new SparkContext(conf)

    val scores = Array(("class1", 59), ("class2", 58), ("class1", 89), ("class2", 53))
    val scoresRDD = sc.parallelize(scores, 3)
    val counts = scoresRDD.countByKey()
    println(counts)

    sc.stop()
  }
}
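
Two quick notes on the runs above. First, countByKey counts records per key rather than summing the scores, so the Scala countByKey example should print something like Map(class1 -> 2, class2 -> 2). Second, a directory written by saveAsTextFile can be read back with textFile to sanity-check the job; a minimal sketch, shown in Java to match section 1 (the class name VerifySave is just for illustration, and the HDFS path is the one used in the Java example above):

package cn.spark.study.core;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class VerifySave {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("verifySave");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // textFile reads every part file in the directory as lines of text
        JavaRDD<String> lines = sc.textFile("hdfs://master:9000/savefile");
        System.out.println(lines.count()); // should print 10
        System.out.println(lines.take(10)); // the doubled numbers, as strings

        sc.close();
    }
}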

Source: "ITPUB Blog", http://blog.itpub.net/30541278/viewspace-2153542/. If reposting, please credit the source; otherwise legal liability may be pursued.
