Spark RDD 行动

测试过程中会出现大量INFO,影响调试

修改conf下log配置文件

log4j.rootCategory=WARN, console


package com.fei.simple_project;

import org.apache.spark.api.java.function.Function2;

import scala.Serializable;

public class AvgCount implements Serializable{
	public int total;
	public int num;
	public double avg(){
		return total/(double)num;
	}
	public AvgCount(int total, int num){
		this.total = total;
		this.num = num;
	}
	
}

package com.fei.simple_project;

import java.util.Arrays;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.storage.StorageLevel;


public class App 
{
    public static void main( String[] args )
    {
        SparkConf conf = new SparkConf().setAppName("Simple Application");
        JavaSparkContext sc = new JavaSparkContext(conf);
        
        //使用parallelize创建RDD
        JavaRDD<Integer> lines1 = sc.parallelize(Arrays.asList(1,3,2,4));
        lines1.persist(StorageLevel.MEMORY_ONLY());
        System.out.println("222222222Count is:"+lines1.count());
        
        JavaRDD<String> lines2 = sc.parallelize(Arrays.asList("pandas", "like","i like pandas"));
        lines2.persist(StorageLevel.MEMORY_ONLY());
        System.out.println("333333333333Count is:"+lines2.count());
        
        //行动操作
        //reduce, 常见行动操作
      Integer reduceResult = lines1.reduce(new Function2<Integer, Integer, Integer>(){
		public Integer call(Integer x, Integer y) throws Exception {
			return x+y;
		}
      });
      System.out.println("44444444444444444reduceResult is:"+reduceResult);
      //fold, 类似reduce,接受初始值,作为每个分区第一次调用时的结果。对这个初始值多次计算不能影响结果,例如+表示0,*表示1
    Integer foldResult = lines1.fold(0, new Function2<Integer, Integer, Integer>(){
		public Integer call(Integer x, Integer y) throws Exception {
			return x+y;
		}
    });
    System.out.println("555555555555foldResult is:"+foldResult);
    //aggregate, 类似fold,但是返回值不一定和输入类型一致
	Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>(){
	public AvgCount call(AvgCount a, Integer x){
		a.total += x;
		a.num+=1;
		return a;
	}
	};
	Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>(){
	public AvgCount call(AvgCount a, AvgCount b){
		a.total += b.total;
		a.num+=b.num;;
		return a;
	}	
	};    
    AvgCount init = new AvgCount(0, 0);
    AvgCount result = lines1.aggregate(init, addAndCount, combine);
    System.out.println("6666666666 avg  is:"+result.avg());
    //top,从RDD中 返回经排序后的前几个元素,从大到小
    System.out.println("7777777777777 top  is:");
    for(Integer it:lines1.top(4)){
  	  System.out.println(it+"   ");
    }    

    //take,随机提取几个元素
    System.out.println("88888888888 take  is:");
    for(Integer it:lines1.take(4)){
    	  System.out.println(it+"   ");
      }        
 
  //takeSample,数据采样
    System.out.println("9999999999999 takeSample  is:");
    for(Integer it:lines1.takeSample(true, 2,3)){
    	  System.out.println(it+"   ");
      }        
    
    //takeOrdered,按提供顺序返回,经排序,例如从1小到达
    System.out.println("9999999999999 takeOrdered  is:");
    for(Integer it:lines1.takeOrdered(4)){
    	  System.out.println(it+"   ");
      }        
    
    //countByValue,获取map对
    Map<Integer , Long> cbv = lines1.countByValue();
    System.out.println("9999999999999 countByValue  is:");
    for(Integer it: cbv.keySet()){
    	  System.out.println(it+"   "+cbv.get(it)+" ");
      }        
    
    //foreach,遍历rdd内容
    System.out.println("9999999999999 foreach  is:");
    lines1.foreach(new VoidFunction<Integer>(){
		public void call(Integer t) throws Exception {
			 System.out.println(t+" ");
		}
    });

 
    }
}


222222222Count is:4                                                             
333333333333Count is:3
44444444444444444reduceResult is:10
555555555555foldResult is:10
6666666666 avg  is:2.5
7777777777777 top  is:
4   
3   
2   
1   
88888888888 take  is:
1   
3   
2   
4   
9999999999999 takeSample  is:
1   
3   
9999999999999 takeOrdered  is:
1   
2   
3   
4   
9999999999999 countByValue  is:
4   1 
1   1 
2   1 
3   1 
9999999999999 foreach  is:
1 
2 
3 
4 


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值