Spark算子汇总-Cartesian

Cartesian算子:对两个RDD做笛卡尔积,返回由两个RDD中所有元素两两配对 (a, b) 组成的新RDD。

Github项目上已包含Spark所有操作DEMO。

Java版本:

package com.huangyueran.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * @category 两个RDD进行笛卡尔积合并--The two RDD are Cartesian product merging
 * @author huangyueran
 * @time 2019-7-21 16:38:20
 */
public class Cartesian {

	public static void main(String[] args) {
		/**
		 * SparkConf:第一步创建一个SparkConf,在这个对象里面可以设置允许模式Local Standalone yarn
		 * AppName(可以在Web UI中看到) 还可以设置Spark运行时的资源要求
		 */
		SparkConf conf = new SparkConf().setAppName("Cartesian").setMaster("local");

		/**
		 * 基于SparkConf的对象可以创建出来一个SparkContext Spark上下文
		 * SparkContext是通往集群的唯一通道,SparkContext在创建的时候还会创建任务调度器
		 */
		JavaSparkContext sc = new JavaSparkContext(conf);

		cartesian(sc);
	}

	private static void cartesian(JavaSparkContext sc) {
	    List<String> names = Arrays.asList("张三", "李四", "王五");
	    List<Integer> scores = Arrays.asList(60, 70, 80);

	    JavaRDD<String> namesRDD = sc.parallelize(names);
	    JavaRDD<Integer> scoreRDD = sc.parallelize(scores);

	    /**
		 *  =====================================
		 *   |             两个RDD进行笛卡尔积合并                                        |
		 *   |             The two RDD are Cartesian product merging     |                                                                                                                                                                                                                                    | 
		 *   =====================================
		 */
	    JavaPairRDD<String, Integer> cartesianRDD = namesRDD.cartesian(scoreRDD);
	    
	    cartesianRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
	        public void call(Tuple2<String, Integer> t) throws Exception {
	            System.out.println(t._1 + "\t" + t._2());
	        }
	    });
	}
	
}

 

Scala版本:

package com.hyr.spark.operator

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the Spark `cartesian` operator: merges two RDDs into their
  * Cartesian product, i.e. every element of the first RDD paired with every
  * element of the second.
  *
  * @author 黄跃然 (huangyr)
  * @since 2019-08-07 17:04
  */
object Cartesian {

  /**
    * Builds an RDD of names and an RDD of scores, computes their Cartesian
    * product, and prints each pair as `key:<name>\tvalue:<score>`.
    *
    * @param sparkContext the Spark context used to parallelize the sample data
    */
  def cartesian(sparkContext: SparkContext): Unit = {
    val names = List("张三", "李四", "王五")
    val scores = List(60, 70, 90)

    val namesRDD = sparkContext.parallelize(names)
    val scoresRDD = sparkContext.parallelize(scores)

    // One (name, score) tuple per combination of the two inputs.
    val cartesianRDD = namesRDD.cartesian(scoresRDD)

    cartesianRDD.foreach { case (name, score) =>
      println(s"key:$name\tvalue:$score")
    }
  }

  /**
    * Entry point: creates a Spark context named "Cartesian" on the "local"
    * master, runs the demo, and stops the context afterwards.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Cartesian").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)

    // Stop the context even if the job fails; the original never released it.
    try cartesian(sparkContext)
    finally sparkContext.stop()
  }

}

Github地址:https://github.com/huangyueranbbc/SparkDemo  

转载于:https://my.oschina.net/u/4074730/blog/3085004

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值