import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
import java.util.List;
/**
 * Demonstrates the {@code cartesian(otherDataset)} operator.
 *
 * <p>{@code cartesian} computes the Cartesian product of two RDDs; in the Java API the
 * result is a {@code JavaPairRDD}.
 *
 * <p>Output recorded for this example:
 * {@code [(1,a), (1,b), (1,c), (2,a), (2,b), (2,c), (3,a), (3,b), (3,c), (4,a), (4,b), (4,c)]}
 */
public class CartesianDemo {

    /**
     * Builds two small in-memory RDDs, pairs every integer with every string via
     * {@code cartesian}, and prints the collected pairs to standard error.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("spark");

        // JavaSparkContext is Closeable: try-with-resources guarantees the context is
        // stopped when the job finishes or fails, instead of being leaked.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<Integer> numbers = Arrays.asList(1, 2, 3, 4);
            List<String> letters = Arrays.asList("a", "b", "c");

            JavaRDD<Integer> numberRdd = sc.parallelize(numbers);
            JavaRDD<String> letterRdd = sc.parallelize(letters);

            // cartesian operator: every element of numberRdd paired with every element of letterRdd
            JavaPairRDD<Integer, String> pairs = numberRdd.cartesian(letterRdd);

            System.err.println(pairs.collect());
        }
    }
}
// Java Spark operator: cartesian
// Latest recommended article published on 2022-09-17 23:01:28