import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.Arrays;
/**
* Created by hadoop on 17-2-23.
*/
/**
 * Minimal Spark pair-RDD example.
 *
 * <p>Parallelizes two sample lines ({@code "1 a"}, {@code "2 b"}), keys each
 * line on its first whitespace-separated token via {@code mapToPair}, then
 * collects the resulting {@code (key, line)} pairs to the driver and prints
 * them. Intended to be launched through spark-submit, which supplies the
 * master URL (only the app name is set here).
 */
public class JavaMyPairRDD {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("PairRDD");
        // JavaSparkContext implements Closeable: try-with-resources stops the
        // context even if collect() throws (the original leaked it).
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.parallelize(Arrays.asList("1 a", "2 b"));
            // Key extractor: first token of the line is the key, the whole
            // line is the value.
            PairFunction<String, String, String> keyData = new PairFunction<String, String, String>() {
                @Override
                public Tuple2<String, String> call(String s) throws Exception {
                    // Diamond operator fixes the raw-type Tuple2 of the original.
                    return new Tuple2<>(s.split(" ")[0], s);
                }
            };
            JavaPairRDD<String, String> pairs = lines.mapToPair(keyData);
            // Expected output: [(1,1 a), (2,2 b)]
            System.out.println(pairs.collect());
        }
    }
}
// Tags: <spark> pairRDD
// (Originally published on a blog; latest recommended follow-up article dated 2021-02-18 18:50:29.)