/**
* Created by Administrator on 2016/8/9.
*/
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Tuple2;
import java.util.*;
public class PageRank{
public static void main(String[] args){
SparkConf conf=new SparkConf();
conf.setAppName("pagerank");
conf.setMaster("local");
conf.set("spark.testing.memory", "500000000");//设置运行内存大小
JavaSparkContext sc=new JavaSparkContext(conf);
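// partitionBy() only applies to key-value (pair) RDDs. After it runs, records with the same key are placed on the same machine;
// persisting the partitioned RDD then optimizes the join inside the later loop by sparing that join the shuffle cost.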
ArrayList<String> list=new ArrayList<String>(4);
list.add("A,D");//网页之间的连接关系,A页面链接到网页D
list.add("B,A");
list.add(
spark实现简单的pagerank
最新推荐文章于 2024-04-15 15:28:53 发布