spark java join

package com;


// RDD learning reference: http://blog.csdn.net/dream_an/article/details/50524340
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;


import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochRequestProto;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.mllib.linalg.Vector;
import org.codehaus.janino.Java;


import scala.Tuple2;
import scala.annotation.meta.field;


public class spark_join {


public static void main(String[] args) {
// TODO Auto-generated method stub
SparkConf conf=new SparkConf().setAppName("spark_join_test").setMaster("local");
   JavaSparkContext sc =new JavaSparkContext(conf);
   
   //处理评级  得到 id ,平均
JavaRDD<String > textfile = sc.textFile("/usr/local/spark/ml-1m/ratings.dat");
JavaPairRDD<Integer, Double> tmp = textfile.mapToPair(
s ->new Tuple2<Integer, Double>( Integer.valueOf(s.split("::")[1]).intValue(),Double.parseDouble(s.split("::")[2])));
//tmp.foreach(a -> System.out.println(a._1+"=>"+a._2));
JavaPairRDD<Integer, Double> tmp1= tmp.reduceByKey((a,b)->(a+b));  //每个电影的总的得分
JavaPairRDD<Integer, Integer> tmp2 = tmp.mapToPair(s -> new Tuple2<Integer, Integer>(s._1, 1));
JavaPairRDD<Integer, Integer> tmp3= tmp2.reduceByKey((a,b)->(a+b)); //每个电影的评分个数
JavaPairRDD<Integer, Tuple2<Double, Integer>> tmp4 = tmp1.join(tmp3) ; //连接两个RDD
JavaPairRDD<Integer, Double>movieScores=tmp4.mapToPair(s->new Tuple2<Integer, Double>(s._1, s._2._1/s._2._2));// //id ,平均评级
//movieScores.foreach(a -> System.out.println(a._1+"=>"+a._2));

//读取movie数据,得到 MovieID::标题::流派
JavaRDD<String> movie=  sc.textFile("/usr/local/spark/ml-1m/movies.dat");
JavaPairRDD<Integer, String> movieskey=movie.mapToPair(s->new Tuple2<Integer, String>(Integer.valueOf(s.split("::")[0]).intValue(),s.split("::")[1]));
//movieskey.foreach(a -> System.out.println(a._1+"=>"+a._2));


JavaPairRDD<Integer, Tuple2<Double, String>> result =movieScores.join(movieskey).filter(s-> s._2._1>4.0);
result.foreach(a->System.out.println(a._1+"    "+a._2._1+"  "+a._2._2));
}


}


输出:

1084    4.096209912536443  Bonnie and Clyde (1967)
3007    4.013559322033898  American Movie (1999)
2493    4.142857142857143  Harmonists, The (1997)
3517    4.5  Bells, The (1926)
1    4.146846413095811  Toy Story (1995)
1780    4.125  Ayn Rand: A Sense of Life (1997)
2351    4.207207207207207  Nights of Cabiria (Le Notti di Cabiria) (1957)
759    4.101694915254237  Maya Lin: A Strong Clear Vision (1994)
1300    4.1454545454545455  My Life as a Dog (Mitt liv som hund) (1985)
1947    4.057818659658344  West Side Story (1961)
2819    4.040752351097178  Three Days of the Condor (1975)
162    4.063136456211812  Crumb (1994)
1228    4.1875923190546525  Raging Bull (1980)
306    4.227544910179641  Three Colors: Red (1994)
1132    4.259090909090909  Manon of the Spring (Manon des sources) (1986)
2132    4.074074074074074  Who's Afraid of Virginia Woolf? (1966)
720    4.426940639269406  Wallace & Gromit: The Best of Aardman Animation (1996)
2917    4.031746031746032  Body Heat (1981)
1066    4.1657142857142855  Shall We Dance? (1937)
2972    4.015384615384615  Red Sorghum (Hong Gao Liang) (1987)
922    4.491489361702127  Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)
446    4.082677165354331  Farewell My Concubine (1993)
1795    4.5  Callejón de los milagros, El (1995)
2930    4.4  Return with Honor (1998)
1117    4.25  Eighth Day, The (Le Huitième jour ) (1996)
1545    4.068493150684931  Ponette (1996)
3435    4.415607985480944  Double Indemnity (1944)
1207    4.425646551724138  To Kill a Mockingbird (1962)
1294    4.124658780709736  M*A*S*H (1970)
2194    4.007985803016859  Untouchables, The (1987)
3022    4.368932038834951  General, The (1927)
858    4.524966261808367  Godfather, The (1972)
1196    4.292976588628763  Star Wars: Episode V - The Empire Strikes Back (1980)
3090    4.141242937853107  Matewan

.......




  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值