一、Java代码
SparkConf conf = new SparkConf();
conf.setMaster("local").setAppName("jsonRDD");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sc);
JavaRDD<String> nameRDD = sc.parallelize(Arrays.asList(
"{\"name\":\"zhangsan\",\"age\":\"18\"}",
"{\"name\":\"lisi\",\"age\":\"19\"}",
"{\"name\":\"wangwu\",\"age\":\"20\"}"
));
JavaRDD<String> scoreRDD = sc.parallelize(Arrays.asList(
"{\"name\":\"zhangsan\",\"score\":\"100\"}",
"{\"name\":\"lisi\",\"score\":\"200\"}",
"{\"name\":\"wangwu\",\"score\":\"300\"}"
));
DataFrame namedf = sqlContext.read().json(nameRDD);
DataFrame scoredf = sqlContext.read().json(scoreRDD);
namedf.registerTempTable("name");
scoredf.registerTempTable("score");
DataFrame result = sqlContext.sql("select name.name,name.age,score.score from name,score where name.name = score.name");
result.show();
sc.stop();
二、Scala代码
val conf = new SparkConf()
conf.setMaster("local").setAppName("jsonrdd")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
val nameRDD = sc.makeRDD(Array(
"{\"name\":\"zhangsan\",\"age\":18}",
"{\"name\":\"lisi\",\"age\":19}",
"{\"name\":\"wangwu\",\"age\":20}"
))
val scoreRDD = sc.makeRDD(Array(
"{\"name\":\"zhangsan\",\"score\":100}",
"{\"name\":\"lisi\",\"score\":200}",
"{\"name\":\"wangwu\",\"score\":300}"
))
val nameDF = sqlContext.read.json(nameRDD)
val scoreDF = sqlContext.read.json(scoreRDD)
nameDF.registerTempTable("name")
scoreDF.registerTempTable("score")
val result = sqlContext.sql("select name.name,name.age,score.score from name,score where name.name = score.name")
result.show()
sc.stop()