第1关 SparkSQL加载和保存
package com. educoder. bigData. sparksql2;
import org. apache. spark. sql. AnalysisException;
import org. apache. spark. sql. SaveMode;
import org. apache. spark. sql. SparkSession;
public class Test1 {

    /**
     * Exercise 1: generic load/save. Reads two JSON files, appends both to the
     * same "people" output directory (default Parquet format), then reads the
     * combined data back and prints it.
     *
     * @param args unused
     * @throws AnalysisException if Spark fails to resolve the input data
     */
    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test1")
                .master("local")
                .getOrCreate();
        try {
            // SaveMode.Append lets the second write add rows instead of failing
            // because the "people" directory already exists.
            spark.read().format("json").load("people.json").write().mode(SaveMode.Append).save("people");
            spark.read().format("json").load("people1.json").write().mode(SaveMode.Append).save("people");
            // No format given: read() defaults to Parquet, which is what save() wrote.
            spark.read().load("people").show();
        } finally {
            // Stop the session so the local Spark context releases its resources.
            spark.stop();
        }
    }
}
第2关 Parquet文件介绍
package com. educoder. bigData. sparksql2;
import org. apache. spark. sql. AnalysisException;
import org. apache. spark. sql. SparkSession;
public class Test2 {

    /**
     * Exercise 2: Parquet files. Writes each JSON input into its own
     * partition-style subdirectory ("people/id=1", "people/id=2") so that
     * reading the parent "people" directory yields both datasets with an
     * extra partition column "id" discovered from the path.
     *
     * @param args unused
     * @throws AnalysisException if Spark fails to resolve the input data
     */
    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test1")
                .master("local")
                .getOrCreate();
        try {
            spark.read().format("json").load("people.json").write().parquet("people/id=1");
            spark.read().format("json").load("people1.json").write().parquet("people/id=2");
            // Parquet is the default read format; partition discovery adds the "id" column.
            spark.read().load("people").show();
        } finally {
            // Stop the session so the local Spark context releases its resources.
            spark.stop();
        }
    }
}
第3关 json文件介绍
package com. educoder. bigData. sparksql2;
import org. apache. spark. sql. AnalysisException;
import org. apache. spark. sql. SparkSession;
public class Test3 {

    /**
     * Exercise 3: JSON files via SQL. Registers each JSON file as a temporary
     * view, then computes the average salary across both files with a single
     * UNION ALL query.
     *
     * @param args unused
     * @throws AnalysisException if a view cannot be created or the SQL is invalid
     */
    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test1")
                .master("local")
                .getOrCreate();
        try {
            // Temp views are session-scoped; they disappear when the session stops.
            spark.read().format("json").load("people.json").createOrReplaceTempView("people");
            spark.read().format("json").load("people1.json").createOrReplaceTempView("people1");
            // UNION ALL keeps duplicates so every row contributes to the average.
            spark.sql("select avg(salary) from ( select salary from people union all select salary from people1) a").show();
        } finally {
            // Stop the session so the local Spark context releases its resources.
            spark.stop();
        }
    }
}
第4关 JDBC读取数据源
package com. educoder. bigData. sparksql2;
import org. apache. spark. sql. Dataset;
import org. apache. spark. sql. Row;
import org. apache. spark. sql. SaveMode;
import org. apache. spark. sql. SparkSession;
public class Test4 {

    // JDBC connection settings shared by every read/write below.
    // Hoisted into constants so the three call sites cannot drift apart.
    private static final String JDBC_URL =
            "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8";
    private static final String TABLE = "people";
    private static final String USER = "root";
    private static final String PASSWORD = "123123";

    /**
     * Exercise 4: JDBC data source. Writes two JSON datasets into one MySQL
     * table (first overwriting, then appending), then reads the combined
     * table back over JDBC and prints it.
     *
     * @param spark active SparkSession used for all reads and writes
     */
    public static void case4(SparkSession spark) {
        // First file overwrites so repeated runs start from a clean table.
        writeJsonToTable(spark, "people.json", SaveMode.Overwrite);
        // Second file appends, leaving the table with both datasets.
        writeJsonToTable(spark, "people1.json", SaveMode.Append);

        Dataset<Row> people = spark.read()
                .format("jdbc")
                .option("url", JDBC_URL)
                .option("dbtable", TABLE)
                .option("user", USER)
                .option("password", PASSWORD)
                .load();
        people.show();
    }

    /** Loads one JSON file and writes it to the shared JDBC table with the given mode. */
    private static void writeJsonToTable(SparkSession spark, String path, SaveMode mode) {
        Dataset<Row> df = spark.read().format("json").load(path);
        df.write()
                .format("jdbc")
                .option("url", JDBC_URL)
                .option("dbtable", TABLE)
                .option("user", USER)
                .option("password", PASSWORD)
                .mode(mode)
                .save();
    }
}