package sparkSQl;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SQLContext;
import java.util.HashMap;
import java.util.Map;public classmysqlToDataFrame {public static voidmain(String[] args) {//首先新建一个sparkconf定义参数
SparkConf conf = new SparkConf().setMaster("local").setAppName("JDBCDataSource");//创建sparkContext,是通往spark集群的唯一通道
JavaSparkContext sc = newJavaSparkContext(conf);//新建一个sparksql
SQLContext sqlContext = newSQLContext(sc);//sparksql连接mysql
/** 方法1:分别将两张表中的数据加载为DataFrame
**/
/*Map options = new HashMap();
options.put("url","jdbc:mysql://localhost:3306/tset");
options.put("driver","com.mysql.jdbc.Driver");
options.put("user","root");
options.put("password","admin");
options.put("dbtable","information");
Dataset myinfromation = sqlContext.read().format("jdbc").options(options).load();
//如果需要多张表,则需要再put一遍
options.put("dbtable","score");
Dataset scores = sqlContext.read().format("jdbc").options(options).load();*/
//方法2:分别将mysql中两张表的数据加载为DataFrame
DataFrameReader reader = sqlContext.read().format("jdbc");
reader.option("url","jdbc:mysql://127.0.0.1:3306/test?serverTimezone=GMT");
reader.option("driver","com.mysql.cj.jdbc.Driver");
reader.option("user","root");
reader.option("password","admin");
reader.option("dbtable","information");
Dataset myinformation=reader.load();
reader.option("dbtable","score");
Dataset scores=reader.load();//将两个DataFrame转换为javapairrdd,执行join操作
myinformation.registerTempTable("info");
scores.registerTempTable("score");//定义sql语句
String sql = "select info.name,age"
+"from info join score"
+"on(info.name=score.name)"
+"where score.score>90";
Dataset sql2=sqlContext.sql(sql);
sql2.show();
}
}