1. Reading an external JSON file
package com.study;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * @author yulei
 * @date 2018/9/10 11:00
 * @version 1.0
 **/
public class TestSparkSQL {
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("Java Spark SQL basic example")
                .config("spark.master", "local")
                .getOrCreate();

        Dataset<Row> df = spark.read().json("file:///d:/scala/json.dat");
        // Register a temporary view so the data can be queried with SQL below
        df.createOrReplaceTempView("customers");
        df.show();

        Dataset<Row> df2 = spark.sql("select * from customers where age > 13");
        df2.show();
    }
}
The contents of json.dat:
{"id":"1","name":"tom","age":"12"}
{"id":"2","name":"tome","age":"13"}
{"id":"3","name":"tomsli","age":"14"}
{"id":"4","name":"tomsom","age":"15"}
Running the code prints the full table, followed by the filtered result:
+---+---+------+
|age| id| name|
+---+---+------+
| 12| 1| tom|
| 13| 2| tome|
| 14| 3|tomsli|
| 15| 4|tomsom|
+---+---+------+
+---+---+------+
|age| id| name|
+---+---+------+
| 14| 3|tomsli|
| 15| 4|tomsom|
+---+---+------+
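Note that the ages in json.dat are quoted, so Spark infers age as a string, and the filter age > 13 succeeds only through an implicit cast. A minimal sketch of making the type explicit after loading, using a standard column cast:

// Cast the string column to an integer so comparisons are numeric
Dataset<Row> typed = df.withColumn("age", df.col("age").cast("int"));
typed.printSchema();             // age: integer (nullable = true)
typed.filter("age > 13").show();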
A global temporary view can be created as follows (this snippet follows the Spark documentation's example, so the output reflects its people.json sample data rather than the customers table above):
// Register the DataFrame as a global temporary view
df.createGlobalTempView("people");
// Global temporary view is tied to a system preserved database `global_temp`
spark.sql("SELECT * FROM global_temp.people").show();
// +----+-------+
// | age| name|
// +----+-------+
// |null|Michael|
// | 30| Andy|
// | 19| Justin|
// +----+-------+
// Global temporary view is cross-session
spark.newSession().sql("SELECT * FROM global_temp.people").show();
// +----+-------+
// | age| name|
// +----+-------+
// |null|Michael|
// | 30| Andy|
// | 19| Justin|
// +----+-------+
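A global temporary view lives until the Spark application terminates. If it should be released earlier, it can be dropped through the catalog; a small sketch using the standard Catalog API:

// Drop the global view explicitly; returns true if the view existed
spark.catalog().dropGlobalTempView("people");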
2. Connecting over JDBC; the example uses MySQL.
SparkSession sess = SparkSession.builder()
        .appName("SQLJava")
        .config("spark.master", "local")
        .getOrCreate();

String url = "jdbc:mysql://127.0.0.1:43306/hive";
String table = "DBS";

// Read the table from the database
Dataset<Row> df = sess.read()
        .format("jdbc")
        .option("url", url)
        .option("dbtable", table)
        .option("user", "root")
        .option("password", "Bigdata-123")
        .option("driver", "com.mysql.jdbc.Driver")
        .load();
df.show();
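Writing a DataFrame back over JDBC follows the same pattern. A sketch that appends df to a target table (DBS_copy is a hypothetical name used only for illustration):

import java.util.Properties;
import org.apache.spark.sql.SaveMode;

Properties props = new Properties();
props.put("user", "root");
props.put("password", "Bigdata-123");
props.put("driver", "com.mysql.jdbc.Driver");

// DBS_copy is a hypothetical target table
df.write()
        .mode(SaveMode.Append)   // append rows to the target table
        .jdbc(url, "DBS_copy", props);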
3. Integrating with Hive
First create a Hive table; here it is created from spark-shell.
1) Start spark-shell:
spark-shell --master local[4]
2) Create the Hive table (this assumes the database mydb already exists):
scala> spark.sql("create table mydb.tt(id int ,name string, age int) row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile ")
res4: org.apache.spark.sql.DataFrame = []
3) Load data into the table:
scala> spark.sql("load data local inpath 'file:///root/download/data.txt' into table mydb.tt")
res11: org.apache.spark.sql.DataFrame = []
The contents of data.txt:
1,tom,12
2,hanmeimei,23
3,lilei,32
4,mayun,50
5,tengxun1,22
4) Query the table:
scala> val df = spark.sql("select * from mydb.tt")
df: org.apache.spark.sql.DataFrame = [id: int, name: string ... 1 more field]
scala> df.show
+---+---------+---+
| id| name|age|
+---+---------+---+
| 1| tom| 12|
| 2|hanmeimei| 23|
| 3| lilei| 32|
| 4| mayun| 50|
| 5| tengxun1| 22|
+---+---------+---+
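The same table can be queried from a standalone Java program by enabling Hive support on the SparkSession; a minimal sketch, assuming hive-site.xml is available on the classpath so Spark can reach the metastore:

SparkSession spark = SparkSession.builder()
        .appName("HiveJava")
        .config("spark.master", "local")
        .enableHiveSupport()     // connect Spark SQL to the Hive metastore
        .getOrCreate();
spark.sql("select * from mydb.tt").show();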