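注意:以下示例中,内部表和外部表使用的数据都是 Parquet 格式。createTable(表名, 路径) 未显式指定数据源时,会按默认数据源(spark.sql.sources.default,默认值为 parquet)读取该路径,内部表也是用 using parquet 创建的,因此其他格式的数据不能直接按本示例的写法使用。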
示例代码
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * Walks through the life cycle of an external table and a managed table in the Spark catalog.
 *
 * <p>The demo creates the {@code twq} database, registers an external table over existing
 * Parquet data, builds a managed Parquet table from a JSON file, lists the catalog, and finally
 * drops both tables. The comments following each {@code show()} call give sample output.
 *
 * <p>All input and output locations are resolved relative to {@code Utils.BASE_PATH}.
 */
public class test19 {

    /**
     * Runs the demo end to end on a local Spark session.
     *
     * @param args ignored
     * @throws Exception if any Spark step fails; the session is stopped before the
     *                   exception propagates
     */
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession
                .builder()
                .config("spark.driver.host", "localhost")
                .appName("TableTypeTest")
                .master("local")
                .getOrCreate();
        // Stop the session in finally so that a failing step does not leave it running.
        try {
            spark.sparkContext().setLogLevel("ERROR");

            useDemoDatabase(spark);

            // List all tables: nothing has been registered yet.
            showTables(spark);
            // +----+--------+-----------+---------+-----------+
            // |name|database|description|tableType|isTemporary|
            // +----+--------+-----------+---------+-----------+
            // +----+--------+-----------+---------+-----------+

            // 1: create the external table over the existing Parquet data.
            createExternalTable(spark);

            // 2: create the managed table and fill it from the JSON file.
            createManagedTable(spark);

            // List all tables: managed table, temporary view and external table.
            showTables(spark);
            // +--------------------+--------+-----------+---------+-----------+
            // |                name|database|description|tableType|isTemporary|
            // +--------------------+--------+-----------+---------+-----------+
            // |              person|     twq|       null|  MANAGED|      false|
            // |        person_other|    null|       null|TEMPORARY|       true|
            // |trackersession_other|     twq|       null| EXTERNAL|      false|
            // +--------------------+--------+-----------+---------+-----------+

            dropTables(spark);

            // List all tables: only the temporary view is left.
            showTables(spark);
            // +------------+--------+-----------+---------+-----------+
            // |        name|database|description|tableType|isTemporary|
            // +------------+--------+-----------+---------+-----------+
            // |person_other|    null|       null|TEMPORARY|       true|
            // +------------+--------+-----------+---------+-----------+
        } finally {
            spark.stop();
        }
    }

    /** Creates the {@code twq} database if it does not exist and makes it the current database. */
    private static void useDemoDatabase(SparkSession spark) {
        spark.sql("CREATE DATABASE IF NOT EXISTS twq " +
                "COMMENT 'Test database' LOCATION '" + Utils.BASE_PATH + "/spark-db'");
        spark.catalog().setCurrentDatabase("twq");
    }

    /** Prints every table and temporary view visible in the current database. */
    private static void showTables(SparkSession spark) {
        spark.catalog().listTables().show();
    }

    /**
     * Registers {@code trackerSession_other} as a table backed by the data already stored under
     * {@code Utils.BASE_PATH + "/trackerSession"} (Parquet data in this example) and prints it.
     */
    private static void createExternalTable(SparkSession spark) {
        spark.catalog().createTable("trackerSession_other",
                Utils.BASE_PATH + "/trackerSession");
        spark.sql("select * from trackerSession_other").show();
        // Sample output (the cookie_label column holds Chinese text in the sample data):
        // +--------------------+-------------------+-------+------------+---------+--------------------+--------------+-----------+---------------+------------+
        // |          session_id|session_server_time| cookie|cookie_label|       ip|         landing_url|pageview_count|click_count|         domain|domain_label|
        // +--------------------+-------------------+-------+------------+---------+--------------------+--------------+-----------+---------------+------------+
        // |520815c9-bdd4-40c...|2017-09-04 12:00:00|cookie1|          固执|127.0.0.3|https://www.baidu...|             1|          2|  www.baidu.com|      level1|
        // |912a4b47-6984-476...|2017-09-04 12:45:01|cookie1|          固执|127.0.0.3|https://tieba.bai...|             1|          2|tieba.baidu.com|           -|
        // |79534f7c-b4dc-4bc...|2017-09-04 12:00:01|cookie2|         有偏见|127.0.0.4|https://www.baidu...|             3|          1|  www.baidu.com|      level1|
        // +--------------------+-------------------+-------+------------+---------+--------------------+--------------+-----------+---------------+------------+
    }

    /**
     * Creates the managed Parquet table {@code person}, loads {@code people.json} into the
     * temporary view {@code person_other}, copies its rows into {@code person} and prints them.
     */
    private static void createManagedTable(SparkSession spark) {
        spark.sql("create table person(name string, age int) using parquet");

        // The JSON file is exposed as a temporary view so it can be the source of the INSERT.
        Dataset<Row> people = spark.read().json(Utils.BASE_PATH + "/people.json");
        people.createOrReplaceTempView("person_other");

        spark.sql("insert into table person select name, age from person_other");
        spark.sql("select * from person").show();
        // +-------+---+
        // |   name|age|
        // +-------+---+
        // |Michael| 29|
        // |   Andy| 30|
        // | Justin| 19|
        // +-------+---+
    }

    /** Drops the external table first and the managed table second. */
    private static void dropTables(SparkSession spark) {
        // Dropping the external table: its data is still there afterwards.
        spark.sql("drop table trackerSession_other");
        // Dropping the managed table: its data is deleted as well.
        spark.sql("drop table person");
    }
}