实训项目地址:https://www.educoder.net/shixuns/pbmkl5vt/challenges
第1关:海量数据导入:SparkSQL大数据导入处理
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql._
object SparkSQLHive {
def main(args: Array[String]) = {
val sparkConf=new SparkConf().setAppName("PageRank")
val sc=new SparkContext(sparkConf)
val spark = SparkSession.builder.master("local").appName("tester").enableHiveSupport().getOrCreate()
spark.sql("use default")
import spark.implicits._
//drop table if it exists
spark.sql("DROP TABLE IF EXISTS vertices")
spark.sql("DROP TABLE IF EXISTS edges")
//create table here
spark.sql("CREATE TABLE IF NOT EXISTS vertices(ID BigInt,Title String)ROW FORMAT DELIMITED F