1.场景
/**
 * ETL for QzChapter: reads raw JSON log lines from HDFS, keeps only lines that
 * parse to a valid JSONObject, extracts the chapter fields, and appends the
 * result into the Hive table dwd.dwd_qz_chapter (partitioned by dt / dn).
 *
 * NOTE: the 12-element tuple must stay in the same order as the target table's
 * columns, because insertInto matches columns positionally, not by name.
 *
 * @param ssc          SparkContext used to read the raw text file from HDFS
 * @param sparkSession SparkSession providing the implicits needed by toDF()
 */
def etlQzChapter(ssc: SparkContext, sparkSession: SparkSession) = {
  import sparkSession.implicits._ // implicit conversions for toDF()
  ssc.textFile("/user/atguigu/ods/QzChapter.log")
    .filter(item => {
      // Drop malformed lines: keep only those that parse to a JSON object.
      val obj = ParseJsonData.getJsonData(item)
      obj.isInstanceOf[JSONObject]
    })
    .mapPartitions(partitions => {
      partitions.map(item => {
        val jsonObject = ParseJsonData.getJsonData(item)
        val chapterid = jsonObject.getIntValue("chapterid")
        val chapterlistid = jsonObject.getIntValue("chapterlistid")
        val chaptername = jsonObject.getString("chaptername")
        val sequence = jsonObject.getString("sequence")
        val showstatus = jsonObject.getString("showstatus")
        val creator = jsonObject.getString("creator")
        // NOTE(review): createtime is kept as a string here but the target
        // column is declared `timestamp` — relies on an implicit cast; verify
        // the source format is cast-compatible.
        val createtime = jsonObject.getString("createtime")
        val courseid = jsonObject.getIntValue("courseid")
        val chapternum = jsonObject.getIntValue("chapternum")
        val outchapterid = jsonObject.getIntValue("outchapterid")
        val dt = jsonObject.getString("dt")
        val dn = jsonObject.getString("dn")
        // Fix: the original also read "status" into a local that was never
        // emitted (the table has no such column) — dead code, removed.
        (chapterid, chapterlistid, chaptername, sequence, showstatus, creator, createtime,
          courseid, chapternum, outchapterid, dt, dn)
      })
    })
    .toDF()
    .coalesce(1) // one output file per run to avoid small-file proliferation
    .write
    .mode(SaveMode.Append)
    .insertInto("dwd.dwd_qz_chapter")
}
2.报错
Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: dwd.dwd_qz_chapter;
3.日志分析
目标表 dwd.dwd_qz_chapter 在 Hive 元数据中不存在,因此 insertInto 解析失败。
4.解决方案
Spark 的 insertInto 只能向已存在的表写入数据,不会自动建表,因此插入前必须先在 Hive 中创建目标表(列顺序需与代码中元组字段顺序一致):
-- Target table for etlQzChapter. Must exist before Spark's insertInto() runs.
-- Column order must match the 12-element tuple produced by the ETL, because
-- insertInto matches positionally.
-- Fix: removed `ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'` — the row
-- format/SerDe is dictated by STORED AS PARQUET, so the delimiter clause is
-- ignored and only misleads readers.
-- NOTE(review): createtime is declared timestamp but the ETL writes a string;
-- confirm the source values are cast-compatible (e.g. 'yyyy-MM-dd HH:mm:ss').
create external table `dwd`.`dwd_qz_chapter`(
  chapterid     int,
  chapterlistid int,
  chaptername   string,
  sequence      string,
  showstatus    string,
  creator       string,
  createtime    timestamp,
  courseid      int,
  chapternum    int,
  outchapterid  int)
partitioned by(
  dt string,
  dn string)
STORED AS PARQUET
TBLPROPERTIES('parquet.compression'='SNAPPY');