SparkSql处理日志后保存到mysql中

最新推荐文章于 2024-07-10 21:40:32 发布

wtzhm

最新推荐文章于 2024-07-10 21:40:32 发布

阅读量1.4k

点赞数 1

分类专栏： sparksql 文章标签： sparksql 保存数据到mysql中 spark sql mysql

本文链接：https://blog.csdn.net/wtzhm/article/details/84784227

版权

sparksql 专栏收录该内容

22 篇文章 1 订阅

订阅专栏

将 Spark SQL 处理后的数据保存到 MySQL 中

1. 原始 JDBC 工具类

object SqlUtils {
    import scala.util.control.NonFatal

    /**
      * Opens a new JDBC connection to the `sparklog` MySQL database.
      *
      * NOTE(review): host, user and password are hard-coded in the URL below.
      * They should be moved to external configuration before this is used
      * outside a local experiment.
      *
      * @return a freshly opened connection; hand it to [[relaseSource]] when done
      */
    def getConnection(): Connection = {
        DriverManager.getConnection("jdbc:mysql://192.168.126.31:3306/sparklog?user=root&password=Zhm@818919")
    }

    /**
      * Releases JDBC resources: closes the statement first, then the connection.
      *
      * Both closes are best-effort. A failure is printed to stderr and never
      * propagated, so a failing statement close cannot prevent the connection
      * from being closed, and a failing connection close cannot escape from a
      * caller's `finally` block.
      *
      * @param conn connection to close; `null` is ignored
      * @param pst  prepared statement to close; `null` is ignored
      */
    def relaseSource(conn: Connection, pst: PreparedStatement): Unit = {
        try {
            closeQuietly(pst)
        } finally {
            // Runs even if closing the statement raised a fatal error.
            closeQuietly(conn)
        }
    }

    /** Closes `resource` if it is non-null, printing (not rethrowing) non-fatal failures. */
    private def closeQuietly(resource: AutoCloseable): Unit = {
        if (resource != null) {
            try {
                resource.close()
            } catch {
                case NonFatal(e) => e.printStackTrace()
            }
        }
    }
}

2. 建立 DAO

object TopNDao {

    /**
      * Batch-inserts the most popular TopN course records into the
      * `day_video_access_topn` table inside a single transaction.
      *
      * A null or empty list is a no-op: no connection is opened. If any step
      * fails, the error is printed to stderr, the transaction is rolled back
      * explicitly, and the method returns normally (failures are not
      * propagated to the caller).
      *
      * @param list records to insert; each contributes one (day, cmsId, times) row
      */
    def insertDayVideoAccessTopN(list: ListBuffer[DayVideoVisitBean]): Unit = {
        if (list != null && list.nonEmpty) {
            var connection: Connection = null
            var pstmt: PreparedStatement = null
            try {
                connection = SqlUtils.getConnection()
                // Commit manually so the whole batch is applied atomically.
                connection.setAutoCommit(false)
                val sql = "insert into day_video_access_topn (day,cmsId,times) values(?,?,?)"
                pstmt = connection.prepareStatement(sql)
                for (ele <- list) {
                    pstmt.setString(1, ele.day)
                    pstmt.setLong(2, ele.cmsId)
                    pstmt.setLong(3, ele.times)
                    pstmt.addBatch()
                }
                // Send the accumulated batch, then commit it.
                pstmt.executeBatch()
                connection.commit()
            } catch {
                case e: Exception =>
                    e.printStackTrace()
                    // Do not rely on close() to discard the open transaction.
                    rollbackQuietly(connection)
            } finally {
                SqlUtils.relaseSource(connection, pstmt)
            }
        }
    }

    /** Rolls back `connection` if it is non-null, printing (not rethrowing) any failure. */
    private def rollbackQuietly(connection: Connection): Unit = {
        if (connection != null) {
            try {
                connection.rollback()
            } catch {
                case e: Exception => e.printStackTrace()
            }
        }
    }
}

3. 使用 Spark SQL 处理业务逻辑

/**
 * Job entry point: loads the cleaned access log, computes the ten most
 * visited video courses for one day and stores them in MySQL.
 */
object TopNJob {
    def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("TopNJob").master("local[2]").getOrCreate()
        try {
            val df = spark.read.format("parquet").load("file:///D:\\test_data\\log\\cleanLog\\part-*")
            // mostPopular queries this view by name.
            df.createOrReplaceTempView("tb_course")
            // Most popular TopN courses
            mostPopular(spark, df, "20180110")
        } finally {
            // Stop the session even when the job fails.
            spark.stop()
        }
    }

    /**
      * Computes the top 10 video courses by visit count for `day` and writes
      * them to the database through `TopNDao`.
      *
      * Input row layout: Row(ip, url, cmsType, cmsId, traffic, city, time, day)
      *
      * @param spark session used to run the query; the temp view `tb_course`
      *              must already be registered on it
      * @param df    not read by this method; the query runs against `tb_course`
      * @param day   day to report on, matched against the string column `day`;
      *              must be non-null
      */
    def mostPopular(spark: SparkSession, df: DataFrame, day: String): Unit = {
        // Emit `day` as a quoted, escaped string literal: the column is a string,
        // so this avoids an implicit numeric cast and keeps caller-supplied text
        // from altering the statement.
        val dayLiteral = day.replace("\\", "\\\\").replace("'", "\\'")

        // Run the aggregation in Spark SQL.
        val result = spark.sql(
            "select day ,cmsId, count(1) as times from tb_course " +
              s"where day = '$dayLiteral' and cmsType='video' " +
              "group by cmsId,day order by times desc limit 10 ")

        // Persist each partition of the result with one batch insert.
        result.foreachPartition(rows => {
            val list = new ListBuffer[DayVideoVisitBean]
            rows.foreach(row => {
                val day = row.getAs[String]("day")
                val cmsId = row.getAs[Long]("cmsId")
                val times = row.getAs[Long]("times")
                list.append(DayVideoVisitBean(day, cmsId, times))
            })
            // Do not open a database connection for an empty partition.
            if (list.nonEmpty) {
                TopNDao.insertDayVideoAccessTopN(list)
            }
        })
    }
}

/**
  * Entity holding one row of the daily video-visit ranking.
  *
  * @param day   day the visits were counted for
  * @param cmsId identifier of the course
  * @param times number of visits counted for that course on that day
  */
case class DayVideoVisitBean(
    day: String,
    cmsId: Long,
    times: Long
)