spark 通过 scala 读取数据 并且存入 mysql

import java.sql.{Connection,Driver,DriverManager,ResultSet,PreparedStatement}
import org.apache.spark.{SparkConf, SparkContext}

//把每个jsp的访问量,直接保存到Oracle数据中
 // spark-submit --master local --class SaveMySQL SaveSQL.jar
object SaveMySQL {
  def main(args: Array[String]): Unit = {
    //定义SparkContext对象
    val conf = new SparkConf().setAppName("SaveSQL")

    //创建SparkContext对象
    val sc = new SparkContext(conf)

    //读入数据
    //日志:192.168.88.1 - - [30/Jul/2017:12:54:38 +0800] "GET /MyDemoWeb/hadoop.jsp HTTP/1.1" 200 242
    val logData = sc.textFile("/root/log.txt").map(
      line => {
        //解析字符串:找到hadoo.jsp
        //得到: .jsp位置
        val index1 = line.indexOf("\"") //得到的是第一个双引号的位置
        val index2 = line.lastIndexOf("\"") //得到的是最后一个双引号的位置

        //子串:GET /MyDemoWeb/hadoop.jsp HTTP/1.1
        val line1 = line.substring(index1 + 1, index2)
        val index3 = line1.indexOf(" ") //子串中第一个空格
        val index4 = line1.lastIndexOf(" ") //子串中最后一个空格

        //子串:/MyDemoWeb/hadoop.jsp
        val line2 = line1.substring(index3 + 1, index4)

        //得到jsp名字: hadoop.jsp
        val jspPage = line2.substring(line2.lastIndexOf("/") + 1)

        //返回: 格式:  (hadoop.jsp,1)
        (jspPage, 1)
      }
    )
    //logData.foreach(println)
    logData.foreachPartition(saveToMySQL)
  }
  //create database sparktest
  //create table mytest( id INT AUTO_INCREMENT,url varchar(200),num INT,PRIMARY KEY(id));
  //定义一个函数,将每个分区中的数据保存到MySQL中
  // 注意上传 mysql-connector-java-5.1.47.jar 到 SPARK_HOME/jars/下面,否则找不到jar包
  def saveToMySQL(it: Iterator[(String, Int)]) = {
    // 加载 MySQL驱动类
    // 如果不添加 也可以自动加载
    //classOf[com.mysql.jdbc.Driver]
    // 格式
    //val conn_str = "jdbc:mysql://localhost:3306/DBNAME?user=DBUSER&password=DBPWD"
    // 定义继承配置
    val dbName: String = "sparktest"
    val tableName:String = "mytest"
    val dbUser: String = "root"
    val dbPasswd: String = "123456"
    val dbHost: String = "127.0.0.1"
    val dbPort: String = "3306"
    // 代码
    val conn_str = "jdbc:mysql://" + dbHost + ":" + dbPort + "/" + dbName + "?useSSL=false&user=" + dbUser + "&password=" + dbPasswd
    // 获取连接
    val conn: Connection = DriverManager.getConnection(conn_str)
    println("______________________________________________________________________________________________________")
    println("______________________________________________________________________________________________________")
    println("_________________________________get Result from table________________________________________________")
    println("______________________________________________________________________________________________________")
    println("______________________________________________________________________________________________________")
    // 读取当前数据
    try {
      // 配置为只读模式
      val statement = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)
      val QuerySql = "SELECT * FROM "+tableName
      // 执行查询语句
      val rs = statement.executeQuery(QuerySql)
      // 遍历获取到的结果
      while (rs.next) {
        println(rs.getString("url"))
      }
    } catch {
      case e: Exception => e.printStackTrace
    }
    println("______________________________________________________________________________________________________")
    println("______________________________________________________________________________________________________")
    println("_________________________________insert Data    ______________________________________________________")
    println("______________________________________________________________________________________________________")
    println("______________________________________________________________________________________________________")
    // 插入数据
    var pst: PreparedStatement = null
    var InsertSql = "insert into "+tableName+" (url,num) values(?,?)"
    try {
      pst = conn.prepareStatement(InsertSql)
      //保存数据
      it.foreach(data => {
        pst.setString(1, data._1)
        pst.setInt(2, data._2)
        pst.executeUpdate()
      })
    } catch {
      case e1: Exception => println("Some Error : " + e1.getMessage)
    } finally {
      if (pst != null) pst.close()
      if (conn != null) conn.close()
    }
  }
}

sbt 配置文件:

name := "SaveSQL"

version := "0.1"

scalaVersion := "2.11.12"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"%"2.1.0")

// https://mvnrepository.com/artifact/mysql/mysql-connector-java
libraryDependencies += "mysql" % "mysql-connector-java" % "5.1.47"
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值