Spark批处理写入ClickHouse

package jdbc

import cn.lcy.common.SparkUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

object WriteClickHouse {
  def main(args: Array[String]): Unit = {
      val sparkConf: SparkConf = SparkUtils.sparkConf(this.getClass.getCanonicalName)
      val spark: SparkSession = SparkUtils.sparkSession(sparkConf)

    val dataFrame: DataFrame = spark.read.json("data/order.json")

    val createTSql: String = ClickHouseUtil.createTableSql("orderjson", dataFrame)
    ClickHouseUtil.createTable(createTSql)

    ClickHouseUtil.insert("orderjson",dataFrame)

    spark.close()

  }

}

工具类:

package jdbc

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{DataType, DoubleType, FloatType, IntegerType, LongType, StringType, StructType}
import ru.yandex.clickhouse.ClickHouseDataSource

import java.sql.{Connection, PreparedStatement, Statement}

object ClickHouseUtil {

  def getInsertSql(tableName:String,dataFrame: DataFrame) = {
    val names: Array[String] = dataFrame.schema.fieldNames
    val placeholder: String = (1 to names.length).map(_ =>"?").mkString(",")

    s"insert into ${tableName}(${names.mkString(",")}) values(${placeholder})"
  }

  def insert(tableName: String, dataFrame: DataFrame) = {
    val sql = getInsertSql(tableName, dataFrame)
    println(sql)
    dataFrame.foreachPartition(
      it=>{
        val connection: Connection = getConn()
        val statement: PreparedStatement = connection.prepareStatement(sql)
        it.foreach(
          row=>{
            row.schema.fieldNames.foreach(
              fieldName => {
                val index: Int = row.schema.fieldIndex(fieldName)
                statement.setObject(index+1, row.get(index))
              }
            )

            statement.addBatch()
          }
        )

        statement.executeBatch()
      }
    )
  }

  def createTable(createTSql: String) = {
    val connection: Connection = getConn()
    val statement: Statement = connection.createStatement()
    statement.execute(createTSql)
    statement.close()
    connection.close()
  }

  def getConn(): Connection = {
     val url = "jdbc:clickhouse://node2.itcast.cn:8123/default"
     val source = new ClickHouseDataSource(url)
     source.getConnection("root","123456")
   }

  def createTableSql(tableName:String,dataFrame:DataFrame,primaryKey:String="id") = {
    val schema: StructType = dataFrame.schema
    val tableFields: Seq[String] = schema.map(field => {
      val dataType: DataType = field.dataType

      val fieldType: String = dataType match {
        case StringType => "String"
        case IntegerType => "Int32"
        case FloatType => "Float32"
        case DoubleType => "Float64"
        case LongType => "Int64"
        case _ => throw new RuntimeException(s"not supported data type ${dataType}")
      }

      s"${field.name} ${fieldType}"
    })

    s"create table if not exists ${tableName}(${tableFields.mkString(",")}) ENGINE=MergeTree() order by ${primaryKey} settings index_granularity=8192"
  }
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小手追梦

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值