package jdbc
import cn.lcy.common.SparkUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
object WriteClickHouse {

  /**
   * Entry point: read the order JSON file with Spark, create a ClickHouse
   * table whose schema is derived from the DataFrame, bulk-insert the rows,
   * then shut the Spark session down.
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = SparkUtils.sparkConf(this.getClass.getCanonicalName)
    val session: SparkSession = SparkUtils.sparkSession(conf)

    val orders: DataFrame = session.read.json("data/order.json")

    // Derive the DDL from the DataFrame schema, create the table, then load the data.
    val ddl: String = ClickHouseUtil.createTableSql("orderjson", orders)
    ClickHouseUtil.createTable(ddl)
    ClickHouseUtil.insert("orderjson", orders)

    session.close()
  }
}
// --- Utility class (工具类): ClickHouseUtil — originally a separate file, jdbc/ClickHouseUtil.scala ---
package jdbc
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{DataType, DoubleType, FloatType, IntegerType, LongType, StringType, StructType}
import ru.yandex.clickhouse.ClickHouseDataSource
import java.sql.{Connection, PreparedStatement, Statement}
object ClickHouseUtil {

  /**
   * Build a parameterized INSERT statement whose column list and placeholder
   * count match the DataFrame's schema (column order = schema field order).
   */
  def getInsertSql(tableName: String, dataFrame: DataFrame): String = {
    val names: Array[String] = dataFrame.schema.fieldNames
    val placeholder: String = names.map(_ => "?").mkString(",")
    s"insert into ${tableName}(${names.mkString(",")}) values(${placeholder})"
  }

  /**
   * Write every row of the DataFrame into `tableName` via batched JDBC inserts,
   * one connection per partition.
   *
   * Fixes over the original: the Connection and PreparedStatement are now
   * closed in `finally` (they leaked before), and the batch is flushed every
   * `batchSize` rows instead of buffering an entire partition in memory.
   *
   * @param batchSize rows per executeBatch() flush; defaults keep the call
   *                  sites unchanged.
   */
  def insert(tableName: String, dataFrame: DataFrame, batchSize: Int = 10000): Unit = {
    val sql = getInsertSql(tableName, dataFrame)
    println(sql)
    // Capture the column count on the driver; schema order matches the INSERT
    // column list built by getInsertSql, so positional row.get(i) is correct
    // and avoids the per-row, per-field fieldIndex() lookup of the original.
    val fieldCount: Int = dataFrame.schema.fieldNames.length
    dataFrame.foreachPartition(
      it => {
        val connection: Connection = getConn()
        try {
          val statement: PreparedStatement = connection.prepareStatement(sql)
          try {
            var pending = 0
            it.foreach(
              row => {
                var i = 0
                while (i < fieldCount) {
                  // JDBC parameters are 1-based.
                  statement.setObject(i + 1, row.get(i))
                  i += 1
                }
                statement.addBatch()
                pending += 1
                if (pending >= batchSize) {
                  statement.executeBatch()
                  pending = 0
                }
              }
            )
            // Flush any trailing partial batch.
            if (pending > 0) statement.executeBatch()
          } finally {
            statement.close()
          }
        } finally {
          connection.close()
        }
      }
    )
  }

  /**
   * Execute a DDL statement (e.g. the output of createTableSql).
   * Resources are released even when execute() throws.
   */
  def createTable(createTSql: String): Unit = {
    val connection: Connection = getConn()
    try {
      val statement: Statement = connection.createStatement()
      try {
        statement.execute(createTSql)
      } finally {
        statement.close()
      }
    } finally {
      connection.close()
    }
  }

  /**
   * Open a JDBC connection to ClickHouse. The defaults preserve the original
   * hard-coded endpoint and credentials, so existing getConn() calls are
   * unchanged; callers may now override them.
   *
   * NOTE(review): credentials in source code — move to configuration.
   */
  def getConn(url: String = "jdbc:clickhouse://node2.itcast.cn:8123/default",
              user: String = "root",
              password: String = "123456"): Connection = {
    val source = new ClickHouseDataSource(url)
    source.getConnection(user, password)
  }

  /**
   * Derive a CREATE TABLE statement from the DataFrame schema, mapping Spark
   * SQL types to ClickHouse types (MergeTree engine, ordered by primaryKey).
   *
   * @throws RuntimeException for unsupported Spark data types
   * @throws IllegalArgumentException if primaryKey is not a schema field
   */
  def createTableSql(tableName: String, dataFrame: DataFrame, primaryKey: String = "id"): String = {
    val schema: StructType = dataFrame.schema
    // MergeTree's ORDER BY must reference an existing column; fail fast here
    // rather than with a server-side error at createTable() time.
    require(schema.fieldNames.contains(primaryKey),
      s"primary key column '${primaryKey}' not found in schema: ${schema.fieldNames.mkString(",")}")
    val tableFields: Seq[String] = schema.map(field => {
      val fieldType: String = field.dataType match {
        case StringType  => "String"
        case IntegerType => "Int32"
        case FloatType   => "Float32"
        case DoubleType  => "Float64"
        case LongType    => "Int64"
        case other       => throw new RuntimeException(s"not supported data type ${other}")
      }
      s"${field.name} ${fieldType}"
    })
    s"create table if not exists ${tableName}(${tableFields.mkString(",")}) ENGINE=MergeTree() order by ${primaryKey} settings index_granularity=8192"
  }
}