SparkSql篇3:SparkSql读写kudu
spark操作kudu的方式有很多,spark封装了KuduContext,里面有kudu的增删改查
本文主要针对SparkSql,利用外部数据源方式,直接从SQL层面进行读写kudu
废话不多说,直接上干货
package kudu
import org.apache.kudu.spark.kudu.KuduContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.kudu.spark.kudu
object sparkKuduReadAndWrite {

  /**
   * Demonstrates Spark SQL read/write against Kudu via the external
   * data source ("org.apache.kudu.spark.kudu"):
   *   1. load the Kudu table `spark_kudu` as a DataFrame,
   *   2. register and cache it as temp view `xydate1`,
   *   3. query the view with plain SQL,
   *   4. append the result into the Kudu table `xytest`.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder()
      .master("local")
      .appName("appName")
      // small test heap so the example runs on a developer machine
      .config("spark.testing.memory", "471859200")
      .getOrCreate()

    // Ensure the session is always released and the cached view dropped,
    // even if the Kudu read or write throws.
    try {
      // Read from Kudu through the DataSource API.
      val kudureader = sparkSession.read
        .format("org.apache.kudu.spark.kudu")
        .option("kudu.master", "cdh2:7051")
        .option("kudu.table", "spark_kudu")
        .load()

      // Register a temporary view and cache it so repeated SQL queries
      // do not re-scan Kudu.
      kudureader.createTempView("xydate1")
      sparkSession.sqlContext.cacheTable("xydate1")

      // Query the Kudu-backed temp view just like a Hive table.
      val datas = sparkSession.sql("select * from xydate1")

      // Write the result back into Kudu. "append" is the save mode the
      // Kudu data source supports for inserts.
      datas.write
        .format("org.apache.kudu.spark.kudu")
        .option("kudu.master", "cdh2:7051")
        .option("kudu.table", "xytest")
        .mode("append")
        .save()

      sparkSession.sqlContext.uncacheTable("xydate1")
    } finally {
      sparkSession.stop()
    }
  }
}
pom如下:
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client</artifactId>
<version>${kudu.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-spark2_2.11</artifactId>
<version>${kudu.version}</version>
</dependency>