示例数据的可视化
连接PG数据库的准备
Spark连接PG需要用到以下jar包
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.2.14</version>
</dependency>
解析geo_json的准备
解析geometry十六进制字符串需要引入以下jar包（JTS），用于将其解析为geo_json格式
<dependency>
<groupId>com.vividsolutions</groupId>
<artifactId>jts</artifactId>
<version>1.8</version>
</dependency>
完整示例代码
package com.sgeoc.analyzer
import com.vividsolutions.jts.io.WKBReader
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.sql.SparkSession
object App {
  /**
   * Entry point: reads point geometries (WKB hex strings) from a PostgreSQL
   * table via JDBC, decodes each one to an (x, y) coordinate with JTS, and
   * clusters the points with k-means (k = 3).
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("ARTPerSer")
      .master("local[*]").getOrCreate()

    // NOTE(review): credentials are hard-coded and duplicated in both the URL
    // and the reader options — move them to external configuration in production.
    val url = "jdbc:postgresql://xxx:xxx/xxx?user=xxx&password=xxx"
    val dataDF = spark.read.format("jdbc")
      .option("url", url)
      .option("dbtable", "fangzhuangtong")
      .option("user", "xxx")
      .option("password", "xxx")
      .load()

    // spark.ml's KMeans requires the "features" column to be of VectorUDT,
    // not ArrayType(DoubleType): returning Array(x, y) from the UDF makes
    // kmeans.fit fail on schema validation. Build a dense Vector instead.
    import org.apache.spark.ml.linalg.Vectors
    val convertToPos = spark.udf.register("convertToPos", (geom: String) => {
      // WKB hex string -> JTS Geometry; use its first coordinate as the point.
      val g = new WKBReader().read(WKBReader.hexToBytes(geom))
      Vectors.dense(g.getCoordinate.x, g.getCoordinate.y)
    })

    dataDF.createOrReplaceTempView("tmp_fangzhuangtong")
    val sql = "select geom from tmp_fangzhuangtong"
    import spark.implicits._
    val posDF = spark.sql(sql).select(convertToPos('geom) as "features")
    posDF.show()

    // Fixed seed so the clustering result is reproducible across runs.
    val kmeans = new KMeans().setK(3).setSeed(1L)
    val model = kmeans.fit(posDF)
    model.transform(posDF).show(false)
    model.clusterCenters.foreach(println)

    spark.stop()
  }
}
代码结果
[102.62283054500615,37.932703753925786]
[102.900756612634,37.479060480297484]
[103.1387133379702,36.97090604847232]