读取 GeoJSON 文件后，解析各区域要素、排序并广播，然后注册按经纬度查询所属区域的 UDF：
// Read the whole GeoJSON file into memory. The Source is closed explicitly so the
// underlying file handle is not leaked, and the charset is pinned to UTF-8 instead
// of depending on the platform default encoding.
val jsonStr = {
  val source = Source.fromFile("data/nyc-borough-boundaries-polygon.geojson", "UTF-8")
  try source.mkString finally source.close()
}
val featureCollection = FeatureExtraction.parseJson(jsonStr)
// NOTE(review): JavaConversions is deprecated (removed in Scala 2.13) and nothing in
// the visible code appears to need it; kept because code outside this snippet may
// rely on its implicit conversions.
import scala.collection.JavaConversions._
// Sort by borough code and, within the same borough code, by area in descending
// order (largest polygon first). The sort key is computed once per feature up front:
// sortBy would otherwise re-evaluate it (including the geometry parsing done by
// getGeometry()) on every comparison.
val features = featureCollection.features
  .map { feature =>
    val sortKey = (feature.properties("boroughCode"), -feature.getGeometry().calculateArea2D())
    (feature, sortKey)
  }
  .sortBy { case (_, sortKey) => sortKey }
  .map { case (feature, _) => feature }
// Broadcast the sorted features so the lookup function can read them on the executors.
val featureBc = spark.sparkContext.broadcast(features)
/**
 * Finds the borough that contains the given coordinate. Intended to be registered
 * as a Spark UDF.
 *
 * The broadcast features are scanned in order and the first one whose geometry
 * contains the point wins.
 *
 * @param x x coordinate of the point (presumably longitude, given WKID 4326 — confirm with callers)
 * @param y y coordinate of the point (presumably latitude — confirm with callers)
 * @return the "borough" property of the first matching feature, or "NA" when no
 *         feature contains the point
 */
def findBorough(x: Double, y: Double): String = {
  // Both values are the same for every candidate feature, so build them once per
  // call instead of once per feature inside the predicate.
  val point = new Point(x, y)
  val spatialReference = SpatialReference.create(4326)
  featureBc.value
    .find(feature => GeometryEngine.contains(feature.getGeometry(), point, spatialReference))
    .map(_.properties("borough"))
    .getOrElse("NA")
}
// Register the lookup as a UDF under the name "findBorough".
spark.udf.register("findBorough", (x: Double, y: Double) => findBorough(x, y))
下面是解析 JSON 数据的代码：
import com.esri.core.geometry.{Geometry, GeometryEngine}
import org.json4s.JsonAST.JObject
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
/**
 * Top-level container that `FeatureExtraction.parseJson` deserializes a JSON
 * document into.
 *
 * @param features the features listed in the document
 */
case class FeatureCollection(
  features: List[Feature]
)
/**
 * A single feature: its string properties plus its geometry kept as raw JSON.
 *
 * @param properties feature attributes, keyed by property name
 * @param geometry   the feature's geometry as an unparsed JSON object
 */
case class Feature(properties: Map[String, String], geometry: JObject) {

  // Parsed form of `geometry`, built on first use and then reused. Rendering the
  // JSON and parsing it again on every getGeometry() call is expensive when the
  // same feature is queried repeatedly. Marked @transient so the cached value is
  // not serialized with the instance (e.g. when the feature is broadcast); it is
  // simply rebuilt lazily after deserialization.
  @transient private lazy val parsedGeometry: Geometry = {
    import org.json4s.jackson.JsonMethods._
    val geoJson = compact(render(geometry))
    // Second argument is the import-flags value (0 — presumably the library
    // defaults; confirm against the Esri API). Geometry.Type.Unknown leaves the
    // concrete geometry type to be determined from the JSON itself.
    val mapGeo = GeometryEngine.geoJsonToGeometry(geoJson, 0, Geometry.Type.Unknown)
    mapGeo.getGeometry
  }

  /**
   * Returns this feature's geometry parsed from its GeoJSON representation.
   *
   * The same cached instance is returned on every call, so callers should treat
   * it as read-only.
   *
   * @return the parsed geometry
   */
  def getGeometry(): Geometry = parsedGeometry
}
/** JSON parsing entry point for feature collections. */
object FeatureExtraction {

  /**
   * Deserializes a JSON string into a [[FeatureCollection]].
   *
   * @param json the JSON document text
   * @return the feature collection read from `json`
   */
  def parseJson(json: String): FeatureCollection = {
    // json4s needs implicit Formats in scope; no type hints are used.
    implicit val jsonFormats: org.json4s.Formats = Serialization.formats(NoTypeHints)
    Serialization.read[FeatureCollection](json)
  }
}