1

我一直在做这个教程:https ://databricks.com/notebooks/geomesa-h3-notebook.html

多边形函数(polygonToH3)完美地工作,但是多多边形函数给出了奇怪的结果。预计有数百万个 H3 索引,但在至少 10,000 平方公里的区域内只生成了几百个索引

val multiPolygonToH3 = udf{ (geometry: Geometry, resolution: Int) => 
  var points: List[GeoCoord] = List()
  var holes: List[java.util.List[GeoCoord]] = List()
  if (geometry.getGeometryType == "MultiPolygon") {
    val numGeometries = geometry.getNumGeometries()
    if (numGeometries > 0) {
      points = List(
        geometry
          .getGeometryN(0)
          .getCoordinates()
          .toList
          .map(coord => new GeoCoord(coord.y, coord.x)): _* )
    }
    if (numGeometries > 1) {
      holes = (1 to (numGeometries - 1)).toList.map(n => {
        List(
          geometry
            .getGeometryN(n)
            .getCoordinates()
            .toList
            .map(coord => new GeoCoord(coord.y, coord.x)): _*).asJava 
      })
    }
  }
  H3.instance.polyfill(points, holes.asJava, resolution).toList 
}
This is the code that calls the above function: 

def ConvertMultiPolyH3(iLGA:Int) : Boolean = {

  var batch_start = System.currentTimeMillis()
  val res = 12
  var sSQL = s"""SELECT * from tca_test_dl.lga 
             WHERE lga_wkt is not null and lga_id = $iLGA and GeometryType = 'MultiPolygon'""".stripMargin
  val dfLGA = sparkSession.sql(sSQL)

  val wktdfLGA = dfLGA.withColumn("lga_geom", st_geomFromWKT(col("lga_wkt")))
                        .withColumn("lga_id", col("lga_id").cast(LongType))
                        .withColumn("state_id", col("state_id").cast(LongType)).cache

  val dfLGA_H3 = wktdfLGA.withColumn("h3_index", multiPolygonToH3(col("lga_geom"),lit(res))).withColumn("h3_index", explode($"h3_index"))
  dfLGA_H3.printSchema()

  val dfLGA_New = dfLGA_H3.drop("lga_wkt", "lga_geom")
  //dfLGA_New.write.mode("append").format("delta").partitionBy("lga_id").save("/mnt/cont-tca-test/delta_lake/lga_h3")
  println(dfLGA_New.count())
  var batch_time = (System.currentTimeMillis() - batch_start)/1000.0/60.0
  batch_time = BigDecimal(batch_time).setScale(2, BigDecimal.RoundingMode.HALF_UP).toDouble
  println(s"LGA H3 conversion for LGA: $iLGA has been processed in $batch_time minutes")

  dfLGA.unpersist()
  wktdfLGA.unpersist()
  dfLGA_H3.unpersist()
  dfLGA_New.unpersist()
  val result = true
  return result
}
4

0 回答 0