paleolimbot / wk

Lightweight Well-Known Geometry Parsing

Home Page: https://paleolimbot.github.io/wk

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

wk_collection creates null geometry collection for sfc objects

JosiahParry opened this issue · comments

When creating a collection from an sfc object using wk::wk_collection(), an error occurs when the result is printed.

x <- sf::st_multipoint(matrix(runif(10, -90, 90), ncol = 2))
wk::wk_collection(sf::st_sfc(x)) 
#> Geometry set for 1 feature 
#> Geometry type: GEOMETRYCOLLECTION
#> Dimension:     XY
#> Bounding box:  xmin: -62.02268 ymin: -60.93753 xmax: 59.29722 ymax: 50.98169
#> CRS:           NA
#> Error in UseMethod("st_as_text"): no applicable method for 'st_as_text' applied to an object of class "NULL"

Hmm...this seems like a problem with the sfc writer:

x <- sf::st_multipoint(matrix(runif(10, -90, 90), ncol = 2))
wk::wk_debug(
  x,
  wk::wk_collection_filter(
    wk::sfc_writer()
  )
) |> 
  unclass() |> 
  lapply(unclass)
#> initialize (dirty = 0  -> 1)
#> vector_start: MULTIPOINT B[1] <0x16d9dd700> => WK_CONTINUE
#>   feature_start (1): <0x16d9dd700>  => WK_CONTINUE
#>     geometry_start (<none>): MULTIPOINT[5] <0x16d9dd660> => WK_CONTINUE
#>       geometry_start (1): POINT[1] <0x16d9dd578> => WK_CONTINUE
#>         coord (1): <0x16d9dd578> (-1.084597 -0.168548)  => WK_CONTINUE
#>       geometry_end (1)  => WK_CONTINUE
#>       geometry_start (2): POINT[1] <0x16d9dd578> => WK_CONTINUE
#>         coord (1): <0x16d9dd578> (-84.752540 71.532830)  => WK_CONTINUE
#>       geometry_end (2)  => WK_CONTINUE
#>       geometry_start (3): POINT[1] <0x16d9dd578> => WK_CONTINUE
#>         coord (1): <0x16d9dd578> (80.387185 -57.938251)  => WK_CONTINUE
#>       geometry_end (3)  => WK_CONTINUE
#>       geometry_start (4): POINT[1] <0x16d9dd578> => WK_CONTINUE
#>         coord (1): <0x16d9dd578> (-79.067438 21.643537)  => WK_CONTINUE
#>       geometry_end (4)  => WK_CONTINUE
#>       geometry_start (5): POINT[1] <0x16d9dd578> => WK_CONTINUE
#>         coord (1): <0x16d9dd578> (81.961796 -87.892438)  => WK_CONTINUE
#>       geometry_end (5)  => WK_CONTINUE
#>     geometry_end (<none>)  => WK_CONTINUE
#>   feature_end (1): <0x16d9dd700>  => WK_CONTINUE
#> vector_end: <0x16d9dd700>
#> deinitialize
#> [[1]]
#> [[1]][[1]]
#> NULL


wk::wk_debug(
  x,
  wk::wk_collection_filter(
    wk::wkt_format_handler()
  )
)
#> initialize (dirty = 0  -> 1)
#> vector_start: MULTIPOINT B[1] <0x16d9de810> => WK_CONTINUE
#>   feature_start (1): <0x16d9de810>  => WK_CONTINUE
#>     geometry_start (<none>): MULTIPOINT[5] <0x16d9de770> => WK_CONTINUE
#>       geometry_start (1): POINT[1] <0x16d9de688> => WK_CONTINUE
#>         coord (1): <0x16d9de688> (-1.084597 -0.168548)  => WK_CONTINUE
#>       geometry_end (1)  => WK_CONTINUE
#>       geometry_start (2): POINT[1] <0x16d9de688> => WK_CONTINUE
#>         coord (1): <0x16d9de688> (-84.752540 71.532830)  => WK_CONTINUE
#>       geometry_end (2)  => WK_CONTINUE
#>       geometry_start (3): POINT[1] <0x16d9de688> => WK_CONTINUE
#>         coord (1): <0x16d9de688> (80.387185 -57.938251)  => WK_CONTINUE
#>       geometry_end (3)  => WK_CONTINUE
#>       geometry_start (4): POINT[1] <0x16d9de688> => WK_CONTINUE
#>         coord (1): <0x16d9de688> (-79.067438 21.643537)  => WK_CONTINUE
#>       geometry_end (4)  => WK_CONTINUE
#>       geometry_start (5): POINT[1] <0x16d9de688> => WK_CONTINUE
#>         coord (1): <0x16d9de688> (81.961796 -87.892438)  => WK_CONTINUE
#>       geometry_end (5)  => WK_CONTINUE
#>     geometry_end (<none>)  => WK_CONTINUE
#>   feature_end (1): <0x16d9de810>  => WK_CONTINUE
#> vector_end: <0x16d9de810>
#> deinitialize
#> [1] "GEOMETRYCOLLECTION (MULTIPOINT ((-1.084597 -0.1685479), (-84.75254 71.53283), (80.38719 -57.93825), (-79.06744 21.64354), (81.9618 -87.89244)))"

It looks like vector_start may be lying about the type that's about to show up. Parsing the WKT into sfc seems ok:

wk::wk_debug(
  wk::wkt("GEOMETRYCOLLECTION (MULTIPOINT ((-1.084597 -0.1685479), (-84.75254 71.53283), (80.38719 -57.93825), (-79.06744 21.64354), (81.9618 -87.89244)))"),
  wk::sfc_writer()  
)
#> initialize (dirty = 0  -> 1)
#> vector_start: <Unknown type / 0>[1] <0x16ced94c8> => WK_CONTINUE
#>   feature_start (1): <0x16ced94c8>  => WK_CONTINUE
#>     geometry_start (<none>): GEOMETRYCOLLECTION[UNKNOWN] <0x16ced93d0> => WK_CONTINUE
#>       geometry_start (1): MULTIPOINT[UNKNOWN] <0x16ced9330> => WK_CONTINUE
#>         geometry_start (1): POINT[UNKNOWN] <0x16ced9240> => WK_CONTINUE
#>           coord (1): <0x16ced9240> (-1.084597 -0.168548)  => WK_CONTINUE
#>         geometry_end (1)  => WK_CONTINUE
#>         geometry_start (2): POINT[UNKNOWN] <0x16ced9240> => WK_CONTINUE
#>           coord (1): <0x16ced9240> (-84.752540 71.532830)  => WK_CONTINUE
#>         geometry_end (2)  => WK_CONTINUE
#>         geometry_start (3): POINT[UNKNOWN] <0x16ced9240> => WK_CONTINUE
#>           coord (1): <0x16ced9240> (80.387190 -57.938250)  => WK_CONTINUE
#>         geometry_end (3)  => WK_CONTINUE
#>         geometry_start (4): POINT[UNKNOWN] <0x16ced9240> => WK_CONTINUE
#>           coord (1): <0x16ced9240> (-79.067440 21.643540)  => WK_CONTINUE
#>         geometry_end (4)  => WK_CONTINUE
#>         geometry_start (5): POINT[UNKNOWN] <0x16ced9240> => WK_CONTINUE
#>           coord (1): <0x16ced9240> (81.961800 -87.892440)  => WK_CONTINUE
#>         geometry_end (5)  => WK_CONTINUE
#>       geometry_end (1)  => WK_CONTINUE
#>     geometry_end (<none>)  => WK_CONTINUE
#>   feature_end (1): <0x16ced94c8>  => WK_CONTINUE
#> vector_end: <0x16ced94c8>
#> deinitialize
#> [[1]]
#> [[1]]
#>            [,1]        [,2]
#> [1,]  -1.084597  -0.1685479
#> [2,] -84.752540  71.5328300
#> [3,]  80.387190 -57.9382500
#> [4,] -79.067440  21.6435400
#> [5,]  81.961800 -87.8924400
#> attr(,"class")
#> [1] "XY"         "MULTIPOINT" "sfg"       
#> 
#> attr(,"class")
#> [1] "XY"                 "GEOMETRYCOLLECTION" "sfg"               
#> 
#> attr(,"precision")
#> [1] 0
#> attr(,"bbox")
#>      xmin      ymin      xmax      ymax 
#> -84.75254 -87.89244  81.96180  71.53283 
#> attr(,"class")
#> [1] "bbox"
#> attr(,"crs")
#> $input
#> [1] NA
#> 
#> $wkt
#> [1] NA
#> 
#> attr(,"class")
#> [1] "crs"
#> attr(,"n_empty")
#> [1] 0
#> attr(,"class")
#> [1] "sfc_GEOMETRYCOLLECTION" "sfc"

Created on 2023-07-08 with reprex v2.0.2

Maybe a better comparison:

x <- sf::st_multipoint(matrix(runif(10, -90, 90), ncol = 2))
wk::wk_handle(
  x,
  wk::wk_collection_filter(
    wk::wk_debug_filter(
      wk::sfc_writer()
    )
  )
)
#> initialize (dirty = 0  -> 1)
#> vector_start: GEOMETRYCOLLECTION B[UNKNOWN] <0x107ea1128> => WK_CONTINUE
#>   feature_start (1): <0x107ea1128>  => WK_CONTINUE
#>     geometry_start (<none>): GEOMETRYCOLLECTION[UNKNOWN] <0x107ea10d0> => WK_CONTINUE
#>       geometry_start (1): MULTIPOINT[5] <0x16d0af2f0> => WK_CONTINUE
#>         geometry_start (1): POINT[1] <0x16d0af208> => WK_CONTINUE
#>           coord (1): <0x16d0af208> (-49.422422 -74.503830)  => WK_CONTINUE
#>         geometry_end (1)  => WK_CONTINUE
#>         geometry_start (2): POINT[1] <0x16d0af208> => WK_CONTINUE
#>           coord (1): <0x16d0af208> (-52.331832 -89.760795)  => WK_CONTINUE
#>         geometry_end (2)  => WK_CONTINUE
#>         geometry_start (3): POINT[1] <0x16d0af208> => WK_CONTINUE
#>           coord (1): <0x16d0af208> (52.331655 35.076556)  => WK_CONTINUE
#>         geometry_end (3)  => WK_CONTINUE
#>         geometry_start (4): POINT[1] <0x16d0af208> => WK_CONTINUE
#>           coord (1): <0x16d0af208> (-67.254758 50.019924)  => WK_CONTINUE
#>         geometry_end (4)  => WK_CONTINUE
#>         geometry_start (5): POINT[1] <0x16d0af208> => WK_CONTINUE
#>           coord (1): <0x16d0af208> (34.726618 67.814707)  => WK_CONTINUE
#>         geometry_end (5)  => WK_CONTINUE
#>       geometry_end (2)  => WK_CONTINUE
#>     geometry_end (<none>)  => WK_CONTINUE
#>   feature_end (1): <0x107ea1128>  => WK_CONTINUE
#> vector_end: <0x107ea1128>
#> deinitialize
#> Geometry set for 1 feature 
#> Geometry type: GEOMETRYCOLLECTION
#> Dimension:     XY
#> Bounding box:  xmin: -67.25476 ymin: -89.76079 xmax: 52.33166 ymax: 67.81471
#> CRS:           NA
#> Error in UseMethod("st_as_text"): no applicable method for 'st_as_text' applied to an object of class "NULL"

wk::wk_debug(
  wk::wkt("GEOMETRYCOLLECTION (MULTIPOINT ((-1.084597 -0.1685479), (-84.75254 71.53283), (80.38719 -57.93825), (-79.06744 21.64354), (81.9618 -87.89244)))"),
  wk::sfc_writer()  
)
#> initialize (dirty = 0  -> 1)
#> vector_start: <Unknown type / 0>[1] <0x16d0ad4c8> => WK_CONTINUE
#>   feature_start (1): <0x16d0ad4c8>  => WK_CONTINUE
#>     geometry_start (<none>): GEOMETRYCOLLECTION[UNKNOWN] <0x16d0ad3d0> => WK_CONTINUE
#>       geometry_start (1): MULTIPOINT[UNKNOWN] <0x16d0ad330> => WK_CONTINUE
#>         geometry_start (1): POINT[UNKNOWN] <0x16d0ad240> => WK_CONTINUE
#>           coord (1): <0x16d0ad240> (-1.084597 -0.168548)  => WK_CONTINUE
#>         geometry_end (1)  => WK_CONTINUE
#>         geometry_start (2): POINT[UNKNOWN] <0x16d0ad240> => WK_CONTINUE
#>           coord (1): <0x16d0ad240> (-84.752540 71.532830)  => WK_CONTINUE
#>         geometry_end (2)  => WK_CONTINUE
#>         geometry_start (3): POINT[UNKNOWN] <0x16d0ad240> => WK_CONTINUE
#>           coord (1): <0x16d0ad240> (80.387190 -57.938250)  => WK_CONTINUE
#>         geometry_end (3)  => WK_CONTINUE
#>         geometry_start (4): POINT[UNKNOWN] <0x16d0ad240> => WK_CONTINUE
#>           coord (1): <0x16d0ad240> (-79.067440 21.643540)  => WK_CONTINUE
#>         geometry_end (4)  => WK_CONTINUE
#>         geometry_start (5): POINT[UNKNOWN] <0x16d0ad240> => WK_CONTINUE
#>           coord (1): <0x16d0ad240> (81.961800 -87.892440)  => WK_CONTINUE
#>         geometry_end (5)  => WK_CONTINUE
#>       geometry_end (1)  => WK_CONTINUE
#>     geometry_end (<none>)  => WK_CONTINUE
#>   feature_end (1): <0x16d0ad4c8>  => WK_CONTINUE
#> vector_end: <0x16d0ad4c8>
#> deinitialize
#> Geometry set for 1 feature 
#> Geometry type: GEOMETRYCOLLECTION
#> Dimension:     XY
#> Bounding box:  xmin: -84.75254 ymin: -87.89244 xmax: 81.9618 ymax: 71.53283
#> CRS:           NA
#> GEOMETRYCOLLECTION (MULTIPOINT ((-1.084597 -0.1...

Created on 2023-07-08 with reprex v2.0.2

I think this is an off-by-one bug in the collection filter.

collection_filter->part_id++;

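In the sfc writer, each finished geometry is stored in its parent at index part_id, so with the filter's part_id already incremented the geometry lands one slot past where it belongs (effectively geom[part_id + 1]), leaving the intended slot NULL.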

SET_VECTOR_ELT(writer->geom[writer->recursion_level - 1], part_id, geom);

I've written a patch which fixes this bug. Should I submit a PR?

Absolutely! My recollection is that the collection filter is hard, so thank you for looking into it.