## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----naive, eval = requireNamespace("arrow", quietly = TRUE), message = FALSE----
# Demonstrates the failure mode: a uint64 cell ID written to Parquet and read
# back with the default arrow conversion ends up as a double.
library(arrow)
library(tibble)
library(a5R)

# A real A5 cell — Edinburgh at resolution 20
cell <- a5_lonlat_to_cell(-3.19, 55.95, resolution = 20)
a5_u64_to_hex(cell)

# Write to Parquet as uint64 (the standard interchange format)
tf <- tempfile(fileext = ".parquet")
arrow::write_parquet(
  arrow::arrow_table(cell_id = a5_cell_to_arrow(cell)),
  tf
)

# Read it back naively — arrow silently converts uint64 to double
(naive <- tibble(arrow::read_parquet(tf)))
cell_as_dbl <- naive$cell_id

# The double can't distinguish this cell from nearby IDs.
# These `==` comparisons are deliberate: they show the lost precision.
cell_as_dbl == cell_as_dbl + 1   # TRUE — silent corruption
cell_as_dbl == cell_as_dbl + 100 # still TRUE

## ----bridge, eval = requireNamespace("arrow", quietly = TRUE)-----------------
library(a5R)
library(tibble)

# Six cities across the globe — some will have bit 63 set (origin >= 6)
cities <- tibble(
  name = c("Edinburgh", "Tokyo", "São Paulo", "Nairobi", "Anchorage", "Sydney"),
  lon  = c(-3.19, 139.69, -46.63, 36.82, -149.90, 151.21),
  lat  = c(55.95, 35.69, -23.55, -1.29, 61.22, -33.87)
)

cities$cell <- a5_lonlat_to_cell(cities$lon, cities$lat, resolution = 10)
cities

## ----enrich, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# Add per-cell attributes computed from the cell column.
edinburgh <- cities$cell[1]

cities$resolution <- a5_get_resolution(cities$cell)
cities$dist_from_edinburgh_km <- as.numeric(
  a5_cell_distance(cities$cell, rep(edinburgh, nrow(cities)), units = "km")
)
cities

## ----parquet_write, eval = requireNamespace("arrow", quietly = TRUE)----------
# Build the Arrow table explicitly so the cell column goes through
# a5_cell_to_arrow() rather than a default R-to-Arrow conversion.
tf <- tempfile(fileext = ".parquet")

arrow_tbl <- arrow::arrow_table(
  name = cities$name,
  cell_id = a5_cell_to_arrow(cities$cell),
  cell_res = cities$resolution,
  dist_from_edinburgh_km = cities$dist_from_edinburgh_km
)
arrow_tbl$schema

arrow::write_parquet(arrow_tbl, tf)

## ----parquet_read, eval = requireNamespace("arrow", quietly = TRUE)-----------
# Keep the result as an Arrow Table (as_data_frame = FALSE) so the cell column
# can be handed to a5_cell_from_arrow() before any conversion to R vectors.
pq <- arrow::read_parquet(tf, as_data_frame = FALSE)

# Recover cells from the uint64 column, bind with the rest of the data.
# The column is looked up by name: the R6 `$column()` accessor takes a
# zero-based position, which is easy to misread and breaks if the column
# order written above ever changes.
recovered_cells <- a5_cell_from_arrow(pq[["cell_id"]])

result <- as.data.frame(pq)
result$cell <- recovered_cells

# Keep only the recovered cell column plus the remaining attributes.
keep_cols <- c("name", "cell", "cell_res", "dist_from_edinburgh_km")
result <- tibble::as_tibble(result[keep_cols])
result

## ----verify, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# Round-trip check: the formatted cells must match the originals exactly.
identical(format(cities$cell), format(result$cell))