diff --git a/DESCRIPTION b/DESCRIPTION index abdf852..559f8c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: melodi Title: Retrieve Data from the Insee Melodi APIs -Version: 1.0.0 +Version: 1.0.1 Authors@R: c( person("Cédric", "Bobinec", email = "cedric.bobinec@insee.fr", role = c("aut", "cre")), person("Christophe", "Goudeau", role = "aut", email = "christophe.goudeau@insee.fr"), @@ -30,7 +30,8 @@ Imports: zip, labelled, tibble, - stats + stats, + purrr VignetteBuilder: knitr URL: https://InseeFrLab.github.io/melodi/, https://github.com/InseeFrLab/melodi BugReports: https://github.com/InseeFrLab/melodi/issues diff --git a/NEWS.md b/NEWS.md index 9bbf3c2..498fae5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# melodi 1.0.1 +- #12 : `get_range_geo` : ajout GEO_OBJECT_LABEL + remplacement des boucles par purrr + # melodi 1.0.0 - Relèvement du seuil de `get_data` à **100 000** lignes (amélioration de l'API Melodi) - Centralisation en **options** des paramètres techniques d'appel à l'API, plutôt qu'en paramètre (ou caché) dans les fonctions diff --git a/R/get_range_geo.R b/R/get_range_geo.R index 3ce1074..3765256 100644 --- a/R/get_range_geo.R +++ b/R/get_range_geo.R @@ -27,59 +27,30 @@ get_range_geo <- function( message("Request dataset range : ", url) dataset <- httr2::request(url) |> - httr2::req_user_agent(getOption("rmelodi.req_user_agent")) |> httr2::req_perform() |> httr2::resp_body_json(simplifyVector = FALSE) - range <- dataset[["range"]] - # Keep GEO only - range <- Filter(function(x) { - x[["concept"]][["code"]] == "GEO" - }, range) + range_geo <- dataset$range |> + purrr::keep(\(x) x$concept$code == "GEO") |> + purrr::pluck(1, "values") - if (length(range) == 0) { + if (is.null(range_geo) || length(range_geo) == 0) { stop("Error: 'GEO' dimension is not present in the dataset.") } - # for null cases (English GEO labels...) - safe_extract <- function(x) { - if (is.null(x)) NA else x - } - - codebook_list <- list() - - for (i in seq_along(range)) { - # concepts returned by the API are effectively dimensions of the dataset - dimension <- range[[i]][["concept"]][["code"]] |> safe_extract() - dimension_label <- range[[i]][["concept"]][["label"]][[lang]] |> safe_extract() - - values <- range[[i]][["values"]] |> safe_extract() - - for (j in seq_along(values)) { - value <- values[[j]][["code"]] |> safe_extract() - value_label <- values[[j]][["label"]][[lang]] |> safe_extract() - value_id <- values[[j]][["id"]] |> safe_extract() - - # Créer la liste sans geo_object si dimension_geo est FALSE - codebook_list[[length(codebook_list) + 1]] <- list( - dimension = dimension, - dimension_label = dimension_label, - value = value, - value_label = value_label, - value_id = value_id - ) - } - } - - codebook_df <- do.call(rbind, lapply(codebook_list, as.data.frame)) - rownames(codebook_df) <- NULL - - codebook_df <- codebook_df |> - dplyr::arrange(dimension, value) |> - tidyr::separate(value_id, into = c("GEO_REF", "GEO_OBJECT", "GEO"), sep = "-") |> - dplyr::mutate(GEO_LABEL = value_label) |> - dplyr::select(GEO_REF, GEO_OBJECT, GEO, GEO_LABEL) |> + # Unfold + codebook_df <- tibble::tibble( + GEO_LABEL = purrr::map_chr(range_geo, ~ .x$label[[lang]] %||% NA_character_), + GEO_OBJECT_LABEL = purrr::map_chr(range_geo, ~ .x$type[[lang]] %||% NA_character_), + value_id = purrr::map_chr(range_geo, "id", .default = NA_character_) + ) |> + tidyr::separate_wider_delim( + value_id, + delim = "-", + names = c("GEO_REF", "GEO_OBJECT", "GEO"), + ) |> + dplyr::select(GEO_REF, GEO_OBJECT, GEO_OBJECT_LABEL, GEO, GEO_LABEL) |> dplyr::arrange(GEO_OBJECT, GEO) return(codebook_df) diff --git a/R/globals.R b/R/globals.R index 270a242..075b423 100644 --- a/R/globals.R +++ b/R/globals.R @@ -11,6 +11,6 @@ globalVariables( # get_data "value", "GEO", # get_range_geo - "GEO_REF", "GEO_OBJECT", "GEO_LABEL" + "GEO_REF", "GEO_OBJECT", "GEO_LABEL", "GEO_OBJECT_LABEL", "value_id" ) ) diff --git a/tests/testthat/test-get_range_geo.R b/tests/testthat/test-get_range_geo.R index 05aee76..b3b887a 100644 --- a/tests/testthat/test-get_range_geo.R +++ b/tests/testthat/test-get_range_geo.R @@ -5,7 +5,7 @@ vcr::use_cassette("get_range_geo_ec_deces", { expect_s3_class(range_geo, "data.frame") expect_true( all( - c("GEO_REF", "GEO_OBJECT", "GEO", "GEO_LABEL") %in% + c("GEO_REF", "GEO_OBJECT", "GEO_OBJECT_LABEL", "GEO", "GEO_LABEL") %in% colnames(range_geo) ) ) @@ -20,13 +20,21 @@ vcr::use_cassette("get_range_geo_ec_deces", { object = unique(range_geo$GEO_OBJECT), expected = c("DEP", "FRANCE", "OTHER", "REG") ) - # Check one label is OK + # Check one GEO label is OK expect_equal( object = range_geo %>% dplyr::filter(GEO == "44" & GEO_OBJECT == "DEP") %>% dplyr::pull(GEO_LABEL), expected = "Loire-Atlantique" ) + # Check one GEO_OBJECT label is OK + expect_equal( + object = range_geo %>% + dplyr::filter(GEO_OBJECT == "DEP") %>% + dplyr::pull(GEO_OBJECT_LABEL) %>% + unique(), + expected = "Département" + ) }) })