Skip to content

Commit

Permalink
Merge pull request #34 from antaldaniel/master
Browse files Browse the repository at this point in the history
roxygen and tidyselect changes
  • Loading branch information
antaldaniel authored Dec 1, 2023
2 parents 94a6eb7 + bb7b652 commit 3a6dbbb
Show file tree
Hide file tree
Showing 24 changed files with 285 additions and 191 deletions.
28 changes: 11 additions & 17 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
Type: Package
Package: retroharmonize
Title: Ex Post Survey Data Harmonization
Version: 0.2.5.002
Date: 2022-09-24
Version: 0.2.5.003
Date: 2023-12-01
Authors@R: c(
person("Daniel", "Antal", , "[email protected]", role = c("aut", "cre"),
person(given = "Daniel",
family = "Antal",
email = "[email protected]",
role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-7513-6760")),
person(given = "Marta",
family = "Kolczynska",
role = c("ctb"),
email = "[email protected]",
comment = c(ORCID = "0000-0003-4981-0437")),
person(given = "Pyry",
family = "Kantanen",
role = "ctb",
comment = c(ORCID = "0000-0003-2853-2765")),
person(given = "Leo",
family = " Lahti",
role = "ctb",
comment = c(ORCID = "0000-0001-5537-637X")
)
family = "Kolczynska",
role = c("ctb"),
email = "[email protected]",
comment = c(ORCID = "0000-0003-4981-0437"))
)
Maintainer: Daniel Antal <[email protected]>
Description: Assist in reproducible retrospective (ex-post) harmonization
Expand Down Expand Up @@ -69,6 +63,6 @@ Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
LazyData: true
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
X-schema.org-isPartOf: http://ropengov.org/
X-schema.org-keywords: ropengov
2 changes: 1 addition & 1 deletion R/collect_val_labels.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Collect labels from metadata file
#' @title Collect labels from metadata file
#'
#' @param metadata A metadata data frame created by
#' \code{\link{metadata_create}}.
Expand Down
38 changes: 19 additions & 19 deletions R/create_codebook.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,32 +83,32 @@ create_codebook <- function ( metadata = NULL,
if ( n_labelled_numeric > 0 ) {
# These area cases when the labels are of class numeric
valid_labelled_numeric <- metadata_labelled_numeric %>%
filter ( grepl( "labelled", .data$class_orig )) %>%
filter ( grepl( "labelled", class_orig )) %>%
select ( all_of(c("entry", "id", "filename", "var_name_orig", "var_label_orig", "valid_labels"))) %>%
unnest_longer( .data$valid_labels) %>%
unnest_longer( valid_labels) %>%
rlang::set_names ( c("entry", "id", "filename", "var_name_orig","var_label_orig", "val_code_orig", "val_label_orig")) %>%
mutate (
# This is the valid observation range
label_range = "valid",
val_code_orig = as.character(.data$val_code_orig))
val_code_orig = as.character(val_code_orig))

na_labelled_numeric <- metadata[num_labels ,] %>%
filter ( grepl( "labelled", .data$class_orig )) %>%
filter ( grepl( "labelled", class_orig )) %>%
select ( all_of(c("entry", "id", "filename", "var_name_orig", "var_label_orig", "na_labels"))) %>%
unnest_longer( .data$na_labels) %>%
unnest_longer( na_labels) %>%
purrr::set_names ( c("entry", "id", "filename", "var_name_orig", "var_label_orig", "val_code_orig", "val_label_orig")) %>%
mutate (
# This is the missing observation range
label_range = "missing") %>%
filter ( !is.na(.data$val_code_orig) ) %>%
mutate ( val_code_orig = as.character(.data$val_code_orig) )
filter ( !is.na(val_code_orig) ) %>%
mutate ( val_code_orig = as.character(val_code_orig) )


num_labels <- valid_labelled_numeric %>%
dplyr::bind_rows (
na_labelled_numeric
) %>%
dplyr::arrange( .data$entry, .data$val_code_orig ) %>%
dplyr::arrange( entry, val_code_orig ) %>%
left_join ( metadata %>% select ( any_of(c("entry", "id", "filename", "na_range",
"n_labels", "n_valid_labels", "n_na_labels",
user_vars))),
Expand All @@ -126,34 +126,34 @@ create_codebook <- function ( metadata = NULL,
if ( n_labelled_character > 0) {
# These area cases when the na_labels are of class character
valid_labelled_character <- metadata_labelled_character %>%
filter ( grepl( "labelled", .data$class_orig )) %>%
filter ( grepl( "labelled", class_orig )) %>%
select ( all_of(c("entry", "id", "filename", "var_name_orig", "var_label_orig", "valid_labels"))) %>%
unnest_longer( .data$valid_labels) %>%
unnest_longer( valid_labels) %>%
rlang::set_names ( c("entry", "id", "filename", "var_name_orig","var_label_orig", "val_code_orig", "val_label_orig")) %>%
mutate (
# This is the valid observation range
label_range = "valid") %>%
mutate ( val_code_orig = as.character(.data$val_code_orig) )
mutate ( val_code_orig = as.character(val_code_orig) )


na_labelled_character <- metadata[char_labels ,] %>%
filter ( grepl( "labelled", .data$class_orig )) %>%
filter ( grepl( "labelled", class_orig )) %>%
select ( all_of(c("entry", "id", "filename", "var_name_orig", "var_label_orig", "na_labels"))) %>%
unnest_longer( .data$na_labels) %>%
unnest_longer( na_labels) %>%
purrr::set_names ( c("entry", "id", "filename", "var_name_orig", "var_label_orig",
"val_code_orig", "val_label_orig")) %>%
mutate (
# This is the missing observation range
label_range = "missing") %>%
filter ( !is.na(.data$val_code_orig)) %>%
mutate ( val_code_orig = as.character(.data$val_code_orig) )
filter ( !is.na(val_code_orig)) %>%
mutate ( val_code_orig = as.character(val_code_orig) )


char_labels <- valid_labelled_character %>%
dplyr::bind_rows (
na_labelled_character
) %>%
dplyr::arrange( .data$entry, .data$val_code_orig ) %>%
dplyr::arrange( entry, val_code_orig ) %>%
left_join ( metadata %>% select ( any_of(c("entry", "id", "filename", "na_range",
"n_labels", "n_valid_labels", "n_na_labels",
user_vars))),
Expand All @@ -178,14 +178,14 @@ create_codebook <- function ( metadata = NULL,
left_join ( user_data[0,], by = "entry" )
} else if ( n_labelled_character == 0 ) {
num_labels %>%
dplyr::arrange (.data$entry)
dplyr::arrange (entry)
} else if ( n_labelled_numeric == 0 ) {
char_labels %>%
dplyr::arrange (.data$entry)
dplyr::arrange (entry)
} else {
num_labels %>%
bind_rows ( char_labels) %>%
dplyr::arrange (.data$entry)
dplyr::arrange (entry)
}
}

Expand Down
22 changes: 10 additions & 12 deletions R/crosswalk.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#' data frames, where the variable names, and optionally the variable labels, and the missing
#' value range is harmonized (the same names, labels, codes are used.)
#' @importFrom dplyr filter select mutate distinct_all relocate across everything
#' @importFrom rlang .data
#' @importFrom assertthat assert_that
#' @family harmonization functions
#' @examples
Expand Down Expand Up @@ -90,7 +89,7 @@ crosswalk_surveys <- function(crosswalk_table,
msg = "selection must have rows")

select_to_harmonize <- selection %>%
filter ( !is.na(.data$val_label_orig) )
filter ( !is.na(val_label_orig) )

vars_to_harmonize <- unique(select_to_harmonize$var_name_target)

Expand All @@ -102,7 +101,7 @@ crosswalk_surveys <- function(crosswalk_table,
for ( this_var in vars_to_harmonize ) {

correspondence_table <- select_to_harmonize %>%
filter ( .data$var_name_target == this_var )
filter ( var_name_target == this_var )

assert_that(is.numeric(correspondence_table$val_numeric_target),
msg = "Error in relabel_survey: 'val_numeric_target' must be a numeric vector")
Expand Down Expand Up @@ -138,15 +137,15 @@ crosswalk_surveys <- function(crosswalk_table,
subset_survey <- function(this_survey) {

survey_id <- attr(this_survey, "id")
assertthat::assert_that(length(survey_id)>0,
msg = "Error in subset_survey(): survey_id has 0 length.")
assert_that(length(survey_id)>0,
msg = "Error in subset_survey(): survey_id has 0 length.")

tmp <- this_survey %>%
mutate ( id = survey_id ) %>%
relocate ( .data$id, .before = everything())
relocate ( id, .before = everything())

selection <- crosswalk_table %>%
filter ( .data$id == survey_id ) %>%
filter ( id == survey_id ) %>%
distinct_all()


Expand Down Expand Up @@ -322,8 +321,8 @@ crosswalk_table_create <- function(metadata) {
if (nrow(metadata)==1) {
fn_labels(x=metadata[1,])
} else {
ctable_list <- lapply ( 1:nrow(metadata), function(x) fn_labels(metadata[x,]) )
ctable <- suppressMessages(purrr::reduce ( ctable_list, full_join ))
ctable_list <- lapply (1:nrow(metadata), function(x) fn_labels(metadata[x,]))
ctable <- suppressMessages(purrr::reduce(ctable_list, full_join))
ctable
}
}
Expand All @@ -332,7 +331,6 @@ crosswalk_table_create <- function(metadata) {
#' @rdname crosswalk_table_create
#' @param ctable A table to validate if it is a crosswalk table.
#' @importFrom dplyr tally group_by across filter
#' @importFrom rlang .data
#' @family metadata functions
#' @export

Expand All @@ -351,8 +349,8 @@ is.crosswalk_table <- function(ctable) {
distinct_all() %>%
group_by ( across(c("var_name_target", "id"))) %>%
tally() %>%
filter ( .data$n>1) %>%
select (.data$var_name_target ) %>%
filter ( n>1) %>%
select (var_name_target ) %>%
unlist()

error_msg <- paste(unique(duplicates), collapse = ', ')
Expand Down
4 changes: 2 additions & 2 deletions R/document_survey_item.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ document_survey_item <- function(x) {
tbl_length <- nrow(coding)

list (
code_table = dplyr::bind_cols(coding, labelling) %>%
mutate ( missing = ifelse (.data$values %in% attr(x, "na_values"),
code_table = bind_cols(coding, labelling) %>%
mutate ( missing = ifelse (values %in% attr(x, "na_values"),
TRUE, FALSE)),
history_var_name = c(
c("name" = original_x_name ),
Expand Down
20 changes: 10 additions & 10 deletions R/harmonize_survey_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ harmonize_survey_variables <- function( crosswalk_table,
new_names <- tibble( var_name_orig = names(this_survey)) %>%
left_join (
crosswalk_table %>%
filter (.data$id == survey_id) %>%
select ( .data$var_name_orig, .data$var_name_target ) %>%
filter (id == survey_id) %>%
select ( var_name_orig, var_name_target ) %>%
distinct_all(),
by = "var_name_orig",
) %>%
mutate ( var_name_target = ifelse (.data$var_name_orig == "rowid",
mutate ( var_name_target = ifelse (var_name_orig == "rowid",
yes = "rowid",
no = .data$var_name_target)) %>%
select ( .data$var_name_target ) %>% unlist() %>% as.character()
no = var_name_target)) %>%
select ( var_name_target ) %>% unlist() %>% as.character()

rlang::set_names(this_survey, nm = new_names )

Expand All @@ -96,15 +96,15 @@ harmonize_survey_variables <- function( crosswalk_table,
new_names <- tibble( var_name_orig = names(this_survey)) %>%
left_join (
crosswalk_table %>%
filter (.data$id == survey_id) %>%
select ( .data$var_name_orig, .data$var_name_target ) %>%
filter (id == survey_id) %>%
select ( var_name_orig, var_name_target ) %>%
distinct_all(),
by = "var_name_orig",
) %>%
mutate ( var_name_target = ifelse (.data$var_name_orig == "rowid",
mutate ( var_name_target = ifelse (var_name_orig == "rowid",
yes = "rowid",
no = .data$var_name_target)) %>%
select ( .data$var_name_target ) %>% unlist() %>% as.character()
no = var_name_target)) %>%
select ( var_name_target ) %>% unlist() %>% as.character()

this_survey <- rlang::set_names(this_survey, nm = new_names )
saveRDS(this_survey, file = file.path(export_path, x), version = 2 )
Expand Down
11 changes: 5 additions & 6 deletions R/metadata_create.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ metadata_waves_create <- function(survey_list) {
#' @importFrom labelled na_values na_range val_labels var_label
#' @importFrom purrr map
#' @importFrom assertthat assert_that
#' @importFrom rlang .data
#' @family metadata functions
#' @return A nested data frame with metadata and the range of
#' labels, na_values and the na_range itself.
Expand Down Expand Up @@ -219,14 +218,14 @@ metadata_survey_create <- function(survey) {

return_df <- metadata %>%
left_join ( range_df %>%
group_by ( .data$var_name_orig ) %>%
group_by ( var_name_orig ) %>%
tidyr::nest(),
by = "var_name_orig") %>%
tidyr::unnest ( cols = "data" ) %>%
ungroup() %>%
mutate ( n_na_labels = as.numeric(.data$n_na_labels),
n_valid_labels = as.numeric(.data$n_valid_labels),
n_labels = as.numeric(.data$n_labels)) %>%
mutate ( n_na_labels = as.numeric(n_na_labels),
n_valid_labels = as.numeric(n_valid_labels),
n_labels = as.numeric(n_labels)) %>%
as.data.frame()

change_label_to_empty <- function() {
Expand All @@ -247,7 +246,7 @@ metadata_survey_create <- function(survey) {
no = return_df$na_labels )

return_df %>%
select ( -.data$label_type )
select ( -label_type )
}


Expand Down
4 changes: 2 additions & 2 deletions R/pull_survey.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Pull a survey from a survey list
#' @title Pull a survey from a survey list
#'
#' Pull a survey by survey code or id.
#' @description Pull a survey by survey code or id.
#'
#' @param survey_list A list of surveys
#' @param id The id of the requested survey. If \code{NULL} use
Expand Down
13 changes: 7 additions & 6 deletions R/read_spss.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' \code{tibble::\link[tibble:as_tibble]{as_tibble}} for details.
#' @inheritParams read_rds
#' @importFrom haven read_spss read_sav write_sav is.labelled
#' @importFrom assertthat assert_that
#' @importFrom tibble rowid_to_column as_tibble
#' @importFrom fs path_ext_remove path_file is_file
#' @importFrom labelled var_label
Expand Down Expand Up @@ -58,7 +59,7 @@ read_spss <- function(file,

source_file_info <- valid_file_info(file)

safely_read_haven_spss <- purrr::safely(.f = haven::read_spss)
safely_read_haven_spss <- safely(.f = haven::read_spss)

tmp <- safely_read_haven_spss (file = file,
user_na = user_na,
Expand All @@ -78,13 +79,13 @@ read_spss <- function(file,

all_vars <- names(tmp)

assertthat::assert_that(length(all_vars)>0,
msg = "The SPSS file has no names.")
assert_that(length(all_vars)>0,
msg = "The SPSS file has no names.")

filename <- fs::path_file(file)
filename <- path_file(file)

if ( is.null(id) ) {
id <- fs::path_ext_remove ( filename )
id <- path_ext_remove(filename)
}

if ( is.null(doi)) {
Expand Down Expand Up @@ -161,7 +162,7 @@ read_spss <- function(file,
return_survey <- survey (return_df, id=id, filename=filename, doi=doi)

object_size <- as.numeric(object.size(as_tibble(return_df)))
attr(return_survey, "object_size") <- object_size
attr(return_survey, "object_size") <- object_size
attr(return_survey, "source_file_size") <- source_file_info$size

return_survey
Expand Down
Loading

0 comments on commit 3a6dbbb

Please sign in to comment.