Load packages


sim_meta <- function(
  possible_samp_est =
    tibble(
      sample = list("pps", "pps",
                    "rds", "rds", "rds",
                    "tls", "tls", "tls",
                    c("rds", "pps"), c("rds", "tls")),
      estimator = c("ht", "nsum",
                    "sspse", "chords", "multiplier",
                    "ht", "nsum", "recapture",
                    "recapture", "recapture"),
      rel_bias = c(1, runif(9, 0.5, 3)),
      absolute_bias = .95 * rel_bias,
      se = (1 + (1 - rel_bias)^2) * 20),
  estimands =
    tibble(estimand = rnbinom(n = 8, size = 10, mu = 300),
           study = paste0("study_", 1:8)),
  pop = TRUE
) {

  tidyr::expand_grid(possible_samp_est, estimands) %>%
    dplyr::mutate(
      estimate = floor(estimand * (absolute_bias)),
      sim_ID = 1,
      inquiry = "hidden_size") %>%
    dplyr::arrange(inquiry, study, sample, estimator) %>%
    purrr::when(
      pop ~ dplyr::select(., sim_ID, study, inquiry, sample, estimator, estimand, estimate, se, everything()) %>% as.data.frame(),
      ~ get_meta_sample(data = .) %>% dplyr::filter(meta == 1)
    )

}

sim_pop <- function(sims = 100,
                    possible_samp_est =
                      tibble(
                        sample = list("pps", "pps",
                                      "rds", "rds", "rds",
                                      "tls", "tls", "tls",
                                      c("rds", "pps"), c("rds", "tls")),
                        estimator = c("ht", "nsum",
                                      "sspse", "chords", "multiplier",
                                      "ht", "nsum", "recapture",
                                      "recapture", "recapture"),
                        rel_bias = c(1, runif(9, 0.5, 3)),
                        absolute_bias = .95 * rel_bias,
                        se = (1 + (1 - rel_bias)^2) * 20),
                    estimands =
                      tibble(estimand = rnbinom(n = 8, size = 10, mu = 300),
                             study = paste0("study_", 1:8)),
                    pop = TRUE) {
  lapply(1:100,
         function(x) sim_meta(possible_samp_est, estimands, pop) %>% dplyr::mutate(sim_ID = x)) %>%
    bind_rows()
}

Simple meta study simulation

Declaration

meta_population <-
  declare_population(sims = 100,
                     possible_samp_est =
                       tibble(
                         sample = list("pps", "pps",
                                       "rds", "rds", "rds",
                                       "tls", "tls", "tls",
                                       c("rds", "pps"), c("rds", "tls")),
                         estimator = c("ht", "nsum",
                                       "sspse", "chords", "multiplier",
                                       "ht", "nsum", "recapture",
                                       "recapture", "recapture"),
                         rel_bias = c(1, runif(9, 0.5, 3)),
                         absolute_bias = .8 * rel_bias,
                         se = (1 + (1 - rel_bias)^2) * 20),
                     estimands =
                       tibble(estimand = rnbinom(n = 8, size = 10, mu = 300),
                              study = paste0("study_", 1:8)),
                     pop = TRUE,
                     handler = sim_pop)

meta_inquiry <-
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht",
                  handler = get_meta_estimands)

meta_sample <-
  declare_sampling(sampling_variable = "meta",
                   selection_variables = c("sample", "estimator"),
                   samples_per_study = 2, estimator_per_sample = 3,
                   force = list(sample = "pps", estimator = "ht"),
                   handler = get_meta_sample)

meta_estimator <-
  declare_estimator(sampling_variable = "meta",
                    which_estimand = "hidden_size",
                    benchmark = list(sample = "pps", estimator = "ht"),
                    control_params = list(
                      iter = 4000, chains = 4, thin = 5,
                      seed = 872312,
                      cores = 1),
                    stan_handler = get_meta_stan2,
                    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

Run simulations


requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 50)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:200),
    .fun = function(x) {
      base::suppressWarnings(
        try(DeclareDesign::simulate_design(meta_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          ))
      )
    },
    .parallel = TRUE
  )

meta_simulations <- 
  meta_simulations[sapply(meta_simulations, function(x) class(x) == "data.frame")] %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = here::here("inst/extdata/meta_sim_simple.rds"))

Diagnose meta study


meta_simulations <- 
  readRDS(system.file("extdata", "meta_sim_simple.rds", package = "hiddenmeta"))

study_diagnosands <-
  declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2))
  )


(
  table_meta_simple <- 
    diagnose_design(simulations_df = meta_simulations,
                    diagnosands = study_diagnosands) %>%
    reshape_diagnosis(digits = 2) %>%
    dplyr::select(-'Design') %>%
    dplyr::filter(`Mean Estimate` != "NA") 
) %>%
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Use actual study designs


study_map <- c("Brazil (FF)" = "ff_brazil",
               "Tunisia (UMass)" = "umass_tunisia",
               "Brazil (Stanford)" = "stanford_brazil",
               "Pakistan (JHU)" = "jhu_pakistan",
               "Costa Rica (John Jay)" = "johnjay_costarica",
               "Tanzania (John Jay)" = "johnjay_tanzania",
               "Morocco (NORC)" = "norc_marocco",
               "USA (RTI)" = "rti_usa")

study_designs_base <- 
  readRDS(system.file("extdata", "multi_sim_real_original.rds", package = "hiddenmeta")) %>% 
  dplyr::filter(!is.na(estimator), !is.nan(se)) %>%
    dplyr::mutate(
      study = purrr::map_chr(inquiry, ~ sapply(.x, function(x) {
        unname(study_map)[sapply(unname(study_map), function(stud) grepl(stud, x, fixed = TRUE))]
      })),
      inquiry =
        purrr::map2_chr(study, inquiry, ~ gsub(pattern = paste0(.x, "_"), replacement = "", x = .y, fixed = TRUE)),
      estimator =
        purrr::map2_chr(inquiry, estimator, ~ gsub(pattern = paste0(.x, "_"), replacement = "", x = .y, fixed = TRUE)),
      estimator_full = estimator,
      estimator = purrr::map_chr(estimator_full,
                                 ~ tail(strsplit(.x, "_")[[1]], 1)),
      sampling =
        purrr::map2_chr(estimator, estimator_full,
                        ~ gsub(pattern = paste0("_", .x), replacement = "", x = .y, fixed = TRUE)),
      sample = purrr::map(sampling, ~ strsplit(.x, split = "_")[[1]]),
      absolute_bias = abs(estimate/estimand)
    ) %>% 
  dplyr::select(-design, -sampling, -estimator_full) %>% 
  dplyr::filter(inquiry == "hidden_size") %>% 
  dplyr::group_by(sim_ID, study, inquiry) %>% 
  dplyr::mutate(
    rel_bias = estimate/estimate[estimator == "ht"]
  ) %>% 
  dplyr::ungroup() %>% 
  dplyr::select(sim_ID, study, inquiry, sample, estimator, estimand, estimate, se, rel_bias, absolute_bias
)

# remove all sampling-estimators observed only once
meta_population <-
  study_designs_base %>% 
  dplyr::group_by(sim_ID, sample, estimator) %>% 
  dplyr::filter(n() >= 3) %>% 
  dplyr::ungroup() %>% 
  declare_population(.)

meta_sample <-
  declare_sampling(sampling_variable = "meta",
                   handler = function(data, sampling_variable) {
                     data %>% 
                       dplyr::mutate(
                         "{ sampling_variable }" := as.integer(sim_ID == sample(x = unique(sim_ID), size = 1))
                       )
                   })

meta_inquiry <-
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht",
                  handler = get_meta_estimands)

meta_estimator <-
  declare_estimator(
    sampling_variable = "meta",
    which_estimand = "hidden_size",
    benchmark = list(sample = "pps", estimator = "ht"),
    rel_bias_prior = list(mean = 1, se = 5),
    control_params = list(
      iter = 8000, chains = 8, thin = 10,
      seed = 872312,
      cores = 1),
    stan_handler = get_meta_stan3,
    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

Run simulations


requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 50)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(meta_study, sims = 1) %>%
              dplyr::mutate(
                sim_ID = x
              )
      )
    },
    .parallel = TRUE
  )

meta_simulations <- 
  meta_simulations[sapply(meta_simulations, function(x) class(x) == "data.frame")] %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = here::here("inst/extdata/meta_sim_real_original.rds"))

Diagnose meta study


meta_simulations <- 
  readRDS(system.file("extdata", "meta_sim_real_original.rds", package = "hiddenmeta"))

study_diagnosands <-
  declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2)),
    prior = mean(prior),
    prior_sd = mean(prior_sd)
  )

(
  table_meta_original <- 
    diagnose_design(simulations_df = meta_simulations,
                    diagnosands = study_diagnosands) %>%
    reshape_diagnosis(digits = 2) %>%
    dplyr::select(-'Design') %>%
    dplyr::filter(`Mean Estimate` != "NA") 
) %>%
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Leave 1 out exercise


for (i in seq_along(study_map)) {
  
  cat("Doing simulations leaving study", names(study_map)[i], "out: ")
  
  study_designs_base <- 
    readRDS(system.file("extdata", "multi_sim_real_original.rds", package = "hiddenmeta")) %>% 
    dplyr::filter(!is.na(estimator), !is.nan(se)) %>%
    dplyr::mutate(
      study = purrr::map_chr(inquiry, ~ sapply(.x, function(x) {
        unname(study_map)[sapply(unname(study_map), function(stud) grepl(stud, x, fixed = TRUE))]
      })),
      inquiry =
        purrr::map2_chr(study, inquiry, ~ gsub(pattern = paste0(.x, "_"), replacement = "", x = .y, fixed = TRUE)),
      estimator =
        purrr::map2_chr(inquiry, estimator, ~ gsub(pattern = paste0(.x, "_"), replacement = "", x = .y, fixed = TRUE)),
      estimator_full = estimator,
      estimator = purrr::map_chr(estimator_full,
                                 ~ tail(strsplit(.x, "_")[[1]], 1)),
      sampling =
        purrr::map2_chr(estimator, estimator_full,
                        ~ gsub(pattern = paste0("_", .x), replacement = "", x = .y, fixed = TRUE)),
      sample = purrr::map(sampling, ~ strsplit(.x, split = "_")[[1]]),
      absolute_bias = abs(estimate/estimand)
    ) %>% 
    dplyr::filter(study != unname(study_map)[i]) %>%
    dplyr::select(-design, -sampling, -estimator_full) %>% 
    dplyr::filter(inquiry == "hidden_size") %>% 
    dplyr::group_by(sim_ID, study, inquiry) %>% 
    dplyr::mutate(
      rel_bias = estimate/estimate[estimator == "ht"]
    ) %>% 
    dplyr::ungroup() %>% 
    dplyr::select(sim_ID, study, inquiry, sample, estimator, estimand, estimate, se, rel_bias, absolute_bias
    )
  
  # remove all sampling-estimators observed only once
  meta_population <-
    study_designs_base %>% 
    dplyr::group_by(sim_ID, sample, estimator) %>% 
    dplyr::filter(n() >= 3) %>% 
    dplyr::ungroup() %>% 
    declare_population(.)
  
  meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator
  
  
  requireNamespace(c("doParallel", "parallel"))
  doParallel::registerDoParallel(cores = 25)
  
  set.seed(872312)
  meta_simulations <-
    plyr::llply(
      as.list(1:50),
      .fun = function(x) {
        base::suppressWarnings(
          DeclareDesign::simulate_design(meta_study, sims = 1) %>%
            dplyr::mutate(
              sim_ID = x
            )
        )
      },
      .parallel = TRUE
    )
  
  meta_simulations <- 
    meta_simulations[sapply(meta_simulations, function(x) class(x) == "data.frame")] %>%
    dplyr::bind_rows()
  
  saveRDS(meta_simulations, 
          file = here::here(paste0("inst/extdata/meta_sim_real_excl_", unname(study_map)[i], ".rds")))
  
  cat("done!\n")
  
}

study_diagnosands <-
  declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2)),
    prior = mean(prior),
    prior_sd = mean(prior_sd)
  )

leave1out_sims <- 
  lapply(study_map, function(x) {
    
    readRDS(system.file("extdata", paste0("meta_sim_real_excl_", x, ".rds"), package = "hiddenmeta")) %>% 
      diagnose_design(simulations_df = .,
                      diagnosands = study_diagnosands) %>%
      .$diagnosands_df %>% 
      dplyr::filter(!is.na(mean_estimate)) %>% 
      dplyr::mutate(study_excluded = x,
                    "{ x }" := rmse/mean_estimand) %>% 
      dplyr::select(c(inquiry, all_of(x)))
    
  }) 

(
  table_leave1out <- 
    readRDS(system.file("extdata", paste0("meta_sim_real_original.rds"), package = "hiddenmeta")) %>% 
    diagnose_design(simulations_df = .,
                    diagnosands = study_diagnosands) %>%
    .$diagnosands_df %>% 
    dplyr::filter(!is.na(mean_estimate)) %>% 
    dplyr::mutate(all = abs(rmse/mean_estimand)) %>% 
    dplyr::select(c(inquiry, all)) %>% 
    purrr::reduce(.x = leave1out_sims, .f = left_join, by = "inquiry", .init = .) %>% 
    dplyr::filter(!(inquiry %in% c("rel_bias_tls_recap", "rel_bias_pps_ht")) & !grepl("hidden_size", inquiry)) %>%
    dplyr::mutate(
      across(all_of(unname(study_map)), ~ ifelse(is.na(.), "-", format(., digits = 2))),
      all = format(all, digits = 2),
      # inquiry = gsub("_hidden_size", "size", inquiry),
      inquiry = gsub("rel_bias_", "", inquiry)
    ) %>% 
    dplyr::rename(Estimand = inquiry, "Benchmark" = all) %>% 
    dplyr::rename_with(~ plyr::mapvalues(., from = unname(study_map), to = names(study_map)))
) %>%
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))