Introduction to hiddenmeta • hiddenmeta

Install and load packages


install.packages("DeclareDesign")

devtools::install_github("gerasy1987/hiddenmeta", build_vignettes = TRUE)


library(hiddenmeta)
library(DeclareDesign)

Step 1. Provide study design features


## STUDY 1
study_1 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 1000, K = 2, prev_K = c(known = .1, hidden = .2), rho_K = 0,
               p_edge_within = list(known = c(0.1, 0.1), hidden = c(0.1, 0.3)),
               p_edge_between = list(known = 0.02, hidden = 0.02),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .8),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(
            service_use = "rbinom(.N, 1, 0.25)",
            "paste0('loc_', 1:10) := lapply(rep(.2, times = 10), function(add) rbinom(.N, 1, 0.05 + hidden * add))",
            target = 0.3,
            "paste0('known_', 2:10) := lapply(2:10, function(x) rbinom(.N, 1, 0.3))")
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   add_seeds = 3,
                   target_type = "sample",
                   target_n_rds = 60),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   hidden_var = NULL,
                   # TLS sampling parameters
                   target_n_clusters = 5,
                   target_cluster_type = "fixed",
                   target_per_cluster = 15,
                   clusters = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 150),
        lts = list(handler = sample_rds,
                   sampling_variable = "lts",
                   hidden_var = "hidden",
                   n_seed = 50,
                   n_coupons = 3,
                   n_waves = 3,
                   target = "waves",
                   target_n_rds = 200,
                   linktrace = "all")
      ),
    inquiries = list(handler = get_study_estimands,
                     in_groups = "^target$",
                     out_groups = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(ss = list(handler = get_study_est_ss,
                         sampling_var = "hidden",
                         hidden_var = "target", # estimates prevalence withing RDS
                         n_coupons = 3,
                         total = 1000,
                         prefix = "rds",
                         label = "ss"),
               sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 1000,
                            prefix = "rds",
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         hidden_var = "hidden",
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", paste0("known_", 2:10)),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                            # sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap"),
               mse = list(handler = get_study_est_mse,
                          capture_parse = 
                            "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                          # sample_condition = "tls == 1",
                          method = "stepwise",
                          hidden_variable = "hidden",
                          label = "tls_mse")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         hidden_var = "hidden",
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", paste0("known_", 2:10)),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds"),
                             capture_parse = 
                               "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"),
               mse1 = list(handler = get_study_est_mse,
                             capture_vars = c("rds", "pps"),
                             method = "stepwise",
                             hidden_variable = "hidden",
                             label = "rds_pps_mse"),
               mse2 = list(handler = get_study_est_mse,
                             capture_vars = c("rds"),
                             capture_parse = 
                               "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             method = "stepwise",
                             hidden_variable = "hidden",
                             label = "rds_tls_mse")),
        lts =
          list(link = list(handler = get_study_est_linktrace,
                           total = 200,
                           strata = "hidden",
                           gibbs_params = list(n_samples = 5L, 
                                               chain_samples = 250L, 
                                               chain_burnin = 50L),
                           priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                           prefix = "lts",
                           label = "link"))
      )
  )

Step 2. Declare study population (M)odel


study_population <-
  eval(as.call(c(list(declare_population), study_1$pop)))

set.seed(872312)
example_pop <- study_population()

example_pop[sample(1:min(.N, 1000))] %>%  
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Simulated population network


g <-
  example_pop %$% {
    hiddenmeta:::retrieve_graph(links) %>%
      igraph::set_vertex_attr("name", value = name) %>%
      igraph::set_vertex_attr("type", value = type)
  }

pal <- 
  grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), 
                            palette = "Dark2",
                            recycle = TRUE) %>% 
  `names<-`(unique(igraph::V(g)$type))

GGally::ggnet2(g, mode = "fruchtermanreingold",
               edge.alpha = .5, edge.size = .2, 
               node.alpha = .9, node.size = 2.5, node.color = "type",
               palette = pal,
               color.legend = "type",
               max_size = 4, shape = 16,
               legend.size = 12, legend.position = "bottom") +
  ggplot2::coord_equal()

Step 3. Declare study (I)nquiries or Estimands


study_estimands <- 
  eval(as.call(c(list(declare_inquiry), study_1$inquiries)))

Step 4. Declare study (D)ata strategy or Sampling

The sampling procedures are additive in a sense that each procedure appends several columns relevant to the sampling procedure and particular draw based on population simulation, but does not change the study population data frame (unless you specify drop_nonsampled = TRUE).


study_sample_rds <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$rds)))


study_sample_pps <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$pps)))


study_sample_tls <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$tls)))


study_sample_lts <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$lts)))

Step 5. Declare study (A)nswer strategy or estimators


est_ss <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$ss)))
est_sspse <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$sspse)))
est_chords <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$chords)))
est_multi <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$multiplier)))

est_ht_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$ht)))
est_nsum_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$nsum)))

est_ht_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$ht)))
est_nsum_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$nsum)))
est_recap_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$recap)))
est_mse_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$mse)))

est_recap_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap1)))
est_recap_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap2)))
est_mse_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$mse1)))
est_mse_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$mse2)))

est_link <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$lts$link)))

Step 6. Declare full study


study1 <- 
  # model (population)
  study_population + 
  # inquiries
  study_estimands + 
  # data strategy (samples)
  study_sample_rds + study_sample_pps + study_sample_tls + study_sample_lts + 
  # answer strategy (estimators)
  est_ss + est_sspse + est_chords +
  est_multi +
  est_nsum_pps + est_ht_pps +
  est_nsum_tls + est_ht_tls + est_recap_tls + est_mse_tls +
  est_recap_rds_pps + est_recap_rds_tls + est_mse_rds_pps + est_mse_rds_tls + 
  est_link

Draw population and relevant samples


# can draw full data
study1_data <- draw_data(study1)

study1_data[sample(1:.N)] %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Draw inquiries


# can draw estimands of interest
draw_estimands(study1) %>% 
  kable(digits = 2)

inquiry	estimand
known_size	89.00
known_prev	0.09
hidden_size	183.00
hidden_prev	0.18
target_size	304.00
target_prev	0.30
target_size_in_hidden	51.00
target_prev_in_hidden	0.28
hidden_size_in_known	18.00
hidden_prev_in_known	0.20

Draw estimates


# can draw estimates
draw_estimates(study1) %>% 
  kable(digits = 2)

estimator	estimate	se	inquiry
target_prev_in_hidden_ss	0.44	0.10	target_prev_in_hidden
hidden_size_rds_sspse	160.50	88.73	hidden_size
hidden_size_rds_chords	42.00	51.16	hidden_size
hidden_size_rds_multi	176.67	32.49	hidden_size
hidden_size_pps_nsum	170.34	25.80	hidden_size
hidden_size_pps_ht	246.67	39.44	hidden_size
hidden_prev_pps_ht	0.25	0.04	hidden_prev
hidden_size_tls_nsum	252.44	40.23	hidden_size
hidden_size_tls_ht	131.41	41.37	hidden_size
hidden_prev_tls_ht	0.13	0.04	hidden_prev
hidden_size_tls_recap	190.95	11.69	hidden_size
hidden_size_tls_mse	182.52	8.49	hidden_size
hidden_size_rds_pps_recap	164.57	29.82	hidden_size
hidden_size_rds_tls_recap	190.95	11.69	hidden_size
hidden_size_rds_pps_mse	170.77	36.05	hidden_size
hidden_size_rds_tls_mse	182.52	8.49	hidden_size
hidden_size_link	148.60	7.31	hidden_size

Step 7. Diagnose study design


requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 10)

set.seed(872312)
study1_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(study1, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(study1_simulations, here::here("inst/extdata/study1_sim.rds"))


study_diagnosands <-
 declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2))
  )

study1_simulations <- 
  readRDS(system.file("extdata", "study1_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = study1_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis() %>% 
  dplyr::select(-'Design') %>%
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Diagnosis of multiple studies

Additional study designs


study_2 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 5000, K = 3, 
               prev_K = c(frame = .5, known = .2, hidden = .1), 
               rho_K = c(.05, .05, .05),
               p_edge_within = list(frame = c(0.05, 0.05), 
                                    known = c(0.1, 0.05), 
                                    hidden = c(0.2, 0.9)),
               p_edge_between = list(frame = 0.05, 
                                     known = 0.1, 
                                     hidden = 0.01),
               directed = FALSE),
        
        group_names = c("frame", "known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(frame = 1, known = 1, hidden = .6),
        
        # probability of service utilization in hidden population
        # for service multiplier
        
        add_groups = 
          list(service_use = "rbinom(.N, 1, 0.25 + hidden * 0.3)",
               "paste0('loc_', 1:8) := lapply(runif(8, 0.05,.3), function(add) rbinom(.N, 1, 0.1 + hidden * add))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden",
                   n_seed = 100,
                   n_coupons = 3,
                   n_waves = 2,
                   target_type = "waves",
                   target_n_rds = 2),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 8,
                   target_cluster_type = "fixed",
                   target_per_cluster = 40,
                   clusters = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = "frame",
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 800),
        link_trace = list(handler = sample_rds,
                            sampling_variable = "link_trace",
                            hidden_var = "hidden",
                            n_seed = 50,
                            n_coupons = 3,
                            add_seeds = 0,
                            target = "sample",
                            target_n_rds = 150,
                            linktrace = TRUE)
      ),
    inquiries = list(handler = get_study_estimands,
                     out_groups = "^known$|^frame$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse, 
                            label = "rds_sspse",
                            prior_mean = 450,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 5000,
                            prefix = "rds"),
               chords = list(handler = get_study_est_chords, 
                             type = "jeffreys",
                             label = "rds_chords",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "frame"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:8),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(
            ht = list(handler = get_study_est_ht, 
                      label = "pps_ht"),
            nsum = list(handler = get_study_est_nsum,
                        known = c("frame", "known"),
                        hidden = "hidden_visible_out",
                        survey_design = ~ pps_cluster + strata(pps_strata),
                        n_boot = 100,
                        label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:8)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap")),
        link_trace =
          list(link_trace = list(handler = get_study_est_linktrace,
                                   total = 300,
                                   strata = "hidden",
                                   gibbs_params = list(n_samples = 5L, 
                                                       chain_samples = 250L, 
                                                       chain_burnin = 50L),
                                   priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                                   prefix = "link_trace",
                                   label = "link_trace"))
      )
  )

study_3 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 2000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = .3,
               p_edge_within = list(known = c(0.05, 0.1), hidden = c(0.05, 0.7)),
               p_edge_between = list(known = 0.05, hidden = 0.05),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .7),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.05)",
               "paste0('loc_', 1:8) := lapply(c(.2, .1, .3, .2, .4, .1, .3, .2), function(add) rbinom(.N, 1, 0.1 + hidden * add))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   target_type = "sample",
                   target_n_rds = 100),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_cluster_type = "fixed",
                   target_per_cluster = 30,
                   clusters = paste0("loc_", 1:8)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 400),
        link_trace = list(handler = sample_rds,
                            sampling_variable = "link_trace",
                            hidden_var = "hidden",
                            n_seed = 50,
                            n_coupons = 3,
                            add_seeds = 0,
                            target = "sample",
                            target_n_rds = 150,
                            linktrace = TRUE)
      ),
    inquiries = list(handler = get_study_estimands,
                     out_groups = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 2000,
                            prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "jeffreys",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:6),
                            # sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:4)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap")),
        link_trace =
          list(link_trace = list(handler = get_study_est_linktrace,
                                   total = 300,
                                   strata = "hidden",
                                   gibbs_params = list(n_samples = 5L, 
                                                       chain_samples = 250L, 
                                                       chain_burnin = 50L),
                                   priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                                   prefix = "link_trace",
                                   label = "link_trace"))
      )
  )

Declare multiple studies


multi_population <- 
  declare_population(handler = get_multi_populations, 
                     pops_args = list(study_1 = study_1$pop,
                                      # study_2 = study_2$pop,
                                      study_3 = study_3$pop))

multi_sampling <- 
  declare_sampling(handler = get_multi_samples, 
                   samples_args = list(study_1 = study_1$sample,
                                       # study_2 = study_2$sample,
                                       study_3 = study_3$sample)) 

multi_inquiry <- 
  declare_inquiry(handler = get_multi_estimands, 
                  inquiries_args = list(study_1 = study_1$inquiries,
                                        # study_2 = study_2$inquiries,
                                        study_3 = study_3$inquiries)) 

multi_estimators <-
  declare_estimator(handler = get_multi_estimates,
                    estimators_args = list(study_1 = study_1$estimators,
                                           # study_2 = study_2$estimators,
                                           study_3 = study_3$estimators))

multi_study <- multi_population + multi_sampling + multi_inquiry + multi_estimators

# set.seed(872312)
# draw_estimands(multi_study) %>%
#   kable()
# 
# draw_estimates(multi_study) %>%
#   kable()

Diagnose multiple studies


# multi_simulations <- simulate_design(multi_study, sims = 2)

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 40)

set.seed(872312)
multi_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(multi_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(multi_simulations, file = here::here("inst/extdata/multi_sim.rds"))


multi_simulations <- 
  readRDS(system.file("extdata", "multi_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = multi_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Diagnosis of meta-study

Conduct multi-study design for as many sampling-estimator pairs in each study as possible, then diagnose the multi-study design. Calculate average (across simulations) estimand and bias of sampling-estimator for each of the studies and estimator sampling strategies. These will serve as population we will be drawing population-sampling-estimator triads
Estimands include:
- Average estimand by inquiry label (within study)
- Average bias of specific sampling-estimator pair (across studies) compared to truth
- Average relative bias of sampling-estimator pair (across studies) compared to “gold standard”
- Ratio of average bias to costs of sampling-estimator pair
As mentioned before sampling will consist of drawing population-sampling-estimator triads from the population presuming that each study uses at least two sampling strategies at a time
Once we draw sample we use Stan model to estimate study-specific estimands and sampling-estimator specific errors (biases)
We have all parts to conduct the full cycle of meta-analyses


meta_population <- 
  declare_population(multi_design = multi_study, n_sim = 3, parallel = FALSE, 
                     handler = get_meta_population)

meta_inquiry <- 
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht", 
                  handler = get_meta_estimands)

meta_sample <- 
  declare_sampling(sampling_variable = "meta",
                   selection_variables = c("sample", "estimator"),
                   samples_per_study = 2, estimator_per_sample = 3, 
                   force = list(sample = "pps", estimator = "ht"),
                   handler = get_meta_sample)

meta_estimator <- 
  declare_estimator(sampling_variable = "meta",
                    which_estimand = "hidden_size",
                    benchmark = list(sample = "pps", estimator = "ht"),
                    control_params = list(
                      iter = 8000, chains = 8, thin = 10,
                      seed = 872312,
                      cores = 1),
                    stan_handler = get_meta_stan,
                    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

# set.seed(872312)
# draw_estimands(meta_study) %>% 
#   kable()
# 
# draw_estimates(meta_study) %>% 
#   kable()


requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(meta_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = here::here("inst/extdata/meta_sim.rds"))


meta_simulations <- 
  readRDS(system.file("extdata", "meta_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = meta_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))