Install and load packages


install.packages("DeclareDesign")

devtools::install_github("gerasy1987/hiddenmeta", build_vignettes = TRUE)

Step 1. Provide study design features


## STUDY 1
study_1 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 1000, K = 2, prev_K = c(known = .1, hidden = .2), rho_K = 0,
               p_edge_within = list(known = c(0.1, 0.1), hidden = c(0.1, 0.3)),
               p_edge_between = list(known = 0.02, hidden = 0.02),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .8),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(
            service_use = "rbinom(.N, 1, 0.25)",
            "paste0('loc_', 1:10) := lapply(rep(.2, times = 10), function(add) rbinom(.N, 1, 0.05 + hidden * add))",
            target = 0.3,
            "paste0('known_', 2:10) := lapply(2:10, function(x) rbinom(.N, 1, 0.3))")
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   add_seeds = 3,
                   target_type = "sample",
                   target_n_rds = 60),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   hidden_var = NULL,
                   # TLS sampling parameters
                   target_n_clusters = 5,
                   target_cluster_type = "fixed",
                   target_per_cluster = 15,
                   clusters = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 150),
        lts = list(handler = sample_rds,
                   sampling_variable = "lts",
                   hidden_var = "hidden",
                   n_seed = 50,
                   n_coupons = 3,
                   n_waves = 3,
                   target = "waves",
                   target_n_rds = 200,
                   linktrace = "all")
      ),
    inquiries = list(handler = get_study_estimands,
                     in_groups = "^target$",
                     out_groups = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(ss = list(handler = get_study_est_ss,
                         sampling_var = "hidden",
                         hidden_var = "target", # estimates prevalence withing RDS
                         n_coupons = 3,
                         total = 1000,
                         prefix = "rds",
                         label = "ss"),
               sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 1000,
                            prefix = "rds",
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         hidden_var = "hidden",
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", paste0("known_", 2:10)),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                            # sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap"),
               mse = list(handler = get_study_est_mse,
                          capture_parse = 
                            "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                          # sample_condition = "tls == 1",
                          method = "stepwise",
                          hidden_variable = "hidden",
                          label = "tls_mse")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         hidden_var = "hidden",
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", paste0("known_", 2:10)),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds"),
                             capture_parse = 
                               "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"),
               mse1 = list(handler = get_study_est_mse,
                             capture_vars = c("rds", "pps"),
                             method = "stepwise",
                             hidden_variable = "hidden",
                             label = "rds_pps_mse"),
               mse2 = list(handler = get_study_est_mse,
                             capture_vars = c("rds"),
                             capture_parse = 
                               "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             method = "stepwise",
                             hidden_variable = "hidden",
                             label = "rds_tls_mse")),
        lts =
          list(link = list(handler = get_study_est_linktrace,
                           total = 200,
                           strata = "hidden",
                           gibbs_params = list(n_samples = 5L, 
                                               chain_samples = 250L, 
                                               chain_burnin = 50L),
                           priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                           prefix = "lts",
                           label = "link"))
      )
  )

Step 2. Declare study population (M)odel


study_population <-
  eval(as.call(c(list(declare_population), study_1$pop)))

set.seed(872312)
example_pop <- study_population()

example_pop[sample(1:min(.N, 1000))] %>%  
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Simulated population network


g <-
  example_pop %$% {
    hiddenmeta:::retrieve_graph(links) %>%
      igraph::set_vertex_attr("name", value = name) %>%
      igraph::set_vertex_attr("type", value = type)
  }

pal <- 
  grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), 
                            palette = "Dark2",
                            recycle = TRUE) %>% 
  `names<-`(unique(igraph::V(g)$type))

GGally::ggnet2(g, mode = "fruchtermanreingold",
               edge.alpha = .5, edge.size = .2, 
               node.alpha = .9, node.size = 2.5, node.color = "type",
               palette = pal,
               color.legend = "type",
               max_size = 4, shape = 16,
               legend.size = 12, legend.position = "bottom") +
  ggplot2::coord_equal()

Step 3. Declare study (I)nquiries or Estimands


study_estimands <- 
  eval(as.call(c(list(declare_inquiry), study_1$inquiries)))

Step 4. Declare study (D)ata strategy or Sampling

The sampling procedures are additive in a sense that each procedure appends several columns relevant to the sampling procedure and particular draw based on population simulation, but does not change the study population data frame (unless you specify drop_nonsampled = TRUE).


study_sample_rds <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$rds)))

study_sample_pps <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$pps)))

study_sample_tls <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$tls)))

study_sample_lts <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$lts)))

Step 5. Declare study (A)nswer strategy or estimators


est_ss <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$ss)))
est_sspse <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$sspse)))
est_chords <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$chords)))
est_multi <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$multiplier)))

est_ht_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$ht)))
est_nsum_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$nsum)))

est_ht_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$ht)))
est_nsum_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$nsum)))
est_recap_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$recap)))
est_mse_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$mse)))

est_recap_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap1)))
est_recap_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap2)))
est_mse_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$mse1)))
est_mse_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$mse2)))

est_link <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$lts$link)))

Step 6. Declare full study


study1 <- 
  # model (population)
  study_population + 
  # inquiries
  study_estimands + 
  # data strategy (samples)
  study_sample_rds + study_sample_pps + study_sample_tls + study_sample_lts + 
  # answer strategy (estimators)
  est_ss + est_sspse + est_chords +
  est_multi +
  est_nsum_pps + est_ht_pps +
  est_nsum_tls + est_ht_tls + est_recap_tls + est_mse_tls +
  est_recap_rds_pps + est_recap_rds_tls + est_mse_rds_pps + est_mse_rds_tls + 
  est_link

Draw population and relevant samples


# can draw full data
study1_data <- draw_data(study1)

study1_data[sample(1:.N)] %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Draw inquiries


# can draw estimands of interest
draw_estimands(study1) %>% 
  kable(digits = 2)
inquiry estimand
known_size 89.00
known_prev 0.09
hidden_size 183.00
hidden_prev 0.18
target_size 304.00
target_prev 0.30
target_size_in_hidden 51.00
target_prev_in_hidden 0.28
hidden_size_in_known 18.00
hidden_prev_in_known 0.20

Draw estimates


# can draw estimates
draw_estimates(study1) %>% 
  kable(digits = 2)
estimator estimate se inquiry
target_prev_in_hidden_ss 0.44 0.10 target_prev_in_hidden
hidden_size_rds_sspse 160.50 88.73 hidden_size
hidden_size_rds_chords 42.00 51.16 hidden_size
hidden_size_rds_multi 176.67 32.49 hidden_size
hidden_size_pps_nsum 170.34 25.80 hidden_size
hidden_size_pps_ht 246.67 39.44 hidden_size
hidden_prev_pps_ht 0.25 0.04 hidden_prev
hidden_size_tls_nsum 252.44 40.23 hidden_size
hidden_size_tls_ht 131.41 41.37 hidden_size
hidden_prev_tls_ht 0.13 0.04 hidden_prev
hidden_size_tls_recap 190.95 11.69 hidden_size
hidden_size_tls_mse 182.52 8.49 hidden_size
hidden_size_rds_pps_recap 164.57 29.82 hidden_size
hidden_size_rds_tls_recap 190.95 11.69 hidden_size
hidden_size_rds_pps_mse 170.77 36.05 hidden_size
hidden_size_rds_tls_mse 182.52 8.49 hidden_size
hidden_size_link 148.60 7.31 hidden_size

Step 7. Diagnose study design


requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 10)

set.seed(872312)
study1_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(study1, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(study1_simulations, here::here("inst/extdata/study1_sim.rds"))

study_diagnosands <-
 declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2))
  )

study1_simulations <- 
  readRDS(system.file("extdata", "study1_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = study1_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis() %>% 
  dplyr::select(-'Design') %>%
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Diagnosis of multiple studies

Additional study designs


study_2 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 5000, K = 3, 
               prev_K = c(frame = .5, known = .2, hidden = .1), 
               rho_K = c(.05, .05, .05),
               p_edge_within = list(frame = c(0.05, 0.05), 
                                    known = c(0.1, 0.05), 
                                    hidden = c(0.2, 0.9)),
               p_edge_between = list(frame = 0.05, 
                                     known = 0.1, 
                                     hidden = 0.01),
               directed = FALSE),
        
        group_names = c("frame", "known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(frame = 1, known = 1, hidden = .6),
        
        # probability of service utilization in hidden population
        # for service multiplier
        
        add_groups = 
          list(service_use = "rbinom(.N, 1, 0.25 + hidden * 0.3)",
               "paste0('loc_', 1:8) := lapply(runif(8, 0.05,.3), function(add) rbinom(.N, 1, 0.1 + hidden * add))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden",
                   n_seed = 100,
                   n_coupons = 3,
                   n_waves = 2,
                   target_type = "waves",
                   target_n_rds = 2),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 8,
                   target_cluster_type = "fixed",
                   target_per_cluster = 40,
                   clusters = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = "frame",
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 800),
        link_trace = list(handler = sample_rds,
                            sampling_variable = "link_trace",
                            hidden_var = "hidden",
                            n_seed = 50,
                            n_coupons = 3,
                            add_seeds = 0,
                            target = "sample",
                            target_n_rds = 150,
                            linktrace = TRUE)
      ),
    inquiries = list(handler = get_study_estimands,
                     out_groups = "^known$|^frame$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse, 
                            label = "rds_sspse",
                            prior_mean = 450,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 5000,
                            prefix = "rds"),
               chords = list(handler = get_study_est_chords, 
                             type = "jeffreys",
                             label = "rds_chords",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "frame"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:8),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(
            ht = list(handler = get_study_est_ht, 
                      label = "pps_ht"),
            nsum = list(handler = get_study_est_nsum,
                        known = c("frame", "known"),
                        hidden = "hidden_visible_out",
                        survey_design = ~ pps_cluster + strata(pps_strata),
                        n_boot = 100,
                        label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:8)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap")),
        link_trace =
          list(link_trace = list(handler = get_study_est_linktrace,
                                   total = 300,
                                   strata = "hidden",
                                   gibbs_params = list(n_samples = 5L, 
                                                       chain_samples = 250L, 
                                                       chain_burnin = 50L),
                                   priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                                   prefix = "link_trace",
                                   label = "link_trace"))
      )
  )

study_3 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 2000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = .3,
               p_edge_within = list(known = c(0.05, 0.1), hidden = c(0.05, 0.7)),
               p_edge_between = list(known = 0.05, hidden = 0.05),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .7),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.05)",
               "paste0('loc_', 1:8) := lapply(c(.2, .1, .3, .2, .4, .1, .3, .2), function(add) rbinom(.N, 1, 0.1 + hidden * add))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   target_type = "sample",
                   target_n_rds = 100),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_cluster_type = "fixed",
                   target_per_cluster = 30,
                   clusters = paste0("loc_", 1:8)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 400),
        link_trace = list(handler = sample_rds,
                            sampling_variable = "link_trace",
                            hidden_var = "hidden",
                            n_seed = 50,
                            n_coupons = 3,
                            add_seeds = 0,
                            target = "sample",
                            target_n_rds = 150,
                            linktrace = TRUE)
      ),
    inquiries = list(handler = get_study_estimands,
                     out_groups = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 2000,
                            prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "jeffreys",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         survey_design = ~ tls_loc_sampled,
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_loc_sampled,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:6),
                            # sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:4)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap")),
        link_trace =
          list(link_trace = list(handler = get_study_est_linktrace,
                                   total = 300,
                                   strata = "hidden",
                                   gibbs_params = list(n_samples = 5L, 
                                                       chain_samples = 250L, 
                                                       chain_burnin = 50L),
                                   priors = list(p_n = 0L, p_l = 0.1, p_b = 1L),
                                   prefix = "link_trace",
                                   label = "link_trace"))
      )
  )

Declare multiple studies


multi_population <- 
  declare_population(handler = get_multi_populations, 
                     pops_args = list(study_1 = study_1$pop,
                                      # study_2 = study_2$pop,
                                      study_3 = study_3$pop))

multi_sampling <- 
  declare_sampling(handler = get_multi_samples, 
                   samples_args = list(study_1 = study_1$sample,
                                       # study_2 = study_2$sample,
                                       study_3 = study_3$sample)) 

multi_inquiry <- 
  declare_inquiry(handler = get_multi_estimands, 
                  inquiries_args = list(study_1 = study_1$inquiries,
                                        # study_2 = study_2$inquiries,
                                        study_3 = study_3$inquiries)) 

multi_estimators <-
  declare_estimator(handler = get_multi_estimates,
                    estimators_args = list(study_1 = study_1$estimators,
                                           # study_2 = study_2$estimators,
                                           study_3 = study_3$estimators))

multi_study <- multi_population + multi_sampling + multi_inquiry + multi_estimators

# set.seed(872312)
# draw_estimands(multi_study) %>%
#   kable()
# 
# draw_estimates(multi_study) %>%
#   kable()

Diagnose multiple studies


# multi_simulations <- simulate_design(multi_study, sims = 2)

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 40)

set.seed(872312)
multi_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(multi_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(multi_simulations, file = here::here("inst/extdata/multi_sim.rds"))

multi_simulations <- 
  readRDS(system.file("extdata", "multi_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = multi_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Diagnosis of meta-study

  1. Conduct multi-study design for as many sampling-estimator pairs in each study as possible, then diagnose the multi-study design. Calculate average (across simulations) estimand and bias of sampling-estimator for each of the studies and estimator sampling strategies. These will serve as population we will be drawing population-sampling-estimator triads

  2. Estimands include:

    • Average estimand by inquiry label (within study)
    • Average bias of specific sampling-estimator pair (across studies) compared to truth
    • Average relative bias of sampling-estimator pair (across studies) compared to “gold standard”
    • Ratio of average bias to costs of sampling-estimator pair
  3. As mentioned before sampling will consist of drawing population-sampling-estimator triads from the population presuming that each study uses at least two sampling strategies at a time

  4. Once we draw sample we use Stan model to estimate study-specific estimands and sampling-estimator specific errors (biases)

  5. We have all parts to conduct the full cycle of meta-analyses


meta_population <- 
  declare_population(multi_design = multi_study, n_sim = 3, parallel = FALSE, 
                     handler = get_meta_population)

meta_inquiry <- 
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht", 
                  handler = get_meta_estimands)

meta_sample <- 
  declare_sampling(sampling_variable = "meta",
                   selection_variables = c("sample", "estimator"),
                   samples_per_study = 2, estimator_per_sample = 3, 
                   force = list(sample = "pps", estimator = "ht"),
                   handler = get_meta_sample)

meta_estimator <- 
  declare_estimator(sampling_variable = "meta",
                    which_estimand = "hidden_size",
                    benchmark = list(sample = "pps", estimator = "ht"),
                    control_params = list(
                      iter = 8000, chains = 8, thin = 10,
                      seed = 872312,
                      cores = 1),
                    stan_handler = get_meta_stan,
                    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

# set.seed(872312)
# draw_estimands(meta_study) %>% 
#   kable()
# 
# draw_estimates(meta_study) %>% 
#   kable()

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(meta_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = here::here("inst/extdata/meta_sim.rds"))

meta_simulations <- 
  readRDS(system.file("extdata", "meta_sim.rds", package = "hiddenmeta"))

diagnose_design(simulations_df = meta_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))