--- title: "simulations" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{simulations} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, eval=FALSE} library(sensemakr) library(MendelianRandomization) library(MRPRESSO) library(paramtest) library(MRMix) library(genius) library(snow) # function to find the critical k search <- function(ks, model, treatment = "prs", benchmark_covariates = "x") { positive <- sign(coef(model)[treatment]) > 0 get.ci <- ifelse(positive,"adjusted_lower_CI", "adjusted_upper_CI") out <- suppressWarnings(tryCatch({ out <- ovb_bounds(model = model, treatment = treatment, benchmark_covariates = benchmark_covariates, kd = ks) value <- out[[get.ci]]^2 value }, error = function(e) Inf)) return(out) } # function to create the data create_data <- function(n = 1e5, m = 90, phi, beta, delta, tau = 0, gamma = .1, psi =1, eta = .1, scale = TRUE){ G <- rbinom(n*m, 2,prob = 1/3) dim(G) <- c(n,m) colnames(G) <- paste0("g",1:m) w <- c(G %*% phi + rnorm(n)) x <- c(G %*% delta + rnorm(n)) u <- rnorm(n) d <- c(G %*% beta + psi*w + psi*x + u + rnorm(n)) y <- c(tau*d + gamma*w + eta*x + u + rnorm(n)) # scale data if(scale){ w <- as.numeric(scale(w)) x <- as.numeric(scale(x)) d <- as.numeric(scale(d)) y <- as.numeric(scale(y)) G <- apply(G, 2, function(x)as.numeric(scale(x))) } data <- list(y = y,d = d,x = x,w = w, G= G, u = u) return(data) } # function to get the betas get_betas <- function(y, d, G){ md <- lm(d ~ G) my <- lm(y ~ G) coef.md <- coef(summary(md)) coef.my <- coef(summary(my)) bd <- coef.md[-1,1] bdse <- coef.md[-1,2] by <- coef.my[-1,1] byse <- coef.my[-1,2] data <- data.frame(bd, by, bdse, byse) return(data) } # Argument R1.a sets j variants to be valid # Argument R1.b makes W "k times" stronger than X sim <- function(nsim, n = 100, m= 90, gamma = 0.1, eta = gamma, tau = 0, presso = FALSE, R1.a = F, j = 1, R1.b = F, k = 1, scale = TRUE){ eta <- eta # initialize output out <- data.frame(n = n) out$tau <- tau out$m <- m out$R1.a <- R1.a out$j <- j out$R1.b <- R1.b out$k <- k phi <- runif(m, .01, .05) beta <- runif(m, .01, .05) delta <- runif(m, .01, .05) if(R1.a){ delta[1:j] <- 0 phi[1:j] <- 0 } if(R1.b){ phi <- sqrt(k)*phi gamma <- sqrt(k)*gamma } # two samples of size n data1 <- create_data(n = n, phi = phi, beta = beta, delta = delta, tau = tau, gamma = gamma, eta = eta, scale = scale) b1 <- with(data1, get_betas(y, d, G)) # memory rm(data1) gc() data2 <- create_data(n = n, phi = phi, beta = beta, delta = delta, tau = tau, gamma = gamma, eta = eta, scale = scale) b2 <- with(data2, get_betas(y, d, G)) # compute everything that needs data2 than removes it # PRS data2$prs <- data2$G %*% b1$bd # MR Genius form <- paste0("A~", paste0(colnames(data2$G), collapse = "+")) form <- as.formula(form) output.genius <- genius_addY(Y = data2$y, A = data2$d, G = data2$G, formula = form) out$p.genius <- output.genius$pval # RF sensitivity rf <- with(data2, lm(y ~ prs + x)) out$r2yx.z <- partial_r2(rf)["x"] model.yw <- with(data2, lm(y ~ prs + w)) out$r2yw.z <- partial_r2(model.yw)["w"] model.zx <- with(data2, lm(prs ~ x)) out$r2zx <- partial_r2(model.zx)["x"] model.zw <- with(data2, lm(prs ~ w)) out$r2zw <- partial_r2(model.zw)["w"] out$gamma <- gamma out$eta <- eta # sanity checks true.model <- lm(y~d + w + x + u, data = data2) out$true.tau <- coef(true.model)["d"] rf.full <- lm(y ~ prs + x + w, data = data2) fs.full <- lm(d ~ prs + x + w, data = data2) 
  out$iv.tau <- coef(rf.full)["prs"]/coef(fs.full)["prs"]

  # memory
  rm(data2)
  gc()

  # MR input
  input.mr <- mr_input(by = b1$by, byse = b1$byse, bx = b2$bd, bxse = b2$bdse)

  # MR IVW
  output.ivw <- mr_ivw(input.mr)
  out$p.ivw <- output.ivw@Pvalue

  # MR-Egger
  output.egger <- mr_egger(input.mr)
  out$p.egger <- output.egger@Causal.pval

  # Other MR
  output.conmix <- mr_conmix(input.mr)
  out$p.conmix <- output.conmix@Pvalue

  output.mbe <- mr_mbe(input.mr)
  out$p.mbe <- output.mbe@Pvalue

  output.median <- mr_median(input.mr)
  out$p.median <- output.median@Pvalue

  # output.lasso <- mr_lasso(input.mr)
  # output.lasso@Pvalue

  if(presso){
    # MR-PRESSO
    res <- data.frame(by = b1$by, byse = b1$byse, bx = b2$bd, bxse = b2$bdse)
    output.presso <- mr_presso(BetaOutcome = "by", BetaExposure = "bx",
                               SdOutcome = "byse", SdExposure = "bxse",
                               OUTLIERtest = TRUE, DISTORTIONtest = FALSE,
                               data = res, NbDistribution = 1000,
                               SignifThreshold = 0.05)
    presso.p.values <- output.presso$`Main MR results`$`P-value`
    p.presso <- presso.p.values[2]
    if(is.na(p.presso)){
      p.presso <- presso.p.values[1]
    }
    out$p.presso <- p.presso
  }

  # MR-Mix
  output.mix <- MRMix(betahat_x = b1$bd, betahat_y = b2$by,
                      sx = b1$bdse, sy = b2$byse, profile = FALSE)
  out$p.mix <- output.mix$pvalue_theta

  ## sensemakr
  sense.out <- sensemakr(rf, treatment = "prs", benchmark_covariates = "x", kd = 1)
  rv    <- sense.out$sensitivity_stats[, "rv_qa"]
  r2yz  <- sense.out$sensitivity_stats[, "r2yd.x"]
  or_t  <- sense.out$sensitivity_stats$t_statistic
  adj_t <- sense.out$bounds$adjusted_t
  s.t     <- sign(or_t)
  s.adj_t <- sign(adj_t)

  # search k
  if(rv > 0){
    ks <- optimise(f = search, interval = c(0, 10), model = rf,
                   treatment = "prs", benchmark_covariates = "x")$minimum
  } else {
    ks <- 0
  }

  if(s.t != s.adj_t){
    p.bench <- 1
  } else {
    p.bench <- pt(abs(adj_t), df = n, lower.tail = FALSE)*2
  }

  out$p.bench <- p.bench
  out$rv <- rv
  out$r2yz <- r2yz
  out$ks <- ks
  out$or_t <- or_t
  out$adj_t <- adj_t

  return(out)
}

#### Use the code below to run the simulation
#### Warning: it takes a long time (several days on a regular computer),
#### so simulate once to see how long it takes, and plan accordingly.

## test run
# system.time(test <- sim(1, n = 1e5, presso = TRUE, gamma = 0.05, tau = 0))

## now simulate as many times as you want, with different parameter values
# system.time({
#   sim.out <- grid_search(sim,
#                          n.iter = 1000,
#                          params = list(n = c(150000, 300000, 450000),
#                                        gamma = 0.05,
#                                        tau = c(0, 0.1),
#                                        presso = TRUE),
#                          parallel = "multicore",
#                          ncpus = 32
#   )
# })
```
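
After the simulation finishes, the per-iteration results can be summarized into empirical rejection rates. The sketch below is illustrative only: it assumes `sim.out` is the object returned by the `grid_search()` call above and that `paramtest::results()` returns one row per iteration with the columns created in `sim()` (e.g., `p.ivw`, `p.egger`, `p.bench`); adjust the column selection to whichever methods you want to compare.

```{r, eval=FALSE}
# Illustrative post-processing sketch (not part of the simulation itself).
# Assumes sim.out was produced by the grid_search() call above and that
# paramtest::results() returns one row per iteration with the columns from sim().
res <- results(sim.out)

# empirical rejection rate: share of p-values below 0.05,
# grouped by sample size n and true effect tau
rej <- function(p) mean(p < 0.05, na.rm = TRUE)
aggregate(res[, c("p.ivw", "p.egger", "p.bench")],
          by = res[, c("n", "tau")], FUN = rej)
```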