---
title: "Simulation"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Simulation}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
Simulation study of a gene-environment interaction effect. The main effect ```delta``` is set to have 95% power. The interaction effect ```theta``` is set using ```phi``` which is relative to the main effect ranging from ```{0..4}x``` as described in Brookes et al (2004).
```{r}
library("varGWASR")
library("broom")
library("dplyr")
library("ggplot2")
set.seed(1234)
n_obs <- 1000
n_sim <- 1000
maf <- 0.4
# main effect size of x on y detectable ~ 95% power
delta <- 0.17
# simulation
results <- data.frame()
for (phi in seq(0, 4, .5)){
# size of interaction relative to main effect (Brookes S, et al 2004)
theta <- delta * phi
for (i in 1:n_sim) {
# simulate GxE interaction effect
x <- rbinom(n_obs, 2, maf)
u <- rnorm(n_obs)
y <- x * delta + x*u * theta + rnorm(n_obs)
# test/estimate variance effect
fit <- varGWASR::model(data.frame(x, y), "x", "y")
# add true variance difference
fit$v1 <- var(y[x==1]) - var(y[x==0])
fit$v2 <- var(y[x==2]) - var(y[x==0])
# store result
fit$phi <- phi
results <- rbind(results, fit)
}
}
```
Power/T1E of the Brown-Forsythe test using LAD regression
```{r}
tbl <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(tidy(binom.test(sum(phi_p < 0.05), n_sim))) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::rename(power="estimate", ci_95l="conf.low", ci_95h="conf.high")
knitr::kable(tbl, digits=3)
```
Effect size of the Brown-Forsythe test using LAD regression
```{r}
# estimate mean and 95% CI
v1 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(t.test(phi_x1) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
v2 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(t.test(phi_x2) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
v1_mean <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(x=mean(v1))
v2_mean <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(x=mean(v2))
v1 <- merge(v1, v1_mean, "phi")
v1$wd <- 0.01
v2 <- merge(v2, v2_mean, "phi")
v2$wd <- 0.05
ci <- rbind(v1, v2)
ci$phi <- as.factor(ci$phi)
ci$genotype <- as.factor(ci$genotype)
```
```{r, fig.width=7,fig.height=7}
ggplot(data=ci, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, linetype="dashed", color="grey") +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Estimated difference in variance compared with SNP=0 (95% CI)") +
facet_wrap(method ~ genotype, scales="free") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)
```
Confidence interval coverage of the Brown-Forsythe test using LAD regression
```{r}
results <- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
names(results)[10] <- "v1_mean"
names(results)[11] <- "v2_mean"
results$phi_x1_lci <- results$phi_x1 - (1.96 * results$se_x1)
results$phi_x1_uci <- results$phi_x1 + (1.96 * results$se_x1)
results$phi_x2_lci <- results$phi_x2 - (1.96 * results$se_x2)
results$phi_x2_uci <- results$phi_x2 + (1.96 * results$se_x2)
coverage1 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x1_lci <= v1_mean & phi_x1_uci >= v1_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
coverage2 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x2_lci <= v2_mean & phi_x2_uci >= v2_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
coverage1 <- merge(coverage1, v1_mean, "phi")
coverage1$wd <- 0.01
coverage2 <- merge(coverage2, v2_mean, "phi")
coverage2$wd <- 0.05
coverage <- rbind(coverage1, coverage2)
coverage$phi <- as.factor(coverage$phi)
coverage$genotype <- as.factor(coverage$genotype)
```
```{r, fig.width=7,fig.height=7}
ggplot(data=coverage, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Coverage of 95% CI (95% CI)") +
geom_hline(yintercept = 0.95, linetype = "dashed", color = "grey") +
facet_grid(method ~ genotype, scales="free_x") +
labs(shape="Genotype") +
scale_y_continuous(limits = c(0, 1), breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)
```