---
title: "Simulation"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Simulation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

Simulation study of a gene-environment interaction effect. The main effect ```delta``` is set to have 95% power. The interaction effect ```theta``` is set using ```phi``` which is relative to the main effect ranging from ```{0..4}x``` as described in Brookes et al (2004).

```{r}
library("varGWASR")
library("broom")
library("dplyr")
library("ggplot2")
set.seed(1234)

n_obs <- 1000
n_sim <- 1000
maf <- 0.4

# main effect size of x on y detectable ~ 95% power
delta <- 0.17

# simulation
results <- data.frame()
for (phi in seq(0, 4, .5)){
  
  # size of interaction relative to main effect (Brookes S, et al 2004)
  theta <- delta * phi
  
  for (i in 1:n_sim) {
      # simulate GxE interaction effect
      x <- rbinom(n_obs, 2, maf)
      u <- rnorm(n_obs)
      y <- x * delta + x*u * theta + rnorm(n_obs)

      # test/estimate variance effect
      fit <- varGWASR::model(data.frame(x, y), "x", "y")

      # add true variance difference
      fit$v1 <- var(y[x==1]) - var(y[x==0])
      fit$v2 <- var(y[x==2]) - var(y[x==0])
    
      # store result
      fit$phi <- phi
      results <- rbind(results, fit)
  }

}
```

Power/T1E of the Brown-Forsythe test using LAD regression

```{r}
tbl <- results %>% 
  dplyr::group_by(phi) %>%
  dplyr::summarize(tidy(binom.test(sum(phi_p < 0.05), n_sim))) %>%
  dplyr::select(phi, estimate, conf.low, conf.high) %>%
  dplyr::rename(power="estimate", ci_95l="conf.low", ci_95h="conf.high")
knitr::kable(tbl, digits=3)
```

Effect size of the Brown-Forsythe test using LAD regression

```{r}
# estimate mean and 95% CI
v1 <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(t.test(phi_x1) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=1", method="LAD-BF")
v2 <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(t.test(phi_x2) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=2", method="LAD-BF")
v1_mean <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(x=mean(v1))
v2_mean <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(x=mean(v2))
v1 <- merge(v1, v1_mean, "phi")
v1$wd <- 0.01
v2 <- merge(v2, v2_mean, "phi")
v2$wd <- 0.05
ci <- rbind(v1, v2)
ci$phi <- as.factor(ci$phi)
ci$genotype <- as.factor(ci$genotype)
```

```{r, fig.width=7,fig.height=7}
ggplot(data=ci, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
    geom_point() + 
    geom_abline(intercept = 0, slope = 1, linetype="dashed", color="grey") +
    geom_errorbar(aes(width=wd)) +
    theme_classic() +
    xlab("Difference in variance compared with SNP=0") +
    ylab("Estimated difference in variance compared with SNP=0 (95% CI)") +
    facet_wrap(method ~ genotype, scales="free") +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
    theme(
        strip.background = element_blank(),
        legend.title.align=0.5
    )
```

Confidence interval coverage of the Brown-Forsythe test using LAD regression

```{r}
results <- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
names(results)[10] <- "v1_mean"
names(results)[11] <- "v2_mean"
results$phi_x1_lci <- results$phi_x1 - (1.96 * results$se_x1)
results$phi_x1_uci <- results$phi_x1 + (1.96 * results$se_x1)
results$phi_x2_lci <- results$phi_x2 - (1.96 * results$se_x2)
results$phi_x2_uci <- results$phi_x2 + (1.96 * results$se_x2)

coverage1 <- results %>% 
    dplyr::group_by(phi) %>%
    dplyr::summarize(binom.test(sum(phi_x1_lci <= v1_mean & phi_x1_uci >= v1_mean), n()) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=1", method="LAD-BF")
coverage2 <- results %>% 
    dplyr::group_by(phi) %>%
    dplyr::summarize(binom.test(sum(phi_x2_lci <= v2_mean & phi_x2_uci >= v2_mean), n()) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=2", method="LAD-BF")

coverage1 <- merge(coverage1, v1_mean, "phi")
coverage1$wd <- 0.01
coverage2 <- merge(coverage2, v2_mean, "phi")
coverage2$wd <- 0.05
coverage <- rbind(coverage1, coverage2)
coverage$phi <- as.factor(coverage$phi)
coverage$genotype <- as.factor(coverage$genotype)
```

```{r, fig.width=7,fig.height=7}
ggplot(data=coverage, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
    geom_point() + 
    geom_errorbar(aes(width=wd)) +
    theme_classic() +
    xlab("Difference in variance compared with SNP=0") +
    ylab("Coverage of 95% CI (95% CI)") +
    geom_hline(yintercept = 0.95, linetype = "dashed", color = "grey") +
    facet_grid(method ~ genotype, scales="free_x") +
    labs(shape="Genotype") +
    scale_y_continuous(limits = c(0, 1), breaks = scales::pretty_breaks(n = 5)) +
    theme(
        strip.background = element_blank(),
        legend.title.align=0.5
    )
```