Simulation

Simulation study of a gene-environment interaction effect. The main effect delta is set to have 95% power. The interaction effect theta is set using phi which is relative to the main effect ranging from {0..4}x as described in Brookes et al (2004).

library("varGWASR")
library("broom")
library("dplyr")
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library("ggplot2")
set.seed(1234)

n_obs <- 1000
n_sim <- 1000
maf <- 0.4

# main effect size of x on y detectable ~ 95% power
delta <- 0.17

# simulation
results <- data.frame()
for (phi in seq(0, 4, .5)){
  
  # size of interaction relative to main effect (Brookes S, et al 2004)
  theta <- delta * phi
  
  for (i in 1:n_sim) {
      # simulate GxE interaction effect
      x <- rbinom(n_obs, 2, maf)
      u <- rnorm(n_obs)
      y <- x * delta + x*u * theta + rnorm(n_obs)

      # test/estimate variance effect
      fit <- varGWASR::model(data.frame(x, y), "x", "y")

      # add true variance difference
      fit$v1 <- var(y[x==1]) - var(y[x==0])
      fit$v2 <- var(y[x==2]) - var(y[x==0])
    
      # store result
      fit$phi <- phi
      results <- rbind(results, fit)
  }

}

Power/T1E of the Brown-Forsythe test using LAD regression

tbl <- results %>% 
  dplyr::group_by(phi) %>%
  dplyr::summarize(tidy(binom.test(sum(phi_p < 0.05), n_sim))) %>%
  dplyr::select(phi, estimate, conf.low, conf.high) %>%
  dplyr::rename(power="estimate", ci_95l="conf.low", ci_95h="conf.high")
knitr::kable(tbl, digits=3)

phi	power	ci_95l	ci_95h
0.0	0.054	0.041	0.070
0.5	0.043	0.031	0.057
1.0	0.104	0.086	0.125
1.5	0.295	0.267	0.324
2.0	0.671	0.641	0.700
2.5	0.928	0.910	0.943
3.0	0.996	0.990	0.999
3.5	1.000	0.996	1.000
4.0	1.000	0.996	1.000

Effect size of the Brown-Forsythe test using LAD regression

# estimate mean and 95% CI
v1 <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(t.test(phi_x1) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=1", method="LAD-BF")
v2 <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(t.test(phi_x2) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=2", method="LAD-BF")
v1_mean <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(x=mean(v1))
v2_mean <- results %>%
    dplyr::group_by(phi) %>%
    dplyr::summarize(x=mean(v2))
v1 <- merge(v1, v1_mean, "phi")
v1$wd <- 0.01
v2 <- merge(v2, v2_mean, "phi")
v2$wd <- 0.05
ci <- rbind(v1, v2)
ci$phi <- as.factor(ci$phi)
ci$genotype <- as.factor(ci$genotype)

ggplot(data=ci, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
    geom_point() + 
    geom_abline(intercept = 0, slope = 1, linetype="dashed", color="grey") +
    geom_errorbar(aes(width=wd)) +
    theme_classic() +
    xlab("Difference in variance compared with SNP=0") +
    ylab("Estimated difference in variance compared with SNP=0 (95% CI)") +
    facet_wrap(method ~ genotype, scales="free") +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
    theme(
        strip.background = element_blank(),
        legend.title.align=0.5
    )
#> Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2
#> 3.5.0.
#> ℹ Please use theme(legend.title = element_text(hjust)) instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

Confidence interval coverage of the Brown-Forsythe test using LAD regression

results <- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
names(results)[10] <- "v1_mean"
names(results)[11] <- "v2_mean"
results$phi_x1_lci <- results$phi_x1 - (1.96 * results$se_x1)
results$phi_x1_uci <- results$phi_x1 + (1.96 * results$se_x1)
results$phi_x2_lci <- results$phi_x2 - (1.96 * results$se_x2)
results$phi_x2_uci <- results$phi_x2 + (1.96 * results$se_x2)

coverage1 <- results %>% 
    dplyr::group_by(phi) %>%
    dplyr::summarize(binom.test(sum(phi_x1_lci <= v1_mean & phi_x1_uci >= v1_mean), n()) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=1", method="LAD-BF")
coverage2 <- results %>% 
    dplyr::group_by(phi) %>%
    dplyr::summarize(binom.test(sum(phi_x2_lci <= v2_mean & phi_x2_uci >= v2_mean), n()) %>% tidy) %>%
    dplyr::select(phi, estimate, conf.low, conf.high) %>%
    dplyr::mutate(genotype="SNP=2", method="LAD-BF")

coverage1 <- merge(coverage1, v1_mean, "phi")
coverage1$wd <- 0.01
coverage2 <- merge(coverage2, v2_mean, "phi")
coverage2$wd <- 0.05
coverage <- rbind(coverage1, coverage2)
coverage$phi <- as.factor(coverage$phi)
coverage$genotype <- as.factor(coverage$genotype)

ggplot(data=coverage, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
    geom_point() + 
    geom_errorbar(aes(width=wd)) +
    theme_classic() +
    xlab("Difference in variance compared with SNP=0") +
    ylab("Coverage of 95% CI (95% CI)") +
    geom_hline(yintercept = 0.95, linetype = "dashed", color = "grey") +
    facet_grid(method ~ genotype, scales="free_x") +
    labs(shape="Genotype") +
    scale_y_continuous(limits = c(0, 1), breaks = scales::pretty_breaks(n = 5)) +
    theme(
        strip.background = element_blank(),
        legend.title.align=0.5
    )