Simulation study of a gene-environment interaction effect.
The main effect delta
is set to have 95% power. The
interaction effect theta
is set using phi
which is relative to the main effect ranging from {0..4}x
as described in Brookes et al (2004).
library("varGWASR")
library("broom")
library("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library("ggplot2")
set.seed(1234)
n_obs <- 1000
n_sim <- 1000
maf <- 0.4
# main effect size of x on y detectable ~ 95% power
delta <- 0.17
# simulation
results <- data.frame()
for (phi in seq(0, 4, .5)){
# size of interaction relative to main effect (Brookes S, et al 2004)
theta <- delta * phi
for (i in 1:n_sim) {
# simulate GxE interaction effect
x <- rbinom(n_obs, 2, maf)
u <- rnorm(n_obs)
y <- x * delta + x*u * theta + rnorm(n_obs)
# test/estimate variance effect
fit <- varGWASR::model(data.frame(x, y), "x", "y")
# add true variance difference
fit$v1 <- var(y[x==1]) - var(y[x==0])
fit$v2 <- var(y[x==2]) - var(y[x==0])
# store result
fit$phi <- phi
results <- rbind(results, fit)
}
}
Power/T1E of the Brown-Forsythe test using LAD regression
tbl <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(tidy(binom.test(sum(phi_p < 0.05), n_sim))) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::rename(power="estimate", ci_95l="conf.low", ci_95h="conf.high")
knitr::kable(tbl, digits=3)
phi | power | ci_95l | ci_95h |
---|---|---|---|
0.0 | 0.054 | 0.041 | 0.070 |
0.5 | 0.043 | 0.031 | 0.057 |
1.0 | 0.104 | 0.086 | 0.125 |
1.5 | 0.295 | 0.267 | 0.324 |
2.0 | 0.671 | 0.641 | 0.700 |
2.5 | 0.928 | 0.910 | 0.943 |
3.0 | 0.996 | 0.990 | 0.999 |
3.5 | 1.000 | 0.996 | 1.000 |
4.0 | 1.000 | 0.996 | 1.000 |
Effect size of the Brown-Forsythe test using LAD regression
# estimate mean and 95% CI
v1 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(t.test(phi_x1) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
v2 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(t.test(phi_x2) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
v1_mean <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(x=mean(v1))
v2_mean <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(x=mean(v2))
v1 <- merge(v1, v1_mean, "phi")
v1$wd <- 0.01
v2 <- merge(v2, v2_mean, "phi")
v2$wd <- 0.05
ci <- rbind(v1, v2)
ci$phi <- as.factor(ci$phi)
ci$genotype <- as.factor(ci$genotype)
ggplot(data=ci, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, linetype="dashed", color="grey") +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Estimated difference in variance compared with SNP=0 (95% CI)") +
facet_wrap(method ~ genotype, scales="free") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)
#> Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2
#> 3.5.0.
#> ℹ Please use theme(legend.title = element_text(hjust)) instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
Confidence interval coverage of the Brown-Forsythe test using LAD regression
results <- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
names(results)[10] <- "v1_mean"
names(results)[11] <- "v2_mean"
results$phi_x1_lci <- results$phi_x1 - (1.96 * results$se_x1)
results$phi_x1_uci <- results$phi_x1 + (1.96 * results$se_x1)
results$phi_x2_lci <- results$phi_x2 - (1.96 * results$se_x2)
results$phi_x2_uci <- results$phi_x2 + (1.96 * results$se_x2)
coverage1 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x1_lci <= v1_mean & phi_x1_uci >= v1_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
coverage2 <- results %>%
dplyr::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x2_lci <= v2_mean & phi_x2_uci >= v2_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
coverage1 <- merge(coverage1, v1_mean, "phi")
coverage1$wd <- 0.01
coverage2 <- merge(coverage2, v2_mean, "phi")
coverage2$wd <- 0.05
coverage <- rbind(coverage1, coverage2)
coverage$phi <- as.factor(coverage$phi)
coverage$genotype <- as.factor(coverage$genotype)
ggplot(data=coverage, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Coverage of 95% CI (95% CI)") +
geom_hline(yintercept = 0.95, linetype = "dashed", color = "grey") +
facet_grid(method ~ genotype, scales="free_x") +
labs(shape="Genotype") +
scale_y_continuous(limits = c(0, 1), breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)