Title: | CAMeRa (Cross Ancestral Mendelian Randomisation) |
---|---|
Description: | CAMERA estimates the joint causal effect in multiple ancestries and detects pleiotropy via the zero relevance model. |
Authors: | Yoonsu Cho [aut], Gibran Hemani [aut, cre], Tom Palmer [aut] |
Maintainer: | Gibran Hemani <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.0 |
Built: | 2024-12-30 03:51:54 UTC |
Source: | https://github.com/MRCIEU/CAMERA |
A simple wrapper function. Using a summary set, identify a set of instruments for the traits, and perform SEM MR to test the association across populations.
import()
Migrate the results from a previous CAMERA
CAMERA$import(x)
x
R6 Environment created for CAMERA. Default = x
assign()
CAMERA$assign(...)
import_from_local()
CAMERA$import_from_local( instrument_raw, instrument_outcome, instrument_regions, instrument_outcome_regions, exposure_ids, outcome_ids, pops, ... )
new()
Create a new dataset and initialise an R interface
CAMERA$new( exposure_ids = NULL, outcome_ids = NULL, pops = NULL, bfiles = NULL, plink = NULL, radius = NULL, clump_pop = NULL, x = NULL )
exposure_ids
Exposures IDs obtained from IEU GWAS database (https://gwas.mrcieu.ac.uk/) for each population
outcome_ids
Outcome IDs obtained from IEU GWAS database (https://gwas.mrcieu.ac.uk/) for each population
pops
Ancestry information for each population (i.e. AFR, AMR, EUR, EAS, SAS)
bfiles
Locations of LD reference files for each population (Download from: http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz)
plink
Location of executable plink (ver.1.90 is recommended)
radius
Genomic window size to extract SNPs
clump_pop
Reference population for clumping
x
Import data where available
instrument_heterogeneity()
CAMERA$instrument_heterogeneity( instrument = self$instrument_raw, alpha = "bonferroni", method = "ivw", outlier_removal = FALSE )
estimate_instrument_specificity()
CAMERA$estimate_instrument_specificity( instrument, alpha = "bonferroni", winnerscurse = FALSE )
replication_evaluation()
CAMERA$replication_evaluation( instrument = self$instrument_raw, ld = self$ld_matrices )
check_phenotypes()
CAMERA$check_phenotypes(ids = self$exposure_ids)
cross_estimate()
CAMERA$cross_estimate(dat = self$harmonised_dat)
plot_cross_estimate()
CAMERA$plot_cross_estimate(est = self$mrres, qj_alpha = 0.05)
extract_instruments()
CAMERA$extract_instruments(exposure_ids = self$exposure_ids, ...)
extract_instrument_regions()
CAMERA$extract_instrument_regions( radius = self$radius, instrument_raw = self$instrument_raw, exposure_ids = self$exposure_ids )
scan_regional_instruments()
CAMERA$scan_regional_instruments( instrument_raw = self$instrument_raw, instrument_regions = self$instrument_regions )
plot_regional_instruments_maxz()
CAMERA$plot_regional_instruments_maxz( instrument_region_zscores = self$instrument_region_zscores, instruments = self$instrument_raw, region = 1:min(10, nrow(instruments)), comparison = FALSE )
regional_ld_matrices()
CAMERA$regional_ld_matrices( instrument_regions = self$instrument_regions, bfiles = self$bfiles, pops = self$pops, plink = self$plink )
susie_finemap_regions()
CAMERA$susie_finemap_regions( dat = self$instrument_regions, ld = self$ld_matrices )
paintor_finemap_regions()
CAMERA$paintor_finemap_regions( region = self$instrument_regions, ld = self$ld_matrices, PAINTOR = "PAINTOR", workdir = tempdir() )
MsCAVIAR_finemap_regions()
CAMERA$MsCAVIAR_finemap_regions( region = self$instrument_regions, ld = self$ld_matrices, MsCAVIAR = "MsCAVIAR", workdir = tempdir() )
fema_regional_instruments()
CAMERA$fema_regional_instruments( method = "fema", instrument_regions = self$instrument_regions, instrument_raw = self$instrument_raw, n = self$exposure_metadata$sample_size )
plot_regional_instruments()
CAMERA$plot_regional_instruments( region, instrument_regions = self$instrument_regions, meta_analysis_regions = self$instrument_fema_regions )
get_metadata()
CAMERA$get_metadata( exposure_ids = self$exposure_ids, outcome_ids = self$outcome_ids )
estimate_instrument_heterogeneity_per_variant()
CAMERA$estimate_instrument_heterogeneity_per_variant(dat = self$harmonised_dat)
mrgxe()
CAMERA$mrgxe( dat = self$harmonised_dat, variant_list = subset(self$instrument_heterogeneity_per_variant, Qfdr < 0.05)$SNP, nboot = 100 )
mrgxe_plot()
CAMERA$mrgxe_plot(mrgxe_res = self$mrgxe_res)
mrgxe_plot_variant()
CAMERA$mrgxe_plot_variant( variant = self$mrgxe_res %>% dplyr::filter(p.adjust(a_pval, "fdr") < 0.05) %>% { .$SNP }, dat = self$harmonised_dat )
make_outcome_data()
CAMERA$make_outcome_data(exp = self$instrument_raw, p_exp = 0.05/nrow(exp))
make_outcome_local()
CAMERA$make_outcome_local( exp = self$instrument_raw, out = self$instrument_outcome_regions, p_exp = 0.05/nrow(exp) )
harmonise()
CAMERA$harmonise(exp = self$instrument_raw, out = self$instrument_outcome)
set_summary()
CAMERA$set_summary()
pleiotropy()
CAMERA$pleiotropy(harmonised_dat = self$harmonised_dat, mrres = self$mrres)
plot_pleiotropy()
CAMERA$plot_pleiotropy(dat = self$pleiotropy_outliers)
plot_pleiotropy_heterogeneity()
CAMERA$plot_pleiotropy_heterogeneity( dat = self$pleiotropy_Q_outliers, pthresh = 0.05 )
perform_basic_sem()
CAMERA$perform_basic_sem(harmonised_dat = self$harmonised_dat_sem)
runsem()
CAMERA$runsem(model, data, modname)
standardise_data()
CAMERA$standardise_data( dat = self$instrument_raw, standardise_unit = FALSE, standardise_scale = FALSE, scaling_method = "simple_mode" )
clone()
The objects of this class are cloneable with this method.
CAMERA$clone(deep = FALSE)
deep
Whether to make a deep clone.
A simple wrapper function for importing data from local files for use with the CAMERA class.
new()
Create a new dataset and initialise an R interface
CAMERA_local$new( metadata, ld_ref, plink_bin, mc.cores = 1, radius = 25000, pthresh = 5e-08, minmaf = 0.01 )
metadata
Data frame with information about the data. One row per dataset. See details for info on columns
ld_ref
Data frame with two columns - pop = population (referencing the pop values in metadata), bfile = path to plink file for that reference
plink_bin
Location of executable plink (ver.1.90 is recommended)
radius
Genomic window size to extract SNPs
pthresh
P-value threshold for instrument inclusion
minmaf
Minimum allele frequency per dataset
clump_pop
Reference population for clumping
standardise()
CAMERA_local$standardise( d, ea_col = "ea", oa_col = "oa", beta_col = "beta", eaf_col = "eaf", chr_col = "chr", pos_col = "pos", vid_col = "vid" )
read_file()
CAMERA_local$read_file(m, minmaf = 0.01)
pool_tophits()
CAMERA_local$pool_tophits( rawdat, tophits, metadata, radius = 250000, pthresh = 5e-08, mc.cores = 10 )
organise_data()
CAMERA_local$organise_data( metadata = self$metadata, plink_bin = self$plink_bin, ld_ref = self$ld_ref, pthresh = self$pthresh, minmaf = self$minmaf, radius = self$radius, mc.cores = self$mc.cores )
fixed_effects_meta_analysis_fast()
CAMERA_local$fixed_effects_meta_analysis_fast(beta_mat, se_mat)
organise()
CAMERA_local$organise()
clone()
The objects of this class are cloneable with this method.
CAMERA_local$clone(deep = FALSE)
deep
Whether to make a deep clone.
For a single variant estimated in different subgroups.
egger_bootstrap(b_gx, se_gx, b_gy, se_gy, nboot = 1000)
egger_bootstrap(b_gx, se_gx, b_gy, se_gy, nboot = 1000)
b_gx |
Vector of instrument-exposure associations, one for each sub group |
se_gx |
Vector of standard errors to b_gx |
b_gy |
Vector of instrument-outcome associations, one for each sub group |
se_gy |
Vector of standard errors for b_gy |
nboot |
Number of bootstraps. Default=1000 |
Estimate the degree of pleiotropy using MR GxE. This method uses a negative control type approach based on an assumption that the instrument-exposure association is uncorrelated with the pleiotropic effect. Therefore, as the instrument-exposure association reduces in magnitude, the effect on the outcome will reduce towards an intercept term which represents the pleiotropic effect.
Standard errors are obtained from parametric bootstrap
List
a = intercept estimate (pleiotropy)
b = slope estimate (b_iv effect)
a_se = standard error of intercept
b_se = standard error of slope
a_pval = p-value of intercept estimate
b_pval = p-value of slope estimate
a_mean = mean value of intercept from bootstraps
b_mean = mean value of slope estimates from bootstraps
Perform fixed effects meta analysis for one association
fixed_effects_meta_analysis(beta_vec, se_vec, infl = 10000)
fixed_effects_meta_analysis(beta_vec, se_vec, infl = 10000)
beta_vec |
Vector of betas |
se_vec |
Vector of ses |
infl |
Inflation factor - how much larger is the estimate than the estimate of the tightest SE - for use in removing unreliable estimates |
list of results
Assumes effects across studies are all on the same scale
fixed_effects_meta_analysis_fast(beta_mat, se_mat)
fixed_effects_meta_analysis_fast(beta_mat, se_mat)
beta_mat |
Matrix of betas - rows are SNPs, columns are studies |
se_mat |
Matrix of SEs - rows are SNPs, columns are studies |
list of meta analysis betas and SEs
Sometimes estimates appear unstable. They are likely unreliable and best to not use for heterogeneity analyses etc.
identify_blownup_estimates(b, se, infl)
identify_blownup_estimates(b, se, infl)
b |
Vector of betas |
se |
Vector of SEs |
infl |
Inflation factor - how much larger is the estimate than the estimate of the tightest SE |
index of betas to remove
Taken from Okbay et al 2016. Under the assumption that all discovery effects are unbiased, what fraction of associations would replicate in the replication dataset, given the differential power of the discovery and replication datasets. Uses standard error of the replication dataset to account for differences in sample size and distribution of independent variable
prop_overlap(b_disc, b_rep, se_disc, se_rep, alpha)
prop_overlap(b_disc, b_rep, se_disc, se_rep, alpha)
b_disc |
Vector of discovery betas |
b_rep |
Vector of replication betas |
se_disc |
Vector of discovery standard errors |
se_rep |
Vector of replication standard errors |
alpha |
Nominal replication significance threshold |
List of results
res: aggregate expected replication rate vs observed replication rate
variants: per variant expected replication rates
Uses weighted Z scores following advice from https://onlinelibrary.wiley.com/doi/full/10.1111/j.1420-9101.2005.00917.x. Suggested weights are 1/se^2. However, se is scale dependent and it would be ideal to avoid scale issues at this stage, so instead the expected se is calculated based on n and af. Assumes continuous traits in the way it uses n (i.e. not case control aware at the moment).
z_meta_analysis(beta_mat, se_mat, n, eaf_mat)
z_meta_analysis(beta_mat, se_mat, n, eaf_mat)
beta_mat |
Matrix of betas - rows are SNPs, columns are studies |
se_mat |
Matrix of SEs - rows are SNPs, columns are studies |
n |
Vector of sample sizes for each study |
eaf_mat |
Matrix of allele frequencies - rows are SNPs, columns are studies |