---
title: "Feature summary"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Feature summary}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

## Create Omiprep object

```{r setup}
library(omiprep)

# import the example data shipped in the package's extdata directory;
# the first column of each file is used as the row names
data <- read.csv(
  system.file("extdata", "dummy_data.csv", package = "omiprep"),
  header = TRUE,
  row.names = 1
) |>
  as.matrix()

samples <- read.csv(
  system.file("extdata", "dummy_samples.csv", package = "omiprep"),
  header = TRUE,
  row.names = 1
)

features <- read.csv(
  system.file("extdata", "dummy_features.csv", package = "omiprep"),
  header = TRUE,
  row.names = 1
)

# create object
mydata <- Omiprep(data = data, samples = samples, features = features)
```

## Summary of Omiprep object

```{r omiprep_summary}
summary(mydata)
```

## Run feature summary

```{r feature_summary1}
# note that for illustrative purposes we are using a log outlier unit distance
# of 1.0 here, in practice we tend to favor a value of 5.0.
feature_sum1 <- feature_summary(
  omiprep = mydata,
  source_layer = "input",
  outlier_udist = 1.0,
  tree_cut_height = 0.5,
  output = "data.frame",
  cores = 1
)
```

## Table of feature summary

```{r feature_sum1_table, echo=FALSE}
# show only the first 10 rows to keep the rendered table short
feature_sum1 |>
  head(n = 10) |>
  knitr::kable(digits = 3, row.names = FALSE, align = "c") |>
  kableExtra::kable_styling(full_width = FALSE)
```

## Feature summary attributes

In addition to the summary data, the hierarchical cluster dendrogram is appended to the returned `data.frame` as an `attribute`. This can be accessed with the attribute name: `[source_layer]_tree`; in this case we summarised the `input` data, therefore the attribute name is `input_tree`.

```{r tree, out.width="100%", fig.align='center', fig.alt = "Hierarchical clustering dendrogram of the features, with independent features colored blue and a red horizontal line at the tree cut height", fig.width = 10, fig.height = 4}
suppressPackageStartupMessages(library(dendextend))

# extract tree from attributes (exact = TRUE avoids partial name matching)
tree <- attr(feature_sum1, "input_tree", exact = TRUE)
dend <- stats::as.dendrogram(tree)

# color the independent features blue;
# rows are re-ordered to follow the leaf order of the dendrogram
metab_color <- feature_sum1[, c("feature_id", "independent_features")]
metab_color <- metab_color[match(labels(dend), metab_color$feature_id), ]
metab_color$color <- ifelse(
  metab_color$independent_features == TRUE,
  "#477EB8",
  "grey"
)

# format dendrogram for plotting
dend <- dend |>
  dendextend::set("labels_cex", 0.75) |>
  dendextend::set("labels_col", metab_color$color) |>
  dendextend::set("branches_lwd", 1) |>
  dendextend::set("branches_k_color", value = metab_color$color)

## plot the dendrogram
dend |>
  plot(main = "Feature clustering dendrogram")

# red line at height 0.5, the tree_cut_height passed to feature_summary()
abline(h = 0.5, col = "#E41A1C", lwd = 1.5)
```

## Run feature summary on subset

Using the `sample_ids` and `feature_ids` arguments you can run the summary for a subset of the data. Note: all rows will be returned, however summary data will only be returned for the specified ids.

```{r feature_sum_subset}
# fix the RNG seed so the randomly drawn feature subset (and therefore the
# tables below) is identical every time the vignette is built
set.seed(1)

## define a vector of sample IDs
sids <- mydata@samples[mydata@samples$sex == "female", "sample_id"]

## define a vector of feature IDs
fids <- mydata@features[, "feature_id"] |>
  sample(10)

# note that for illustrative purposes we are using a log outlier unit distance
# of 1.0 here, in practice we tend to favor a value of 5.0.
feature_sum_subset <- feature_summary(
  omiprep = mydata,
  source_layer = "input",
  outlier_udist = 1.0,
  tree_cut_height = 0.5,
  sample_ids = sids,
  feature_ids = fids,
  output = "data.frame",
  cores = 1
)
```

## Table of feature summary for subset

```{r feature_sum_subset_table, echo=FALSE}
# drop rows containing NA before rendering the table
feature_sum_subset |>
  na.omit() |>
  knitr::kable(digits = 3, row.names = FALSE, align = "c") |>
  kableExtra::kable_styling(full_width = FALSE) |>
  kableExtra::scroll_box(width = "100%", height = "500px")
```

## Run sample & feature summaries together

```{r summarise_data}
# note that for illustrative purposes we are using a log outlier unit distance
# of 1.0 here, in practice we tend to favor a value of 5.0.
sam_n_feat_sum <- summarise(
  omiprep = mydata,
  source_layer = "input",
  outlier_udist = 1.0,
  tree_cut_height = 0.5,
  sample_ids = sids,
  feature_ids = fids,
  output = "data.frame",
  cores = 1
)
```

## Table of feature summary for subset

```{r show2, echo=FALSE}
# the feature-level table is taken from the `feature_summary` element
sam_n_feat_sum$feature_summary |>
  na.omit() |>
  knitr::kable(digits = 3, row.names = FALSE, align = "c") |>
  kableExtra::kable_styling(full_width = TRUE) |>
  kableExtra::scroll_box(width = "100%", height = "500px")
```