diff --git a/.Rbuildignore b/.Rbuildignore index 7d68da4..6256641 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,5 @@ ^Dockerfile$ ^Dockerfiles/.*$ ^Conda_Recipe/.*$ +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index dabea9e..fdd4445 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ tests/testthat/output/ inst/doc #inst/extdata/* -docs *.Rds *.rds +/doc/ +/Meta/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fe1d48..a3438a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ # CHANGELOG +## v1.0.3 (in development) +### Feature + +* feat: Add compareCellPopulations() function for comparing cell population distributions across experimental groups + - Visualizes cell population frequencies or absolute counts across multiple groups + - Generates alluvial flow bar plots and faceted box plots + - Supports custom group ordering and color palettes + - Added ggalluvial dependency for flow visualizations + - Generated from JSON template using json2r.prompt.md instructions ## v1.0.2 (2024-02-01) diff --git a/DESCRIPTION b/DESCRIPTION index 0a26fcb..e74cdc5 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,6 +39,7 @@ Imports: gargle (>= 1.2.0), ggplot2 (>= 3.3.6), ggpubr (>= 0.4.0), + ggalluvial, globals (>= 0.16.1), harmony (>= 0.1.1), hdf5r (>= 1.3.5), @@ -83,8 +84,8 @@ Imports: dendextend, dendsort, pheatmap, - scales, celldex, + scales, gdata, ggrepel, tidyr, diff --git a/NAMESPACE b/NAMESPACE index 96b1ac4..6e6b355 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,15 +3,19 @@ export(aggregateCounts) export(annotateCellTypes) export(appendMetadataToSeuratObject) +export(build_modscore_plots) export(colorByGene) export(colorByMarkerTable) export(combineNormalize) +export(compareCellPopulations) +export(compute_modscore_data) export(degGeneExpressionMarkers) export(dotPlotMet) export(dualLabeling) export(filterQC) export(filterSeuratObjectByMetadata) export(heatmapSC) +export(launch_module_score_app) export(modScore) export(nameClusters) 
export(object) @@ -21,7 +25,7 @@ export(processRawData) export(reclusterFilteredSeuratObject) export(reclusterSeuratObject) export(tSNE3D) -export(violinPlot_mod) +export(violinPlot) import(MAST) import(RColorBrewer) import(Seurat) @@ -40,12 +44,14 @@ import(gridExtra) import(harmony) import(httr) import(jsonlite) +import(magrittr) import(parallel) import(plotly) import(quantmod) import(reshape2) import(rlang) import(scales) +import(tibble) import(tidyverse) import(tools) import(utils) @@ -76,6 +82,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,filter) +importFrom(dplyr,group_by) importFrom(dplyr,if_else) importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) @@ -86,14 +93,26 @@ importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(ggExtra,ggMarginal) +importFrom(ggalluvial,geom_flow) importFrom(ggplot2,aes) importFrom(ggplot2,coord_fixed) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_text) importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) +importFrom(ggplot2,geom_violin) importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) +importFrom(ggplot2,guide_legend) +importFrom(ggplot2,guides) +importFrom(ggplot2,scale_color_gradientn) importFrom(ggplot2,scale_color_identity) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,scale_y_reverse) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) @@ -128,6 +147,7 @@ importFrom(stats,kmeans) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,quantile) +importFrom(stats,setNames) importFrom(stringr,str_replace_all) importFrom(stringr,str_sort) importFrom(stringr,str_split_fixed) diff --git a/R/Compare_Cell_Populations.R b/R/Compare_Cell_Populations.R new file mode 100644 index 0000000..d427394 --- /dev/null +++ 
b/R/Compare_Cell_Populations.R
@@ -0,0 +1,273 @@
+#' @title Compare Cell Populations
+#' @description Compare cell population distributions across different groups
+#'   using bar plots and box plots. Creates visualizations showing cell type
+#'   frequencies or counts across user-defined groupings.
+#'
+#' @details This function generates comparative visualizations of cell
+#'   populations from a Seurat object. It can display data as either frequency
+#'   percentages or absolute counts, and creates both stacked bar plots
+#'   (with alluvial flow connections) and grouped box plots for comparison
+#'   across samples and conditions. Dots in metadata column names, and in the
+#'   column-name arguments, are replaced with underscores before use.
+#'
+#' @param object A Seurat object containing the single-cell data
+#' @param annotation.column Character string specifying the metadata column
+#'   containing cell type annotations to summarize in the bar plot
+#' @param group.column Character string specifying the metadata column
+#'   defining groups to compare (e.g., treatment conditions)
+#' @param sample.column Character string specifying the metadata column
+#'   containing sample identifiers. Default is "orig.ident"
+#' @param counts.type Character string specifying plot data type:
+#'   "Frequency" (percentages) or "Counts" (absolute numbers). Default is "Frequency"
+#' @param group.order Character vector specifying the order of groups in plots.
+#'   If NULL, groups are ordered by first appearance in the metadata.
+#'   Default is NULL
+#' @param wrap.ncols Integer specifying number of columns for facet wrapping
+#'   in box plots. Default is 5
+#'
+#' @import Seurat
+#' @import ggplot2
+#' @import ggpubr
+#' @import RColorBrewer
+#' @import tibble
+#' @import reshape2
+#' @import data.table
+#' @import dplyr
+#' @import magrittr
+#' @import cowplot
+#' @import gridExtra
+#' @import grid
+#' @import scales
+#'
+#' @importFrom ggalluvial geom_flow
+#' @importFrom stats setNames
+#' @importFrom grDevices colorRampPalette
+#'
+#' @export
+#'
+#' @return A list containing:
+#' \itemize{
+#'   \item \code{Plots} - A list with two ggplot objects:
+#'     \itemize{
+#'       \item \code{Barplot} - Stacked bar plot with alluvial flows
+#'       \item \code{Boxplot} - Faceted box plots by cell type, one point per
+#'         sample (returned for both values of \code{counts.type})
+#'     }
+#'   \item \code{Table} - A data.frame with cell counts and percentages
+#' }
+#'
+#' @examples
+#' \dontrun{
+#' # Compare cell populations by treatment group
+#' results <- compareCellPopulations(
+#'   object = seurat_obj,
+#'   annotation.column = "cell_type",
+#'   group.column = "treatment",
+#'   sample.column = "sample_id",
+#'   counts.type = "Frequency"
+#' )
+#'
+#' # Display plots
+#' plot(results$Plots$Barplot)
+#' plot(results$Plots$Boxplot)
+#'
+#' # View summary table
+#' head(results$Table)
+#' }
+
+compareCellPopulations <- function(
+    object,
+    annotation.column,
+    group.column,
+    sample.column = "orig.ident",
+    counts.type = "Frequency",
+    group.order = NULL,
+    wrap.ncols = 5
+) {
+
+  ## -------------------------------- ##
+  ## Input Validation                 ##
+  ## -------------------------------- ##
+
+  # Validate object
+  if (!inherits(object, "Seurat")) {
+    stop("Error: 'object' must be a Seurat object")
+  }
+
+  # Validate counts.type; the length check keeps `if` from failing on vectors
+  if (length(counts.type) != 1 ||
+      !counts.type %in% c("Frequency", "Counts")) {
+    stop("Error: 'counts.type' must be either 'Frequency' or 'Counts'")
+  }
+
+  ## --------- ##
+  ## Functions ##
+  ## --------- ##
+
+  # Cross-tabulate AnnoCol (rows) by GroupCol (columns); return the counts,
+  # the column-wise percentages, and a merged counts + percent table.
+  createAnnoTable <- function(SO, AnnoCol, GroupCol) {
+    ## Extract annotation data for each group using a 2D contingency table
+    cntMat <- table(SO@meta.data[[AnnoCol]], SO@meta.data[[GroupCol]])
+
+    # Convert to data frame while preserving row/column names
+    cntTble <- as.data.frame.matrix(cntMat)
+    cntTble <- data.frame(
+      lapply(cntTble, function(x) as.numeric(as.character(x))),
+      check.names = FALSE,
+      row.names = rownames(cntTble)
+    )
+
+    # Column-wise percentages. prop.table() always returns a matrix with
+    # dimnames; apply() would collapse to a bare vector when there is only
+    # one annotation level and break the downstream merge()/melt() calls.
+    freqTble <- prop.table(as.matrix(cntTble), margin = 2) * 100
+
+    outTbl <- merge(cntTble, as.data.frame(freqTble),
+                    by = 'row.names',
+                    suffixes = c('_CellCounts', '_Percent'))
+    outTbl <- dplyr::rename(outTbl, 'Clusters' = "Row.names")
+
+    return(list(
+      'CellFreq' = freqTble,
+      'CellCounts' = cntTble,
+      'OutTable' = outTbl
+    ))
+  }
+
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+
+  # Replace dots with underscores in column names
+  colnames(object@meta.data) <- gsub("\\.", "_", colnames(object@meta.data))
+
+  # Apply the same substitution to the column arguments so they still match
+  annotation.column <- gsub("\\.", "_", annotation.column)
+  group.column <- gsub("\\.", "_", group.column)
+  sample.column <- gsub("\\.", "_", sample.column)
+
+  # Validate metadata columns exist
+  required.cols <- c(annotation.column, group.column, sample.column)
+  missing.cols <- setdiff(required.cols, colnames(object@meta.data))
+  if (length(missing.cols) > 0) {
+    stop("Error: The following columns are missing from metadata: ",
+         paste(missing.cols, collapse = ", "))
+  }
+
+  # Set up ordering
+  ordr <- sort(unique(object@meta.data[[annotation.column]]))
+
+  if (is.null(group.order)) {
+    group.order <- unique(object@meta.data[[group.column]])
+  } else {
+    # Groups missing from group.order turn into NA when the plotting factors
+    # are built below; warn instead of failing so partial orderings still run.
+    dataGroups <- unique(as.character(object@meta.data[[group.column]]))
+    droppedGroups <- setdiff(dataGroups[!is.na(dataGroups)],
+                             as.character(group.order))
+    if (length(droppedGroups) > 0) {
+      warning("The following groups are not listed in 'group.order' and ",
+              "will be plotted as NA: ",
+              paste(droppedGroups, collapse = ", "))
+    }
+  }
+
+  # Set up colors: 16 fixed colors followed by an interpolated "Paired" ramp
+  numColors <- max(length(ordr), 20)
+  colpaired <- colorRampPalette(brewer.pal(12, "Paired"))
+  cols <- c(
+    "#e6194B", "#3cb44b", "#4363d8", "#f58231", "#911eb4", "#42d4f4",
+    "#f032e6", "#bfef45", "#fabebe", "#469990", "#e6beff", "#9A6324",
+    "#800000", "#aaffc3", "#808000", "#000075",
+    colpaired(numColors)
+  )
+  # Keep exactly one color per annotation level so names and values line up
+  cols <- cols[seq_along(ordr)]
+  names(cols) <- as.character(ordr)
+
+  object@meta.data[[annotation.column]] <- factor(
+    object@meta.data[[annotation.column]],
+    levels = ordr
+  )
+
+  # Create tables: per group for the bar plot, per sample for the box plot
+  ColTables <- createAnnoTable(object, annotation.column, group.column)
+  BoxTables <- createAnnoTable(object, annotation.column, sample.column)
+
+  # One row per (group, sample) pair, used to attach the group to each sample
+  metaGroups <- object@meta.data[, c(group.column, sample.column)]
+  rownames(metaGroups) <- NULL
+  metaGroups <- unique(metaGroups)
+
+  ## Select the tables and labels that match the requested counts type
+  if (counts.type == 'Frequency') {
+    barMat <- ColTables$CellFreq
+    boxMat <- BoxTables$CellFreq
+    labelCol <- 'PerValue'
+    yAxisLabel <- 'Frequency of each cell type (100%)'
+  } else {
+    barMat <- ColTables$CellCounts
+    boxMat <- BoxTables$CellCounts
+    labelCol <- 'value'
+    yAxisLabel <- 'Cell Counts'
+  }
+
+  # reshape2::melt is called explicitly because data.table also exports melt()
+  ptbl <- reshape2::melt(as.matrix(barMat))
+  ptblBox <- reshape2::melt(as.matrix(boxMat))
+  ptblBox <- merge(ptblBox, metaGroups,
+                   by.x = 'Var2', by.y = sample.column, all.x = TRUE)
+
+  # Format bar plot data
+  ptbl$Var1 <- factor(ptbl$Var1, levels = ordr)
+  ptbl$value <- round(ptbl$value, 1)
+  ptbl$PerValue <- paste0(ptbl$value, '%')
+  ptbl$PerValue <- gsub('^%$', "_", ptbl$PerValue)
+  # Blank out labels for tiny or undefined values; the is.na() guard keeps
+  # the assignment valid when an empty group yields NaN percentages
+  ptbl[is.na(ptbl$value) | ptbl$value < 1, 'PerValue'] <- ""
+  ptbl$Var2 <- factor(ptbl$Var2, levels = group.order)
+
+  # Create bar plot with alluvial flows. The .data pronoun is used because
+  # aes_string() is deprecated in ggplot2.
+  p2 <- ggplot(ptbl, aes(y = .data$value, x = .data$Var2,
+                         fill = .data$Var1, label = .data[[labelCol]])) +
+    geom_flow(aes(alluvium = Var1), alpha = .2,
+              lty = 2, color = "black",
+              curve_type = "linear",
+              width = .5) +
+    geom_col(aes(fill = Var1), width = .5, color = "black") +
+    geom_text(size = 3, position = position_stack(vjust = 0.5)) +
+    theme_classic() +
+    ylab(yAxisLabel) +
+    xlab("") +
+    scale_x_discrete(guide = guide_axis(angle = 45)) +
+    scale_fill_manual(annotation.column, values = cols)
+
+  # Create box plot (one point per sample, faceted by cell type)
+  ptblBox$value <- round(ptblBox$value, 1)
+  ptblBox[[group.column]] <- factor(ptblBox[[group.column]],
+                                    levels = group.order)
+
+  p2_Box <- ggboxplot(ptblBox, y = 'value', x = group.column,
+                      add = "jitter", color = "Var1") +
+    facet_wrap(~Var1, ncol = wrap.ncols, scales = 'fixed') +
+    ylab(yAxisLabel) +
+    xlab("") +
+    theme(legend.title = element_blank())
+
+  # Return results
+  result <- list(
+    'Plots' = list('Barplot' = p2, 'Boxplot' = p2_Box),
+    'Table' = ColTables$OutTable
+  )
+
+  return(result)
+}
+
+# Add global variables to avoid R CMD check NOTEs
+utils::globalVariables(c(
+  "Var1", "Var2", "value", "PerValue", "alluvium",
+  ".", "CellFreq", "CellCounts", "OutTable"
+))
diff --git a/R/ModuleScoreHelpers.R b/R/ModuleScoreHelpers.R
index 539b6d9..462e582 100644
--- a/R/ModuleScoreHelpers.R
+++ b/R/ModuleScoreHelpers.R
@@ -1,12 +1,14 @@
 #' @title Helpers for ModuleScore Shiny app
 #' @description Precompute module scores per celltype and build plots from cached data.
+#' @name modscore-imports #' @keywords internal #' @importFrom dplyr mutate group_by summarise arrange select #' @importFrom ggplot2 ggplot aes theme_bw theme element_blank -#' @importFrom geom_point scale_color_gradientn guides guide_legend -#' @importFrom xlab ylab element_text geom_violin theme_classic geom_hline -#' @importFrom scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous -#' @importFrom gridExtra arrangeGrob grid textGrob gpar +#' @importFrom ggplot2 geom_point scale_color_gradientn guides guide_legend +#' @importFrom ggplot2 xlab ylab element_text geom_violin theme_classic geom_hline +#' @importFrom ggplot2 scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous +#' @importFrom gridExtra arrangeGrob +#' @importFrom grid textGrob gpar NULL #' @export diff --git a/R/ModuleScoreHelpers_011726.R b/R/ModuleScoreHelpers_011726.R index 539b6d9..6a76384 100644 --- a/R/ModuleScoreHelpers_011726.R +++ b/R/ModuleScoreHelpers_011726.R @@ -1,12 +1,14 @@ #' @title Helpers for ModuleScore Shiny app #' @description Precompute module scores per celltype and build plots from cached data. 
+#' @name modscore-imports-011726 #' @keywords internal #' @importFrom dplyr mutate group_by summarise arrange select #' @importFrom ggplot2 ggplot aes theme_bw theme element_blank -#' @importFrom geom_point scale_color_gradientn guides guide_legend -#' @importFrom xlab ylab element_text geom_violin theme_classic geom_hline -#' @importFrom scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous -#' @importFrom gridExtra arrangeGrob grid textGrob gpar +#' @importFrom ggplot2 geom_point scale_color_gradientn guides guide_legend +#' @importFrom ggplot2 xlab ylab element_text geom_violin theme_classic geom_hline +#' @importFrom ggplot2 scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous +#' @importFrom gridExtra arrangeGrob +#' @importFrom grid textGrob gpar NULL #' @export diff --git a/R/Violin_Plots_by_Metadata.R b/R/Violin_Plots_by_Metadata.R index 3a0f580..290f78c 100644 --- a/R/Violin_Plots_by_Metadata.R +++ b/R/Violin_Plots_by_Metadata.R @@ -26,7 +26,7 @@ #' @export #' @examples #' \dontrun{ -#' violinPlot_mod( +#' violinPlot( #' object = seurat, #' assay = "SCT", #' layer = "data", @@ -41,7 +41,7 @@ #' @return violin ggplot2 object -violinPlot_mod <- function (object, +violinPlot <- function (object, assay, layer, genes, @@ -184,5 +184,5 @@ violinPlot_mod <- function (object, g <- g + geom_jitter(size = jitter_dot_size, shape = 1, position = position_dodge(width = 0.9), alpha = 0.5) } - return(g) + return(list(plots=g)) } diff --git a/README.md b/README.md index f2e1e03..34311d6 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,22 @@ R package for Single Cell analysis The Single Cell Workflow streamlines the analysis of multimodal Single Cell RNA-Seq data produced from 10x Genomics. It can be run in a docker container, and for biologists, in user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry). Much of it is based on the Seurat workflow in Bioconductor, and supports CITE-Seq data. 
It incorporates a cell identification step (ModScore) that utilizes module scores obtained from Seurat and also includes Harmony for batch correction. +## Key Functions + +### Sequential Workflow +1. **processRawData()** - Process H5 files into Seurat objects +2. **filterQC()** - Quality control and filtering +3. **combineNormalize()** - Merge samples, normalize, dimension reduction +4. **Harmony integration** (optional) - Batch correction +5. **annotateCellTypes()** - Automatic cell type annotation via SingleR + +### Analysis & Visualization +- **compareCellPopulations()** - Compare cell population distributions across groups +- **degGeneExpressionMarkers()** - Differential expression analysis +- **reclusterSeuratObject()** / **reclusterFilteredSeuratObject()** - Subset and re-cluster +- **colorByGene()**, **heatmapSC()**, **violinPlot()** - Visualization functions +- **plotMetadata()**, **dotPlotMet()** - Metadata visualization +
For further documentation see our detailed [Docs Website](https://nidap-community.github.io/SCWorkflow/) diff --git a/docs/CHANGELOG.html b/docs/CHANGELOG.html index cdf2cd9..d7caeea 100644 --- a/docs/CHANGELOG.html +++ b/docs/CHANGELOG.html @@ -44,6 +44,19 @@

CHANGELOG

+
+

v1.0.3 (in development)

+
+

Feature

+
  • feat: Add compareCellPopulations() function for comparing cell population distributions across experimental groups +
    • Visualizes cell population frequencies or absolute counts across multiple groups
    • +
    • Generates alluvial flow bar plots and faceted box plots
    • +
    • Supports custom group ordering and color palettes
    • +
    • Added ggalluvial dependency for flow visualizations
    • +
    • Generated from JSON template using json2r.prompt.md instructions
    • +
  • +
+

v1.0.2 (2024-02-01)

@@ -74,7 +87,7 @@

Documentation8b5cc98)

-

Feature

+

Feature

  • feat: Update test-annotation to suppress warnings (3d5cf8f)

  • feat: test (4c4cee7)

  • feat: test (c8274f9)

  • diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5519705..f0f2a04 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -42,27 +42,8 @@

    License

-
MIT License
-
-Copyright (c) 2024 NIDAP Community
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+
YEAR: 2024
+COPYRIGHT HOLDER: NIDAP Community
 
diff --git a/docs/LICENSE.html b/docs/LICENSE.html new file mode 100644 index 0000000..61736ff --- /dev/null +++ b/docs/LICENSE.html @@ -0,0 +1,71 @@ + +NA • SCWorkflow + Skip to contents + + +
+
+
+ + +

MIT License

+

Copyright (c) 2024 NIDAP Community

+

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/articles/CONTRIBUTING.html b/docs/articles/CONTRIBUTING.html new file mode 100644 index 0000000..31a76ec --- /dev/null +++ b/docs/articles/CONTRIBUTING.html @@ -0,0 +1,526 @@ + + + + + + + +Contributing to SCWorkflow • SCWorkflow + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Overview +

+

+


+
+
+

+Propose Change +

+
+
+

+Clone the repo +

+

If you are a member of CCBR, +you can clone this repository to your computer or development +environment.

+


+

SCWorkflow is a large repository so this may take a few minutes.

+
git clone --single-branch --branch DEV https://github.com/NIDAP-Community/SCWorkflow.git
+
+

Cloning into ‘SCWorkflow’…
remote: Enumerating objects: 3126, +done.
remote: Counting objects: 100% (734/734), done.
remote: +Compressing objects: 100% (191/191), done.
remote: Total 3126 +(delta 630), reused 545 (delta 543), pack-reused 2392 (from 1)
+Receiving objects: 100% (3126/3126), 1.04 GiB | 4.99 MiB/s, done.
+Resolving deltas: 100% (1754/1754), done.
Updating files: 100% +(306/306), done.

+
+
cd SCWorkflow
+


+
+
+

+Install dependencies +

+

If this is your first time cloning the repo you may have to install +dependencies

+ +

Check R CMD: In an R console, make sure the package +passes R CMD check by running:

+
+   devtools::check()
+
+

⚠️ Note: If R CMD check doesn’t pass cleanly, it’s a +good idea to ask for help before continuing.

+
+ +


+
+
+

+Load SCWorkflow from repo +

+

In an R console, load the package from the local repo using:

+
+devtools::load_all()
+


+
+
+

+Create branch +

+

Create a Git branch for your pull request (PR). Give the branch a +descriptive name for the changes you will make.

+

Example: Use iss-10 if it’s for a +specific issue, or feature-new-plot for a new feature.

+

For bug fixes or small changes, you can branch from the +main branch.

+
# Switch to main, create a new branch from it, and switch to the new branch
+git switch main
+git branch iss-10
+git switch iss-10
+
+

Success: Switched to a new branch ‘iss-10’

+
+

For new features or larger changes, branch from the DEV +branch.

+
# Switch to DEV branch, create a new branch, and switch to new branch
+git switch DEV
+git branch feature-new-plot
+git switch feature-new-plot
+
+

Success: Switched to a new branch +‘feature-new-plot’

+
+



+
+
+
+

Develop +

+
+
+

+Make your changes +

+

Now you’re ready to edit the code, write unit tests, and update the +documentation as needed.

+


+
+

+Code Style Guidelines +

+

New code should follow the general guidelines outlined here. +Important: Don’t restyle code unrelated to your +PR.

+

Tools to help: Use the styler package to +apply these styles.

+

Key conventions from the tidyverse style +guide:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ElementStyleExample
Variablessnake_casemy_variable
Functionsverbs in camelCaseprocessData()
Assignment +<- operatorx <- 5
Operationspipesdata %>% filter() %>% mutate()
+


+
+
+

+Function Organization +

+

Structure your functions like this:

+

Functions should follow this template. Use roxygen2 for +documentation:

+
+#' @title Function Title
+#' @description Brief description of what the function does
+#' @param param1 Description of first parameter
+#' @param param2 Description of second parameter
+#' @details Additional details if needed
+#' @importFrom package function_name
+#' @export
+#' @return Description of what the function returns
+
+yourFunction <- function(param1, param2) {
+  
+  ## --------- ##
+  ## Functions ##
+  ## --------- ##
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  output_list <- list(
+    object = SeuratObject,
+    plots = list(
+      'plotTitle1' = p1,
+      'plotTitle2' = p2
+    ),
+    data = list(
+      'dataframeTitle' = df1
+    )
+  )
+  
+  return(output_list)
+}
+


+
+
+
+

+Commit and Push Your Changes +

+

Best practices for commits:

+

We recommend following the “atomic commits” +principle where each commit contains one new feature, fix, or task.

+

Learn more: Atomic +Commits Guide

+


+
+
+

+Step-by-Step Process: +

+
+

1️⃣ Check Status +

+

Check the current state of your Git working directory and staging +area:

+
    git status
+
+
+

2️⃣ Stage Files +

+

Add the files that you changed to the staging area:

+
    git add path/to/changed/files/
+
+
+

3️⃣ Make the Commit +

+
    git commit -m 'feat: create function for awesome feature'
+

Your commit message should follow the Conventional +Commits specification. Briefly, each commit should start with one of +the approved types such as feat, fix, +docs, etc. followed by a description of the commit. Take a +look at the Conventional +Commits specification for more detailed information about how to +write commit messages.

+ +
+
+

4️⃣ Push your changes to GitHub: +

+
   git push
+

If this is the first time you are pushing this branch, you may have +to explicitly set the upstream branch:

+
   git push --set-upstream origin iss-10
+

We recommend pushing your commits often so they will be backed up on +GitHub. You can view the files in your branch on GitHub at +https://github.com/NIDAP-Community/SCWorkflow/tree/<your-branch-name> +(replace <your-branch-name> with the actual name of +your branch).

+



+
+
+
+
+

Document and Tests +

+
+
+

+Writing Tests +

+

Why tests matter: Most changes to the code will also +need unit tests to demonstrate that the changes work as intended.

+

How to add tests:

+
    +
  1. Use testthat +to create your unit tests
  2. +
  3. Follow the organization described in the tidyverse test style +guide +
  4. +
  5. Look at existing code in this package for examples
  6. +
+


+
+
+

+Documentation +

+

When to update documentation:

+
    +
  • Written a new function
  • +
  • Changed the API of an existing function
  • +
  • Function is used in a vignette
  • +
+

How to update documentation:

+
    +
  1. Use roxygen2 with Markdown +syntax +
  2. +
  3. See the R Packages book +for detailed instructions
  4. +
  5. Update relevant vignettes if needed
  6. +
+


+
+
+

+Check Your Work +

+

🔍 Final validation step:

+

After making your changes, run the following command from an R +console to make sure the package still passes R CMD check:

+
+devtools::check()
+
+

Goal: All checks should pass with no errors, +warnings, or notes.

+
+



+
+
+
+

+Deploy Feature +

+
+
+

1️⃣ Create the PR +

+

Once your branch is ready, create a PR on GitHub: https://github.com/NIDAP-Community/SCWorkflow/pull/new/

+

Select the branch you just pushed:

+
+Create a new PR from your branch
Create a new PR from your branch
+
+

Edit the PR title and description. The title should briefly describe +the change. Follow the comments in the template to fill out the body of +the PR, and you can delete the comments (everything between +<!-- and -->) as you go. When you’re +ready, click ‘Create pull request’ to open it.

+
+Open the PR after editing the title and description
Open the PR after editing the title and +description
+
+

Optionally, you can mark the PR as a draft if you’re not yet ready +for it to be reviewed, then change it later when you’re ready.

+
+
+

2️⃣ Wait for a maintainer to review your PR +

+

We will do our best to follow the tidyverse code review principles: +https://code-review.tidyverse.org/. The reviewer may +suggest that you make changes before accepting your PR in order to +improve the code quality or style. If that’s the case, continue to make +changes in your branch and push them to GitHub, and they will appear in +the PR.

+

Once the PR is approved, the maintainer will merge it and the +issue(s) the PR links will close automatically. Congratulations and +thank you for your contribution!

+
+
+

3️⃣ After your PR has been merged +

+

After your PR has been merged, update your local clone of the repo by +switching to the DEV branch and pulling the latest changes:

+
   git checkout DEV
+   git pull
+

It’s a good idea to run git pull before creating a new +branch so it will start from the most recent commits on the branch you +are branching from (DEV or main).

+



+
+
+
+ + +
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/Intro.html b/docs/articles/Intro.html new file mode 100644 index 0000000..91ada33 --- /dev/null +++ b/docs/articles/Intro.html @@ -0,0 +1,167 @@ + + + + + + + + • SCWorkflow + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + + +
+

SCWorkflow +

+

The CCBR Single-cell RNA-seq Package (SCWorkflow) allows users to +analyze their own single-cell RNA-seq datasets starting from CellRanger +output files (H5 or mtx files, etc.).

+
+

Installation +

+
+

You can install the SCWorkflow package from GitHub +with:

+
+# install.packages("remotes")
+remotes::install_github("NIDAP-Community/SCWorkflow", dependencies = TRUE)
+

There is also a Docker container available at

+
+
+
+

Usage +

+
+

Following this workflow you can perform these steps of a single-cell +RNA-seq analysis, and more:

+
    +
  • +

    Quality Control:

    +
      +
    • Import, Select, & Rename Samples

    • +
    • Filter Cells based on QC metrics

    • +
    • Combine Samples, Cluster, and Normalize your Data

    • +
    • Batch Correction using Harmony

    • +
    +
  • +
  • +

    Cell Annotation:

    +
      +
    • SingleR Automated Annotations

    • +
    • Module Scores

    • +
    • Co-Expression

    • +
    • External Annotations

    • +
    +
  • +
  • +

    Visualizations:

    +
      +
    • Dimensionality Reductions (t-SNE and UMAP Plots) colored by +Marker Expression or by Metadata

    • +
    • Heatmaps

    • +
    • Violin Plots

    • +
    • Trajectory

    • +
    +
  • +
  • +

    Differential Expression Analysis

    +
      +
    • Seurat’s FindMarkers()

    • +
    • Pseudobulk Aggregation

    • +
    • Pathway Analysis

    • +
    +
  • +
+

Please see the introductory +vignette for a quick start tutorial. Take a look at the reference +documentation for detailed information on each function in the +package.

+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/README.html b/docs/articles/README.html index 3800316..d1bc74f 100644 --- a/docs/articles/README.html +++ b/docs/articles/README.html @@ -5,14 +5,14 @@ - • SCWorkflow +SCWorkflow-Intro • SCWorkflow - + Skip to contents @@ -64,7 +64,7 @@
diff --git a/docs/articles/SCWorkflow-Overview.html b/docs/articles/SCWorkflow-QC.html similarity index 92% rename from docs/articles/SCWorkflow-Overview.html rename to docs/articles/SCWorkflow-QC.html index 3336cb2..c64d5e1 100644 --- a/docs/articles/SCWorkflow-Overview.html +++ b/docs/articles/SCWorkflow-QC.html @@ -18,7 +18,7 @@ Skip to contents -

Combine, Normalize, and Cluster Data

+

This function combines multiple sample level Seurat Objects into a single Seurat Object and normalizes the combined dataset. The multi-dimensionality of the data will be summarized into a set of @@ -259,13 +266,18 @@

Combine, Normalize, and Cluster Data cell.hashing.data = FALSE )

-



1. Hao Y et al. Integrated analysis of multimodal -single-cell data. Cell. 2021 Jun 24;184(13):3573-3587.e29. doi: +



+
    +
  1. Hao Y et al. Integrated analysis of multimodal single-cell data. +Cell. 2021 Jun 24;184(13):3573-3587.e29. doi: 10.1016/j.cell.2021.04.048. Epub 2021 May 31. PMID: 34062119; PMCID: -PMC8238499. 2. Heumos, L., Schaar, A.C., Lance, C. et al. Best practices -for single-cell analysis across modalities. Nat Rev Genet (2023). https://doi.org/10.1038/s41576-023-00586-w 3. Germain P, -Lun A, Macnair W, Robinson M (2021). “Doublet identification in -single-cell sequencing data using scDblFinder.” f1000research. doi:10.12688/f1000research.73600.1.

    +PMC8238499.

  2. +
  3. Heumos, L., Schaar, A.C., Lance, C. et al. Best practices for +single-cell analysis across modalities. Nat Rev Genet (2023). https://doi.org/10.1038/s41576-023-00586-w

  4. +
  5. Germain P, Lun A, Macnair W, Robinson M (2021). “Doublet +identification in single-cell sequencing data using scDblFinder.” +f1000research. doi:10.12688/f1000research.73600.1.

  6. +
diff --git a/docs/articles/SCWorkflow-Visualizations.html b/docs/articles/SCWorkflow-Visualizations.html index 86abf9b..4db36dc 100644 --- a/docs/articles/SCWorkflow-Visualizations.html +++ b/docs/articles/SCWorkflow-Visualizations.html @@ -216,11 +216,11 @@

Violin Plot from Seurat Object
 
-FigOut=violinPlot_mod(
+FigOut=violinPlot(
                 object=Anno_SO$object, 
                 assay='SCT', 
-                slot='scale.data', 
-                genes=c('Cd163','Cd38'), 
+                layer='scale.data', 
+                genes=c('Itgam','Cd38'), 
                 group='SCT_snn_res.0.4', 
                 facet_by = "", 
                 filter_outliers = F,
@@ -229,7 +229,7 @@ 

Violin Plot from Seurat Object= TRUE, jitter_dot_size = 1 )

-

+


@@ -299,7 +299,7 @@

Heatmap order.heatmap.rows = FALSE, row.order = c() )

-

+


@@ -352,7 +352,53 @@

Dot Plot of Genes by Metadata= FALSE, dot.color = "darkblue" )

-

+

+


+ +
+

Compare Cell Populations +

+
+

This function compares cell population composition across +experimental groups (for example sample, treatment, timepoints, or donor +cohorts) using metadata already stored in the Seurat object. It is +useful after clustering and annotation, when you want to quantify how +specific cell populations shift between conditions. + The function supports both Frequency (percent) and +Counts (absolute cell numbers) modes. In most +biological comparisons with unequal total cell recovery across samples, +frequency mode is preferred for interpretation. Counts mode can be +useful for QC and yield-focused assessments.

+ +

Methodology
+The method first aggregates metadata by annotation and group to compute +percentages and counts. It then links these summaries to sample-level +metadata and generates a composition-focused bar plot and faceted box plots showing sample-level +variability. Together, these plots help distinguish overall +compositional shifts from replicate-level dispersion.

+
+
+FigOut=compareCellPopulations(
+            object=Anno_SO$object,
+            metadata.table=Anno_SO$object@meta.data,
+            annotation.column='immgen_main',
+            group.column='Treatment',
+            counts.type = "Frequency",
+            group.order = NULL,
+            wrap.ncols = 5
+)
+ 
+

+


diff --git a/docs/articles/images/DEV_CheatSheet.png b/docs/articles/images/DEV_CheatSheet.png new file mode 100644 index 0000000..ea888ee Binary files /dev/null and b/docs/articles/images/DEV_CheatSheet.png differ diff --git a/docs/articles/images/MS2.png b/docs/articles/images/MS2.png new file mode 100644 index 0000000..46e88d4 Binary files /dev/null and b/docs/articles/images/MS2.png differ diff --git a/docs/articles/images/MS3.png b/docs/articles/images/MS3.png new file mode 100644 index 0000000..b447d4d Binary files /dev/null and b/docs/articles/images/MS3.png differ diff --git a/docs/articles/images/SubRec_recl.png b/docs/articles/images/SubRec_recl.png new file mode 100644 index 0000000..4e993a7 Binary files /dev/null and b/docs/articles/images/SubRec_recl.png differ diff --git a/docs/articles/images/SubRec_sub2.png b/docs/articles/images/SubRec_sub2.png new file mode 100644 index 0000000..09a9ca9 Binary files /dev/null and b/docs/articles/images/SubRec_sub2.png differ diff --git a/docs/articles/images/Vis_3D.html b/docs/articles/images/Vis_3D.html new file mode 100644 index 0000000..1313685 --- /dev/null +++ b/docs/articles/images/Vis_3D.html @@ -0,0 +1,1961 @@ + + + + +plotly + + + + + + + + + + + + +
+
+
+ + + + diff --git a/docs/articles/images/Vis_CBG.png b/docs/articles/images/Vis_CBG.png new file mode 100644 index 0000000..e48f85c Binary files /dev/null and b/docs/articles/images/Vis_CBG.png differ diff --git a/docs/articles/images/Vis_CBM.png b/docs/articles/images/Vis_CBM.png new file mode 100644 index 0000000..d98c553 Binary files /dev/null and b/docs/articles/images/Vis_CBM.png differ diff --git a/docs/articles/images/Vis_CCPbar.png b/docs/articles/images/Vis_CCPbar.png new file mode 100644 index 0000000..231e5ad Binary files /dev/null and b/docs/articles/images/Vis_CCPbar.png differ diff --git a/docs/articles/images/Vis_CCPbox.png b/docs/articles/images/Vis_CCPbox.png new file mode 100644 index 0000000..94c402c Binary files /dev/null and b/docs/articles/images/Vis_CCPbox.png differ diff --git a/docs/articles/images/Vis_DPM.png b/docs/articles/images/Vis_DPM.png new file mode 100644 index 0000000..e39d493 Binary files /dev/null and b/docs/articles/images/Vis_DPM.png differ diff --git a/docs/articles/images/Vis_HM.png b/docs/articles/images/Vis_HM.png new file mode 100644 index 0000000..cb4f360 Binary files /dev/null and b/docs/articles/images/Vis_HM.png differ diff --git a/docs/articles/images/Vis_Violin.png b/docs/articles/images/Vis_Violin.png new file mode 100644 index 0000000..438ebaf Binary files /dev/null and b/docs/articles/images/Vis_Violin.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index a482a11..25cafbf 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -64,7 +64,7 @@

Developer

Getting Started
-
UNKNOWN TITLE
+
SCWorkflow-Intro