Skip to contents

Introduction

NMFscape provides fast non-negative matrix factorization (NMF) for SingleCellExperiment and SpatialExperiment objects using the high-performance RcppML backend. NMF is useful for dimensionality reduction, feature extraction, and identifying patterns in genomic data.

Installation

# From Bioconductor (when available)
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("NMFscape")

Quick Start

library(NMFscape)
#> Warning: replacing previous import 'BiocGenerics::sd' by 'stats::sd' when
#> loading 'NMFscape'
library(scuttle)

Basic Usage with SingleCellExperiment

# Create example data
sce <- mockSCE(ngenes = 1000, ncells = 200)
sce <- logNormCounts(sce)

# Run NMF with 10 factors
sce <- runNMFscape(sce, k = 10, verbose = FALSE)
#> 
#> Attaching package: 'Matrix'
#> The following object is masked from 'package:S4Vectors':
#> 
#>     expand

# Access results
nmf_coords <- reducedDim(sce, "NMF")
dim(nmf_coords)
#> [1] 200  10

# Get basis matrix (gene loadings)
basis <- getBasis(sce)
dim(basis)
#> [1] 1000   10

# Get top features for each factor
top_genes <- getTopFeatures(sce, n = 5)
head(top_genes, 2)
#> $NMF_1
#> [1] "Gene_0713" "Gene_0185" "Gene_0195" "Gene_0413" "Gene_0901"
#> 
#> $NMF_2
#> [1] "Gene_0880" "Gene_0397" "Gene_0869" "Gene_0019" "Gene_0690"

Advanced Usage

# Set the assay, result name, and other parameters (here, L1 penalties) explicitly
sce <- runNMFscape(sce, k = 15, assay = "logcounts", 
              name = "NMF_15", L1 = c(0.01, 0.01), verbose = FALSE)

# Multiple NMF results can be stored
reducedDimNames(sce)
#> [1] "NMF"    "NMF_15"

Consensus NMF

For more robust results, use consensus NMF, which runs NMF multiple times across a range of k values and combines the results:

# Run consensus NMF across a range of k values
sce <- runConsensusNMF(sce, k_range = 8:12, n_runs = 20, verbose = FALSE)

# Get the stability metrics for each k to see how the optimal k was selected
stability <- getStabilityMetrics(sce)
print(stability)
#>    k    stability silhouette reproducibility cophenetic_correlation
#> 1  8 0.0080974843  0.5144750      0.26250000                      1
#> 2  9 0.0003724395  0.5150000      0.08333333                      1
#> 3 10 0.0008040201  0.4000237      0.10500000                      1
#> 4 11 0.0009962640  0.4037430      0.11818182                      1
#> 5 12 0.0004184100  0.3114126      0.08333333                      1

# Access consensus results
optimal_k <- getOptimalK(sce)
geps <- getConsensusGEPs(sce)
usage <- getGEPUsage(sce)

# Get top genes for each program
top_genes <- getTopGEPFeatures(sce, n = 10)
head(top_genes, 2)
#> $cNMF_1
#>  [1] "Gene_0386" "Gene_0332" "Gene_0006" "Gene_0693" "Gene_0692" "Gene_0632"
#>  [7] "Gene_0215" "Gene_0741" "Gene_0017" "Gene_0283"
#> 
#> $cNMF_2
#>  [1] "Gene_0690" "Gene_0185" "Gene_0167" "Gene_0713" "Gene_0651" "Gene_0528"
#>  [7] "Gene_0811" "Gene_0441" "Gene_0221" "Gene_0387"

Visualization

# Plot stability metrics and the top genes of selected programs (requires ggplot2)
if (requireNamespace("ggplot2", quietly = TRUE)) {
  plotStability(sce)
  plotGEPs(sce, programs = 1:3, n_genes = 15)
}

Session Information

sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] Matrix_1.7-3                scuttle_1.18.0             
#>  [3] NMFscape_0.99.0             SingleCellExperiment_1.30.1
#>  [5] SummarizedExperiment_1.38.1 Biobase_2.68.0             
#>  [7] GenomicRanges_1.60.0        GenomeInfoDb_1.44.2        
#>  [9] IRanges_2.42.0              S4Vectors_0.46.0           
#> [11] MatrixGenerics_1.20.0       matrixStats_1.5.0          
#> [13] BiocGenerics_0.54.0         generics_0.1.4             
#> [15] BiocStyle_2.36.0           
#> 
#> loaded via a namespace (and not attached):
#>  [1] tidyselect_1.2.1        blob_1.2.4              dplyr_1.1.4            
#>  [4] farver_2.1.2            Biostrings_2.76.0       S7_0.2.0               
#>  [7] fastmap_1.2.0           bluster_1.18.0          digest_0.6.37          
#> [10] rsvd_1.0.5              RcppML_0.3.7            lifecycle_1.0.4        
#> [13] cluster_2.1.8.1         KEGGREST_1.48.1         statmod_1.5.0          
#> [16] RSQLite_2.4.3           magrittr_2.0.3          compiler_4.5.1         
#> [19] rlang_1.1.6             sass_0.4.10             tools_4.5.1            
#> [22] igraph_2.1.4            yaml_2.3.10             knitr_1.50             
#> [25] S4Arrays_1.8.1          dqrng_0.4.1             bit_4.6.0              
#> [28] DelayedArray_0.34.1     RColorBrewer_1.1-3      abind_1.4-8            
#> [31] BiocParallel_1.42.1     desc_1.4.3              grid_4.5.1             
#> [34] beachmat_2.24.0         edgeR_4.6.3             ggplot2_4.0.0          
#> [37] scales_1.4.0            cli_3.6.5               rmarkdown_2.29         
#> [40] crayon_1.5.3            ragg_1.5.0              metapod_1.16.0         
#> [43] httr_1.4.7              DBI_1.2.3               cachem_1.1.0           
#> [46] parallel_4.5.1          AnnotationDbi_1.70.0    BiocManager_1.30.26    
#> [49] XVector_0.48.0          vctrs_0.6.5             jsonlite_2.0.0         
#> [52] bookdown_0.44           BiocSingular_1.24.0     BiocNeighbors_2.2.0    
#> [55] bit64_4.6.0-1           irlba_2.3.5.1           systemfonts_1.2.3      
#> [58] locfit_1.5-9.12         limma_3.64.3            jquerylib_0.1.4        
#> [61] glue_1.8.0              pkgdown_2.1.3           codetools_0.2-20       
#> [64] gtable_0.3.6            UCSC.utils_1.4.0        ScaledMatrix_1.16.0    
#> [67] tibble_3.3.0            pillar_1.11.0           htmltools_0.5.8.1      
#> [70] GenomeInfoDbData_1.2.14 R6_2.6.1                textshaping_1.0.3      
#> [73] evaluate_1.0.5          lattice_0.22-7          png_0.1-8              
#> [76] pheatmap_1.0.13         memoise_2.0.1           scran_1.36.0           
#> [79] bslib_0.9.0             Rcpp_1.1.0              SparseArray_1.8.1      
#> [82] xfun_0.53               fs_1.6.6                pkgconfig_2.0.3