Performing differential expression analysis adjusting for cell proportion differences, and other covariates using a additive model.

run_de(
  bulk,
  prop = NULL,
  sampleinfo,
  control = NULL,
  case = NULL,
  de_method = c("edgeR", "DESeq2", "limma_voom", "limma"),
  padj_method = "BH",
  ...
)

Arguments

bulk

a matrix-like object of gene expression values with rows representing genes, columns representing samples

prop

a matrix-like object of cell proportion values with rows representing cell types, columns representing samples. Default to NULL, not adjust for cell proportion.

sampleinfo

a data.frame of metadata for the samples. Rows represents samples; columns represents covariates to adjust for. The first column of sampleinfo should contains group information for differential analysis.

control

a character value indicating the control group in sampleinfo. Set to NULL to perform ANOVA-like analysis.

case

a character value indicating the case group in sampleinfo. Set to NULL to perform ANOVA-like analysis.

de_method

a character value indicating the method to use for testing differential expression. Should be one of "edgeR", "DESeq2", "limma_voom", "limma"

padj_method

method for adjusting multiple hypothesis testing. Default to "BH". See p.adjust for more details.

...

parameters pass to DE methods.

Value

a list of two elements:

  1. a data.frame containing normalized gene expression data.

  2. a data.frame containing detailed differential expression statistics. Columns represent "Gene name", "log2 fold change", "log2 average expression", "p value", "adjusted p value" respectively

Details

To perform ANOVA like analysis (differences between any groups), set control & case options to NULL and choose one of the following methods: edgeR, limma_voom or limma. DESeq2 does not provide direct support for this type of comparison.

Examples

if (FALSE) {
ref_list <- c(paste0(system.file("extdata", package = "SCdeconR"), "/refdata/sample1"),
              paste0(system.file("extdata", package = "SCdeconR"), "/refdata/sample2"))
phenopath1 <- paste0(system.file("extdata", package = "SCdeconR"),
"/refdata/phenodata_sample1.txt")
phenopath2 <- paste0(system.file("extdata", package = "SCdeconR"),
"/refdata/phenodata_sample2.txt")
phenodata_list <- c(phenopath1,phenopath2)

# construct integrated reference using harmony algorithm
refdata <- construct_ref(ref_list = ref_list,
                      phenodata_list = phenodata_list,
                      data_type = "cellranger",
                      method = "harmony",
                      group_var = "subjectid",
                      nfeature_rna = 50,
                      vars_to_regress = "percent_mt", verbose = FALSE)
phenodata <- data.frame(cellid = colnames(refdata),
                        celltypes = refdata$celltype,
                        subjectid = refdata$subjectid)
## construct a vector with same proportions across cell types
prop1 <- data.frame(celltypes = unique(refdata$celltype),
                   proportion = rep(0.125, 8))
## simulate 20 bulk samples based on specified cell type proportion
bulk_sim1 <- bulk_generator(ref = GetAssayData(refdata, slot = "data", assay = "SCT"),
                            phenodata = phenodata,
                            num_mixtures = 20,
                            prop = prop1, replace = TRUE)
## generate another vector with high proportion for a certian cell type
prop2 <- data.frame(celltypes = unique(refdata$celltype),
                    proportion = c(0.8, 0.1, 0.1, rep(0, 5)))
bulk_sim2 <- bulk_generator(ref = GetAssayData(refdata, slot = "data", assay = "SCT"),
                            phenodata = phenodata,
                            num_mixtures = 20,
                            prop = prop2, replace = TRUE)
## compare data for differential analysis
bulk_sim <- list(cbind(bulk_sim1[[1]], bulk_sim2[[1]]),
                 cbind(bulk_sim1[[2]], bulk_sim2[[2]]))
## force to be integer for DE purposes
bulk <- round(bulk_sim[[1]], digits=0)
colnames(bulk) <- paste0("sample", 1:ncol(bulk))
## predict cell type proportions using "OLS" algorithm
decon_res <- scdecon(bulk = bulk,
                     ref = GetAssayData(refdata, slot = "data", assay = "SCT"),
                     phenodata = phenodata,
                     filter_ref = TRUE,
                     decon_method = "OLS",
                     norm_method = "none",
                     trans_method = "none",
                     marker_strategy = "all")
## create sampleinfo
sampleinfo <- data.frame(condition = rep(c("group1", "group2"), each =20))
row.names(sampleinfo) <- colnames(bulk)
library(DESeq2)
deres <- run_de(bulk = bulk,
               prop = decon_res[[1]],
               sampleinfo = sampleinfo,
               control = "group1",
               case = "group2",
               de_method = "edgeR")

 ## run differential analysis without adjusting for cell proportion differences
 deres_notadjust <- run_de(bulk = bulk,
                           prop = NULL,
                           sampleinfo = sampleinfo,
                           control = "group1",
                           case = "group2",
                           de_method = "edgeR")

## scatter plot to compare the effect of adjusting cell proportion differences
comparedeg_scatter(results1 = deres[[2]],
                   results2 = deres_notadjust[[2]],
                   result_names = c("adjust for cell proportion", "not adjust for cell proportion"),
                   fc_cutoff = 1.5,
                   pval_cutoff = 0.05,
                   pvalflag = TRUE,
                   interactive = T)

## generate cell-type specific gene expression
ct_exprs_list <- celltype_expression(bulk = bulk,
                                     ref = GetAssayData(refdata, slot = "data", assay = "SCT"),
                                     phenodata = phenodata,
                                     prop = decon_res[[1]],
                                     UMI_min = 0,
                                     CELL_MIN_INSTANCE = 1)


}