1+ # ' @title Perform CTS-DEG analysis
2+ # '
3+ # ' @description Analysis the differential expression genes for each cell type
4+ # '
5+ # ' @param object ENIGMA object
6+ # ' @param FDR_control
7+ # ' if use Wang et al., FDR controlled DEG analysis model. Default: TRUE
8+ # '
9+ # ' @param FoldChange
10+ # ' if output the FoldChange of each gene, if FALSE, then output the expression difference
11+ # '
12+ # ' @param covariate
13+ # ' The data.frame object contains the covariate information of each sample
14+ # '
15+ # ' @return A list object contain the DEG results of each cell types
16+ # '
17+ # '
18+ # ' @examples
19+ # ' \dontrun{
20+ # ' DEG = FindCSE_DEG(object,y)
21+ # ' DEG = FindCSE_DEG(object,y,covariate=covariate)
22+ # ' head(DEG$celltype1)
23+ # ' }
24+ # '
25+ # ' @reference
26+ # ' Wang J, Roeder K, Devlin B. Bayesian estimation of cell type–specific gene expression with prior derived from single-cell data[J]. Genome research, 2021, 31(10): 1807-1818.
27+ # '
28+ # ' @export
29+ FindCSE_DEG <- function (object ,y ,FDR_control = TRUE ,covariate = NULL ,FoldChange = FALSE ){
30+ # ###
31+ # identify the DEG of ENIGMA outputs require normalized profile
32+ if (is.null(object @ result_CSE_normalized )){
33+ stop(' CTS-DEG analysis required normalized profile!' )
34+ }
35+ # convert sce object into 3-order array
36+ Exp = sce2array(object )
37+ cellName = dimnames(Exp )[[3 ]]
38+ DEG_list = list ()
39+ if (FDR_control ){result <- DEG_test(Exp ,y ,covariate )}else {result <- DEG_test2(Exp ,y ,covariate )}
40+ ES_m <- EffectiveSize(Exp ,y ,FoldChange )
41+ for (i in cellName ){
42+ Tab_m <- cbind(ES_m [,i ],result $ pval [,i ],result $ qval [,i ])
43+ if (FoldChange ){colnames(Tab_m ) <- c(" FoldChange" ," pvalue" ," qvalue" )}else {
44+ colnames(Tab_m ) <- c(" ExpressionDifference" ," pvalue" ," qvalue" )
45+ }
46+ DEG_list [[i ]] <- Tab_m
47+ }
48+ DEG_list
49+ }
50+
51+ EffectiveSize <- function (X_array ,y ,FoldChange = FoldChange ){
52+ if (FoldChange ){
53+ # ## convert all profile into pseudo positive
54+ X_array [X_array < 0 ] <- 0
55+ ES_m <- NULL
56+ nCell = dim(X_array )[3 ]
57+ cellName = dimnames(X_array )[[3 ]]
58+ for (i in 1 : nCell ){
59+ G = X_array [,,i ]
60+ FC <- apply(G ,1 ,FCcall ,y = y )
61+ ES_m = cbind(ES_m ,FC )
62+ }
63+ colnames(ES_m ) <- cellName
64+ }else {
65+ ES_m <- NULL
66+ nCell = dim(X_array )[3 ]
67+ cellName = dimnames(X_array )[[3 ]]
68+ for (i in 1 : nCell ){
69+ G = X_array [,,i ]
70+ FC <- apply(G ,1 ,DEcall ,y = y )
71+ ES_m = cbind(ES_m ,FC )
72+ }
73+ colnames(ES_m ) <- cellName
74+ }
75+ ES_m
76+ }
77+
78+ FCcall <- function (g ,y ){
79+ mean(g [y == 1 ])/ mean(g [y == 0 ])
80+ }
81+
82+ DEcall <- function (g ,y ){
83+ mean(g [y == 1 ]) - mean(g [y == 0 ])
84+ }
85+
86+
87+ DEG_test <- function (X_array ,y ,covariate = NULL ){
88+ O = array (0 ,
89+ dim = c( dim(X_array )[1 ],
90+ dim(X_array )[3 ],
91+ dim(X_array )[2 ]),
92+ dimnames = list ( dimnames(X_array )[[1 ]],
93+ dimnames(X_array )[[3 ]],
94+ dimnames(X_array )[[2 ]])
95+ )
96+ for (i in 1 : dim(X_array )[3 ]){
97+ O [,i ,] <- X_array [,,i ]
98+ }
99+ # ##Using ANOVA+glm model to evaluate prediction performance
100+ result <- test(O ,y ,covariate )
101+ pval <- t(result $ pval )
102+ qval <- t(result $ qval )
103+ colnames(qval ) <- colnames(pval ) <- dimnames(X_array )[[3 ]]
104+ rownames(qval ) <- rownames(pval ) <- dimnames(X_array )[[1 ]]
105+ return (list (qval = qval , pval = pval ))
106+ }
107+
108+
109+ get_pval <- function (pval , cell_type , K )
110+ {
111+ pval0 = rep(NA , K )
112+ names(pval0 ) = cell_type
113+ names = intersect(names(pval ), cell_type )
114+ pval0 [names ] = pval [names ]
115+ return (pval0 )
116+ }
117+
118+ pval2qval <- function (pval , A , y , covariate = NULL )
119+ {
120+ ng = nrow(A )
121+ if (is.null(covariate ))
122+ pval1 = sapply(1 : ng , function (g ) try(summary(manova(t(A [g ,
123+ , ]) ~ y ))$ stats [1 , " Pr(>F)" ], silent = T ))
124+ else pval1 = sapply(1 : ng , function (g ) try(summary(manova(t(A [g ,
125+ , ]) ~ y + covariate ))$ stats [1 , " Pr(>F)" ], silent = T ))
126+ pval = pval [, ! is.na(as.numeric(pval1 ))]
127+ pval1 = na.omit(as.numeric(pval1 ))
128+ qval1 = p.adjust(pval1 , " fdr" )
129+ qval = pval
130+ K = ncol(A )
131+ for (i in 1 : ncol(pval )) {
132+ qval [, i ] = 1
133+ if (min(pval [, i ], na.rm = T ) < 0.05 / K )
134+ qval [, i ][which.min(pval [, i ])] = qval1 [i ]
135+ }
136+ return (qval )
137+ }
138+
139+
140+ test <- function (A , y , covariate = NULL )
141+ {
142+ if (dim(A )[3 ] != length(y ))
143+ print(" CSE estimates and y have different length" )
144+ if (! is.null(covariate ))
145+ if (dim(A )[3 ] != nrow(covariate ))
146+ print(" CSE estimates and covariate have different number of samples/subjects" )
147+ else {
148+ if (! is.null(rownames(covariate )) & any(rownames(covariate ) !=
149+ dimnames(A )[[3 ]]))
150+ covariate = covariate [dimnames(A )[[3 ]], ]
151+ }
152+ K = ncol(A )
153+ cell_type = colnames(A )
154+ if (is.null(covariate ))
155+ pval = apply(A , 1 , function (x ) {
156+ pval = coef(summary(glm(y ~ . , data = data.frame (t(x )),
157+ family = " binomial" )))[, 4 ]
158+ return (get_pval(pval , cell_type , K ))
159+ })
160+ else pval = apply(A , 1 , function (x ) {
161+ pval = coef(summary(glm(y ~ . , data = data.frame (t(x ),
162+ covariate ), family = " binomial" )))[, 4 ]
163+ return (get_pval(pval , cell_type , K ))
164+ })
165+ qval = pval2qval(pval , A , y , covariate )
166+ return (list (qval = qval , pval = pval ))
167+ }
168+
169+ DEG_test2 <- function (X_array ,y ,covariate = NULL ){
170+ dims_ct <- dim(X_array )[3 ]
171+ cellName <- dimnames(X_array )[[3 ]]
172+ if (is.null(covariate )){
173+ pval <- qval <- NULL
174+ for (ct in 1 : dims_ct ){
175+ mat <- X_array [,,ct ]
176+ p <- NULL
177+ for (i in 1 : nrow(mat )){
178+ p <- c(p , summary(lm(mat [i ,]~ as.numeric(y )))$ coefficients [2 ,4 ])
179+ }
180+ q = p.adjust(p , " fdr" )
181+ pval <- cbind(pval ,p )
182+ qval <- cbind(qval ,q )
183+ }
184+ colnames(qval ) <- colnames(pval ) <- cellName
185+ rownames(qval ) <- rownames(pval ) <- dimnames(X_array )[[1 ]]
186+
187+ }else {
188+ pval <- qval <- NULL
189+ cvname <- colnames(covariate )
190+ dims_ct <- dim(X_array )[3 ]
191+ for (ct in 1 : dims_ct ){
192+ mat <- X_array [,,ct ]
193+ p <- NULL
194+ for (i in 1 : nrow(mat )){
195+ dat <- cbind(mat [i ,],y ,covariate )
196+ dat <- as.data.frame(dat )
197+ colnames(dat ) <- c(" x" ," y" ,cvname )
198+ p <- c(p , summary(lm(x ~ . ,dat = dat ))$ coefficients [2 ,4 ])
199+ }
200+ q = p.adjust(p , " fdr" )
201+ pval <- cbind(pval ,p )
202+ qval <- cbind(qval ,q )
203+ }
204+ colnames(qval ) <- colnames(pval ) <- cellName
205+ rownames(qval ) <- rownames(pval ) <- dimnames(X_array )[[1 ]]
206+ }
207+
208+ return (list (qval = qval , pval = pval ))
209+ }
0 commit comments