Skip to content

Commit 2afe55e

Browse files
authored
Add files via upload
1 parent 5e9c827 commit 2afe55e

5 files changed

Lines changed: 301 additions & 17 deletions

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ENIGMA
22
Type: Package
33
Title: dEcoNvolutIon method based on reGularized MAtrix completion
4-
Version: 0.1.1
4+
Version: 0.1.5
55
Author: Weixu Wang, Jun Yao
66
Maintainer: Weixu Wang <ken71198@hotmail.com>
77
Description: Improved estimation of cell type-specific gene expression through

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
export(ENIGMA_L2_max_norm)
44
export(ENIGMA_trace_norm)
5+
export(FindCSE_DEG)
56
export(batch_correct)
67
export(cell_deconvolve_trace)
78
export(clean_model)

R/FindCSE_DEG.R

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#' @title Perform CTS-DEG analysis
2+
#'
3+
#' @description Analysis the differential expression genes for each cell type
4+
#'
5+
#' @param object ENIGMA object
6+
#' @param FDR_control
7+
#' if use Wang et al., FDR controlled DEG analysis model. Default: TRUE
8+
#'
9+
#' @param FoldChange
10+
#' if output the FoldChange of each gene, if FALSE, then output the expression difference
11+
#'
12+
#' @param covariate
13+
#' The data.frame object contains the covariate information of each sample
14+
#'
15+
#' @return A list object contain the DEG results of each cell types
16+
#'
17+
#'
18+
#' @examples
19+
#' \dontrun{
20+
#' DEG = FindCSE_DEG(object,y)
21+
#' DEG = FindCSE_DEG(object,y,covariate=covariate)
22+
#' head(DEG$celltype1)
23+
#' }
24+
#'
25+
#' @reference
26+
#' Wang J, Roeder K, Devlin B. Bayesian estimation of cell type–specific gene expression with prior derived from single-cell data[J]. Genome research, 2021, 31(10): 1807-1818.
27+
#'
28+
#' @export
29+
FindCSE_DEG <- function(object,y,FDR_control = TRUE,covariate=NULL,FoldChange = FALSE){
30+
####
31+
#identify the DEG of ENIGMA outputs require normalized profile
32+
if(is.null(object@result_CSE_normalized)){
33+
stop('CTS-DEG analysis required normalized profile!')
34+
}
35+
#convert sce object into 3-order array
36+
Exp = sce2array(object)
37+
cellName = dimnames(Exp)[[3]]
38+
DEG_list = list()
39+
if(FDR_control){result <- DEG_test(Exp,y,covariate)}else{result <- DEG_test2(Exp,y,covariate)}
40+
ES_m <- EffectiveSize(Exp,y,FoldChange)
41+
for(i in cellName){
42+
Tab_m <- cbind(ES_m[,i],result$pval[,i],result$qval[,i])
43+
if(FoldChange){colnames(Tab_m) <- c("FoldChange","pvalue","qvalue")}else{
44+
colnames(Tab_m) <- c("ExpressionDifference","pvalue","qvalue")
45+
}
46+
DEG_list[[i]] <- Tab_m
47+
}
48+
DEG_list
49+
}
50+
51+
EffectiveSize <- function(X_array,y,FoldChange=FoldChange){
52+
if(FoldChange){
53+
### convert all profile into pseudo positive
54+
X_array[X_array<0] <- 0
55+
ES_m <- NULL
56+
nCell = dim(X_array)[3]
57+
cellName = dimnames(X_array)[[3]]
58+
for(i in 1:nCell){
59+
G = X_array[,,i]
60+
FC <- apply(G,1,FCcall,y=y)
61+
ES_m = cbind(ES_m,FC)
62+
}
63+
colnames(ES_m) <- cellName
64+
}else{
65+
ES_m <- NULL
66+
nCell = dim(X_array)[3]
67+
cellName = dimnames(X_array)[[3]]
68+
for(i in 1:nCell){
69+
G = X_array[,,i]
70+
FC <- apply(G,1,DEcall,y=y)
71+
ES_m = cbind(ES_m,FC)
72+
}
73+
colnames(ES_m) <- cellName
74+
}
75+
ES_m
76+
}
77+
78+
FCcall <- function(g,y){
79+
mean(g[y==1])/mean(g[y==0])
80+
}
81+
82+
DEcall <- function(g,y){
83+
mean(g[y==1]) - mean(g[y==0])
84+
}
85+
86+
87+
DEG_test <- function(X_array,y,covariate=NULL){
88+
O = array(0,
89+
dim = c( dim(X_array)[1],
90+
dim(X_array)[3],
91+
dim(X_array)[2]),
92+
dimnames = list( dimnames(X_array)[[1]],
93+
dimnames(X_array)[[3]],
94+
dimnames(X_array)[[2]])
95+
)
96+
for(i in 1:dim(X_array)[3]){
97+
O[,i,] <- X_array[,,i]
98+
}
99+
###Using ANOVA+glm model to evaluate prediction performance
100+
result <- test(O,y,covariate)
101+
pval <- t(result$pval)
102+
qval <- t(result$qval)
103+
colnames(qval) <- colnames(pval) <- dimnames(X_array)[[3]]
104+
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
105+
return(list(qval = qval, pval = pval))
106+
}
107+
108+
109+
get_pval <- function (pval, cell_type, K)
110+
{
111+
pval0 = rep(NA, K)
112+
names(pval0) = cell_type
113+
names = intersect(names(pval), cell_type)
114+
pval0[names] = pval[names]
115+
return(pval0)
116+
}
117+
118+
pval2qval <- function (pval, A, y, covariate = NULL)
119+
{
120+
ng = nrow(A)
121+
if (is.null(covariate))
122+
pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
123+
, ]) ~ y))$stats[1, "Pr(>F)"], silent = T))
124+
else pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
125+
, ]) ~ y + covariate))$stats[1, "Pr(>F)"], silent = T))
126+
pval = pval[, !is.na(as.numeric(pval1))]
127+
pval1 = na.omit(as.numeric(pval1))
128+
qval1 = p.adjust(pval1, "fdr")
129+
qval = pval
130+
K = ncol(A)
131+
for (i in 1:ncol(pval)) {
132+
qval[, i] = 1
133+
if (min(pval[, i], na.rm = T) < 0.05/K)
134+
qval[, i][which.min(pval[, i])] = qval1[i]
135+
}
136+
return(qval)
137+
}
138+
139+
140+
test <- function (A, y, covariate = NULL)
141+
{
142+
if (dim(A)[3] != length(y))
143+
print("CSE estimates and y have different length")
144+
if (!is.null(covariate))
145+
if (dim(A)[3] != nrow(covariate))
146+
print("CSE estimates and covariate have different number of samples/subjects")
147+
else {
148+
if (!is.null(rownames(covariate)) & any(rownames(covariate) !=
149+
dimnames(A)[[3]]))
150+
covariate = covariate[dimnames(A)[[3]], ]
151+
}
152+
K = ncol(A)
153+
cell_type = colnames(A)
154+
if (is.null(covariate))
155+
pval = apply(A, 1, function(x) {
156+
pval = coef(summary(glm(y ~ ., data = data.frame(t(x)),
157+
family = "binomial")))[, 4]
158+
return(get_pval(pval, cell_type, K))
159+
})
160+
else pval = apply(A, 1, function(x) {
161+
pval = coef(summary(glm(y ~ ., data = data.frame(t(x),
162+
covariate), family = "binomial")))[, 4]
163+
return(get_pval(pval, cell_type, K))
164+
})
165+
qval = pval2qval(pval, A, y, covariate)
166+
return(list(qval = qval, pval = pval))
167+
}
168+
169+
DEG_test2 <- function(X_array,y,covariate=NULL){
170+
dims_ct <- dim(X_array)[3]
171+
cellName <- dimnames(X_array)[[3]]
172+
if(is.null(covariate)){
173+
pval <- qval <- NULL
174+
for(ct in 1:dims_ct){
175+
mat <- X_array[,,ct]
176+
p <- NULL
177+
for(i in 1:nrow(mat)){
178+
p <- c(p, summary(lm(mat[i,]~as.numeric(y)))$coefficients[2,4])
179+
}
180+
q = p.adjust(p, "fdr")
181+
pval <- cbind(pval,p)
182+
qval <- cbind(qval,q)
183+
}
184+
colnames(qval) <- colnames(pval) <- cellName
185+
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
186+
187+
}else{
188+
pval <- qval <- NULL
189+
cvname <- colnames(covariate)
190+
dims_ct <- dim(X_array)[3]
191+
for(ct in 1:dims_ct){
192+
mat <- X_array[,,ct]
193+
p <- NULL
194+
for(i in 1:nrow(mat)){
195+
dat <- cbind(mat[i,],y,covariate)
196+
dat <- as.data.frame(dat)
197+
colnames(dat) <- c("x","y",cvname)
198+
p <- c(p, summary(lm(x~.,dat=dat))$coefficients[2,4])
199+
}
200+
q = p.adjust(p, "fdr")
201+
pval <- cbind(pval,p)
202+
qval <- cbind(qval,q)
203+
}
204+
colnames(qval) <- colnames(pval) <- cellName
205+
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
206+
}
207+
208+
return(list(qval = qval, pval = pval))
209+
}

R/hello.R

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,55 @@
1-
# Hello, world!
2-
#
3-
# This is an example function named 'hello'
4-
# which prints 'Hello, world!'.
5-
#
6-
# You can learn more about package authoring with RStudio at:
7-
#
8-
# http://r-pkgs.had.co.nz/
9-
#
10-
# Some useful keyboard shortcuts for package authoring:
11-
#
12-
# Install Package: 'Ctrl + Shift + B'
13-
# Check Package: 'Ctrl + Shift + E'
14-
# Test Package: 'Ctrl + Shift + T'
1+
X = t(matrix(c(1:12),nrow=3,ncol=4))
2+
theta = matrix(c(0.5,0,1,0.5,1,0),nrow=3,ncol=2)
3+
P = array(0,
4+
dim = c( nrow(X),
5+
ncol(X),
6+
ncol(theta)),
7+
dimnames = list( rownames(X),
8+
colnames(X),
9+
colnames(theta))
10+
)
11+
R <- matrix(c(2,4,4,6,3,2,5,4),nrow=4,ncol=2)
12+
derive_P2 <- function(X, theta, P_old,R,alpha){
13+
## P_old: a tensor variable with three dimensions
14+
## theta: the cell type proportions variable
15+
## cell_type_index: optimize which type of cells
16+
## R: reference matrix
17+
dP1 <- dP2 <- array(0,
18+
dim = c( nrow(X),
19+
ncol(X),
20+
ncol(theta)),
21+
dimnames = list( rownames(X),
22+
colnames(X),
23+
colnames(theta))
24+
)
25+
for(cell_type_index in 1:ncol(theta)){
26+
R.m <- as.matrix(R[,cell_type_index])
1527

16-
hello <- function() {
17-
print("Hello, world!")
28+
cell_type_seq <- c(1:ncol(theta))
29+
cell_type_seq <- cell_type_seq[cell_type_seq!=cell_type_index]
30+
31+
X_summary = Reduce("+",
32+
lapply(cell_type_seq, function(i) P_old[,,i]%*%diag(theta[,i]) )
33+
)
34+
X_summary <- X-X_summary
35+
36+
dP1[,,cell_type_index] <- 2*(P_old[,,cell_type_index]%*%diag(theta[,cell_type_index]) - X_summary)%*%diag(theta[,cell_type_index])
37+
dP2[,,cell_type_index] <- 2*(as.matrix(rowMeans(P_old[,,cell_type_index]))-R.m)%*%t(as.matrix(rep((1/ncol(dP2[,,cell_type_index])),ncol(dP2[,,cell_type_index]))))
38+
}
39+
print(dP1)
40+
print(dP2)
41+
dP1 = dP1 / sqrt( sum( dP1^2 ) ) * 1e5
42+
dP2 = dP2 / sqrt( sum( dP2^2 ) ) * 1e5
43+
44+
#calculate w1
45+
#if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP1)) ) {w1 = 1}
46+
#else if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP2)) ) {w1 = 0}
47+
#else {
48+
# w1 = crossprod(as.matrix(dP2-dP1), as.matrix(dP2))/sum((dP1-dP2)^2)
49+
#}
50+
w1 <- alpha
51+
w2 <- 1-w1
52+
53+
dP <- dP1*as.numeric(w1) + dP2*as.numeric(w2)
54+
return(dP)
1855
}

man/FindCSE_DEG.Rd

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)