-
Notifications
You must be signed in to change notification settings - Fork 0
/
pRRophetic.R
255 lines (208 loc) · 12.1 KB
/
pRRophetic.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
rm(list = ls())
options(stringsAsFactors = F)
expFile="01rawData/GSE102349_TPM_symbol.txt"
pdFile = "01rawData/GSE102349_Cluster_res.txt"
testExpr <- read.table(expFile,sep="\t",header=T,row.names=1,check.names=F)
pd<- read.table(pdFile,sep="\t",header=T,row.names = 1,check.names=F)
# drug prediction R package of pRRophetic
library(pRRophetic)
library(ggplot2)
library(ggpubr)
library(cowplot)
mycol <- c("#B2182B","#E69F00","#56B4E9","#009E73","#F0E442","#0072B2","#D55E00","#CC79A7","#CC6666","#9999CC","#66CC99","#ADD1E5",
'#CCEBC5', '#FCCDE5', '#B3DE69', '#FDB462','#80B1D3', '#FB8072', '#BEBADA', '#FFFFB3', '#8DD3C7',"#999999")
# drug name
GCP.drug <- read.table("pRRophetic/drug_sig.txt")
GCP.drug <- GCP.drug$V1
exprData <- as.matrix(testExpr[,rownames(pd)])
# customize enough box colors that the number of colors is at least
# equal to the number of groups
jco <- c("#E69F00","#56B4E9","#009E73","#F0E442","#0072B2","#D55E00","#CC79A7","#CC6666","#9999CC","#66CC99","#ADD1E5",
'#CCEBC5', '#FCCDE5', '#B3DE69', '#FDB462','#80B1D3', '#FB8072', '#BEBADA', '#FFFFB3', '#8DD3C7',"#999999")
# drug sensitivity prediction
GCPinfo <- GCP.IC50 <- GCP.expr <- cvOut <- predictedPtype <- predictedBoxdat <- list() # Initialization list
plotp <- list()
for (drug in GCP.drug) {
set.seed(1248103) # because the prediction process defaults to a 10-fold CV, the seeds are set so that the results are repeatable
cat(drug," starts!\n") # indicates that the current drug has been analyzed
# To predict the IC50 value, use the default parameters.
# For details, see pRRopheticPredict parameter list
predictedPtype[[drug]] <- pRRopheticPredict(testMatrix = exprData ,
drug = drug,
tissueType = "allSolidTumors",
batchCorrect = "eb",
selection = 1,
dataset = "cgp2014")
if(!all(names(predictedPtype[[drug]])==rownames(pd))) {stop("Name mismatched!\n")}
predictedBoxdat[[drug]] <- data.frame("est.ic50"=predictedPtype[[drug]],
"Cluster"=ifelse(pd$Cluster == "Cluster1","Cluster1","Cluster2"),
row.names = names(predictedPtype[[drug]]))
predictedBoxdat[[drug]]$Cluster <- factor(predictedBoxdat[[drug]]$Cluster,levels = c("Cluster1","Cluster2"),ordered = T)
# drawing
p <- ggplot(data = predictedBoxdat[[drug]], aes(x=Cluster, y=est.ic50))
p <- p + geom_boxplot(aes(fill = Cluster),outlier.colour = NA,notch = T,size = 0.3)+
geom_jitter(aes(fill = Cluster),shape = 21,size=2,width = 0.2)+
geom_violin(aes(fill = Cluster),position = position_dodge(width = .75),
size = 0.3,alpha = 0.4,trim = T)+
scale_fill_manual(values=mycol[c(2,3)]) +
theme_classic()+
stat_compare_means(comparisons = list(c('Cluster1','Cluster2')),method="wilcox.test", label = "p.format")+
theme(legend.position="none") +
theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 12),plot.title = element_text(size = 12, hjust = 0.5)) +
xlab("") + ylab("Estimated IC50") +
ggtitle(drug) # add the title
plotp[[drug]] <- p
cat(drug," has been finished!\n")
}
# Merged images
# Show the first four sensitive drugs
p1 <- plot_grid(plotp[[1]],plotp[[2]],plotp[[3]],plotp[[4]],labels = c("A","B","C","D"),nrow = 1) # title可以AI下拉到合适位置,就如例文所示
ggsave("pRRophetic/TOP4 boxplot of predicted IC50.pdf", width = 8, height = 3.6)
# display multiple sensitive drugs
p2 <- plot_grid(plotlist=plotp, ncol=5)
ggsave("pRRophetic/boxplot of predicted IC50_multiple.pdf", width = 10, height = 15)
# The differences between groups were examined
p <- vector()
for (drug in GCP.drug) {
tmp <- wilcox.test(as.numeric(predictedBoxdat[[drug]][which(predictedBoxdat[[drug]]$Cluster %in% "Cluster1"),"est.ic50"]),
as.numeric(predictedBoxdat[[drug]][which(predictedBoxdat[[drug]]$Cluster %in% "Cluster2"),"est.ic50"]),alternative = "less")$p.value
p <- append(p,tmp) # The rank sum test for p values of the two groups of samples
}
names(p) <- GCP.drug
print(p)
# Save to file
write.table(p,"pRRophetic/output_pvalue.txt", quote = F, sep = "\t")
# TIDE was used to compare the immune responses of different subtypes
library(pheatmap)
Sys.setenv(LANGUAGE = "en")
options(stringsAsFactors = FALSE)
TIDE <- testExpr
TIDE <- sweep(TIDE,2, apply(TIDE,2,median,na.rm=T))
TIDE <- sweep(TIDE,1, apply(TIDE,1,median,na.rm=T))
write.table(TIDE,"pRRophetic/TIDE_input.self_subtract",sep = "\t",row.names = T,col.names = NA,quote = F)
TIDE.res <- read.csv("pRRophetic/TIDE_output.csv",header = T,row.names = 1,check.names = F,stringsAsFactors = F)
pd$TIDE <- TIDE.res[rownames(pd),"Responder"]
print(table(pd$TIDE,pd$Cluster))
# To test whether immunotherapy response is related to subtypes, P<0.05 is relevant
print(fisher.test(table(pd$TIDE,pd$Cluster)))
# submap predict immunotherapy response of subtypes
# custom functions are used to generate the data format required by submap
generateInputFileForSubMap <- function(in_gct, gct_file, cls_file, sam_info, type_name = "type"){
in_gct <- data.frame(GeneID=rownames(in_gct),
description="na",
in_gct,
stringsAsFactors = F,
check.names = F)
cat("#1.2\n", file = gct_file)
cat(nrow(in_gct),"\t",ncol(in_gct)-2,"\n", file = gct_file, append = T)
cat(paste(colnames(in_gct), collapse = "\t"),"\n", file = gct_file, append = T)
for(i in 1:nrow(in_gct)) cat(paste(in_gct[i,], collapse = "\t"),"\n", file = gct_file, append = T)
cat(nrow(sam_info),length(levels(factor(sam_info$rank))),1, "\n", file = cls_file )
cat("#", paste0(levels(factor(sam_info[, type_name])), collapse = " " ), "\n", file = cls_file, sep = "", append = T)
cat(as.numeric(factor(sam_info[, type_name])), file = cls_file, append = T)
}
# The data format needed to create the submap (SKCM)
skcm.immunotherapy.logNC <- read.table("pRRophetic/skcm.immunotherapy.47samples.log2CountsNorm.txt",sep = "\t",row.names = 1,header = T,check.names = F,stringsAsFactors = F) #原文提供的log2转化的标准化count值
rownames(skcm.immunotherapy.logNC) <- toupper(rownames(skcm.immunotherapy.logNC))
skcm.immunotherapy.info <- read.table("pRRophetic/skcm.immunotherapy.47sampleInfo.txt",sep = "\t",row.names = 1,header = T,check.names = F,stringsAsFactors = F)
skcm.immunotherapy.info <- skcm.immunotherapy.info[order(skcm.immunotherapy.info$label),]
skcm.immunotherapy.info$rank <- rep(c(1,2,3,4),times=as.character(table(skcm.immunotherapy.info$label))) #1: CTLA4_noR 2: CTLA4_R 3:PD1_noR 4:PD1_R
# The data format needed to create the submap (GEO)
tmp <- read.table(expFile,sep = "\t",check.names = F,stringsAsFactors = F,header = T,row.names = 1)
GENELIST <- intersect(rownames(tmp),rownames(skcm.immunotherapy.logNC)) # take the list of genes after intersection
sam_info <- skcm.immunotherapy.info
in_gct <- skcm.immunotherapy.logNC[GENELIST,rownames(skcm.immunotherapy.info)]
# The file name that generates the output data
gct_file <- "skcm.immunotherapy.for.SubMap.gct"
cls_file <- "skcm.immunotherapy.for.SubMap.cls"
generateInputFileForSubMap(in_gct = in_gct, gct_file = gct_file, cls_file = cls_file, sam_info = sam_info, type_name = "rank")
samples.C1 <- rownames(pd[which(pd$Cluster == "Cluster1"),])
samples.C2 <- rownames(pd[which(pd$Cluster == "Cluster2"),])
sam_info <- data.frame("Cluster"=c(samples.C1,samples.C2),row.names = c(samples.C1,samples.C2))
sam_info$rank <- rep(c(1,2),times=c(length(samples.C1),length(samples.C2)))
# The file name that generates the output data
gct_file <- "GSE102349.Immune2.for.SubMap.gct"
cls_file <- "GSE102349.Immune2.for.SubMap.cls"
in_gct <- log2(tmp[GENELIST,rownames(sam_info)] + 1)
generateInputFileForSubMap(in_gct = in_gct, gct_file = gct_file, cls_file = cls_file, sam_info = sam_info, type_name = "rank")
# GenePattern was used for submap analysis
library(GenePattern)
gctfileA <- 'pRRophetic/GSE102349.Immune2.for.SubMap.gct'
clsfileA <- 'pRRophetic/GSE102349.Immune2.for.SubMap.cls'
gctfileB <- 'pRRophetic/skcm.immunotherapy.for.SubMap.gct'
clsfileB <- 'pRRophetic/skcm.immunotherapy.for.SubMap.cls'
source('D:/GEO/script/submap.R')
submap.main( gctfileA,
gctfileB,
clsfileA,
clsfileB,
output.filename="pRRophetic/SubMap",
ntag=100,
nperm=50,
nperm.fisher=1000,
weighted.score.type=1,
null.dist="pool",
p.corr="Bonferroni",
clust.row=1,
clust.col=1,
nom.p.mat="T",
create.legend="T",
rnd.seed=47365321)
library(pheatmap)
heatmap.YlGnPe <- c("#440259","#345F8C","#228C8A","#78CE51","#FAE71F")
cherry <- "#700353"
lightgrey <- "#dcddde"
# Input the nominal p values in the file and the corrected P values in the heat map
tmp <- matrix(c(0.987,0.412,0.997,0.001,0.040,0.388,0.086,0.976,
1,1,1,0.008,0.320,1,0.691,1), # Bonferroni校正p值
nrow = 4,byrow = T,dimnames = list(c("Cluster1_p","Cluster2_p","Cluster1_b","Cluster2_b"),c("CTAL4-noR","CTLA4-R","PD1-noR","PD1-R")))
library(pheatmap)
submap.data.A <- matrix(c(0.9700300 , 0.4595405 , 0.9930070 , 0.000999001,
0.1878122 , 0.3456543 , 0.1678322 , 0.955044955),nrow = 2,byrow = T)
submap.data.B <- matrix(c(1, 1, 1, 0.007992008,
1, 1, 1, 1.000000000),nrow = 2,byrow = T)
submap.data <- rbind(submap.data.A,submap.data.B)
row.names(submap.data) <- c('Cluster1_p','Cluster2_p','Cluster1_b','Cluster2_b')
colnames(submap.data) <- c("CTAL4-noR","CTLA4-R","PD1-noR","PD1-R")
annotation_row <- data.frame(pvalue=rep(c('Nominal p value','Bonferroni corrected'),c(2,2)))
rownames(annotation_row) <- row.names(submap.data)
pdf(file = 'pRRophetic/subclass mapping.pdf',width= 12,height= 8)
pheatmap(submap.data,
show_colnames = T,
color = heatmap.YlGnPe[5:1],
display_numbers = matrix(ifelse(submap.data < 0.05,'p < .05',''),nrow(submap.data)),number_format = "%.3f",
cluster_rows = F,cluster_cols = F,
annotation_row=annotation_row,
annotation_colors = list(pvalue=c("Nominal p value"=lightgrey,"Bonferroni corrected"=cherry)),
cellwidth=30,cellheight=30,main = "",fontsize_number = 9,
#fontsize_row = 20,fontsize_col = 25,
gaps_row=2,
fontsize=12)
dev.off()
# Draw a boxplot of the TIDE response
ann <-read.table("01rawData/GSE102349_Cluster_res.txt",header = T,sep = "\t",check.names = F)
head(ann)
print(table(ann$Cluster))
TIDE.res <- read.csv("pRRophetic/TIDE_output.csv",header = T,row.names = 1,check.names = F,stringsAsFactors = F)
TIDE.res$id <- rownames(TIDE.res)
# To test whether immunotherapy response is related to subtypes, P<0.05 is relevant
ann$TIDE <- TIDE.res[ann$id,"Responder"]
print(table(ann$TIDE,ann$Cluster))
print(fisher.test(table(ann$TIDE,ann$Cluster)))
boxdata <- merge(TIDE.res,ann,by="id")
boxdata$Cluster <- factor(boxdata$Cluster,levels=c("Cluster1","Cluster2"))
ggplot(boxdata,aes(Cluster,TIDE.x,fill=Cluster))+
geom_boxplot(outlier.colour = NA,notch = T,size = 0.3)+
geom_jitter(shape = 21,size=2,width = 0.2)+
geom_violin(position = position_dodge(width = .75),
size = 0.3,alpha = 0.4,trim = T)+
theme_classic()+ ylab('TIDE')+ggtitle('TIDE prediction')+
theme(legend.position = 'none',
axis.title.y = element_text(size=12),
axis.text = element_text(size=12),
axis.title.x = element_blank())+
scale_fill_manual(values=mycol[c(2,3)])+
stat_compare_means(comparisons = list(c('Cluster1','Cluster2')),
label = 'p.signif')+
stat_compare_means(label.y = max(boxdata$TIDE.x)+0)
ggsave("pRRophetic/TIDE_boxplot.pdf",width = 3.5,height = 3.5)