安装包
- conda create -n oncoPredict -c conda-forge r-base=4.1.3
- conda activate oncoPredict
- conda install -c conda-forge r-tidyverse -y
- conda install -c conda-forge r-irkernel -y
- Rscript -e "IRkernel::installspec(name='oncoPredict', displayname='r-oncoPredict')"
- conda install -c conda-forge r-nloptr -y
- conda install -c conda-forge r-lme4 -y
- conda install -c conda-forge r-pbkrtest -y
- conda install -c conda-forge r-car -y
- conda install -c conda-forge r-biocmanager -y
- conda install -c conda-forge r-ggpubr -y
- conda install -c bioconda bioconductor-maftools -y
- BiocManager::install('oncoPredict')
- 从oncoPredict作者提供的地址下载整理好的CTRP和GDSC,生信技能树的介绍。镜像
- unzip DataFiles.zip
读入训练数据
library(oncoPredict)
CTRP2 <- readRDS('../../../oncoPredict/DataFiles/DataFiles//Training Data/CTRP2_Expr (TPM, not log transformed).rds')
CTRP2 <- log10(CTRP2+1)
GDSC2_Res <- readRDS('../../../oncoPredict/DataFiles/DataFiles//Training Data/CTRP2_Res.rds')
GDSC2_Res <- exp(GDSC2_Res)
读入预测数据
f_dedup_IQR
load('../../../DEG/TCGA/PRAD_tp.rda')
tpm <- data@assays@data$tpm_unstrand
colnames(tpm) <- data@colData$patient
tpm <- tpm[,f_rm_duplicated(colnames(tpm))]
geneInfo <- as.data.frame(data@rowRanges)[c('gene_id','gene_type','gene_name')]
tpm <- f_dedup_IQR(as.data.frame(tpm), geneInfo$gene_name)
comm <- intersect(rownames(CTRP2), rownames(tpm))
CTRP2 <- CTRP2[comm,]
tpm <- tpm[comm,]
tpm <- log10(tpm+1)
进行预测
library(oncoPredict)
load('oncoPredict_calcPhenotype.rdata')
keep <- rowSums(CTRP2) > 0.8*ncol(CTRP2)
calcPhenotype(trainingExprData = CTRP2[keep,],
trainingPtype = GDSC2_Res,
testExprData = as.matrix(tpm),
batchCorrect = 'eb', # "eb" for ComBat
powerTransformPhenotype = TRUE,
removeLowVaryingGenes = 0.2,
minNumSamples = 10,
printOutput = TRUE,
removeLowVaringGenesFrom = 'rawData')
- save(CTRP2, GDSC2_Res, tpm, file = 'oncoPredict_calcPhenotype.rdata')
- nano oncoPredict_calcPhenotype.R
- conda activate oncoPredict
- Rscript ./oncoPredict_calcPhenotype.R --max-ppsize=500000
预测结果可视化
读入预测结果
testPtype <- read.csv('./calcPhenotype_Output/DrugPredictions.csv', row.names = 1)
testPtype <- log(testPtype)
testPtype
贴上分组
group <- readRDS('../fig5/tcga.predict.rds')
df <- cbind(testPtype[rownames(group), c('CIL55', 'BRD4132')],group$group)
colnames(df)[[ncol(df)]] <- 'Risk Group'
df
绘图
library(ggpubr)
options(repr.plot.width=4, repr.plot.height=4)
my_comparisons <- list(c("Low Risk", "High Risk"))
ggviolin(df, x="Risk Group", y="CIL55", fill = "Risk Group",
palette = c("#00AFBB", "#E7B800"),
add = "boxplot", add.params = list(fill="white"))+
stat_compare_means(comparisons = my_comparisons, label = "p.signif")#label这里表示选择显著性标记(星号)
Comments 3 条评论
你好,请问可否分享一下文中的预测数据
老师您好,想咨询一下oncopredict包输出的DrugPredictions.csv文件里面的数值代表的是不同样本对应的药物的IC50值吗(指数值越小证明样本对药物越敏感),还是说输出的数值代表的是样本对药物的敏感性(数值越大代表样本对药物越敏感)?
@155283 对DrugPredictions.csv文件里面的数值取对数后得到的数值代表的是不同样本对应的药物的IC50值