TCGAbiolinks下载蛋白质组数据

之前通过tcpa下载过蛋白数据,而TCGAbiolinks也有下载蛋白质组学数据的示例,后者看上去更全面一点。

下载数据

1
2
3
4
5
6
7
8
9
library(TCGAbiolinks)
query.rppa <- GDCquery(
project = "TCGA-PRAD",
data.category = "Proteome Profiling",
data.type = "Protein Expression Quantification"
)
GDCdownload(query.rppa)
rppa <- GDCprepare(query.rppa)
saveRDS(rppa, 'PRAD_rppa.rds')

清洗数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
pMiss <- function(x){round(sum(is.na(x))/length(x),3)}
rppa <- rppa[apply(rppa, 1, pMiss) < 0.05, ]
rppa <- rppa[, apply(rppa, 2, pMiss) < 0.05]
sum(is.na(rppa))
rowInfo <- rppa[1:5]
rppa <- rppa[-(1:5)]
rppa <- rppa[, substr(colnames(rppa), 14, 16) == "01A"]
colnames(rppa) <- substr(colnames(rppa),1,12)
group <- readRDS('../fig5/tcga.predict.rds')
gRow <- intersect(colnames(rppa), rownames(group))
group <- group[gRow,]
rppa <- rppa[, colnames(rppa) %in% gRow]
Ct1 <- rownames(group)[group$group == 'Low Risk']
Tt1 <- rownames(group)[group$group == 'High Risk']

计算差异蛋白

f_DE_limma

1
2
3
r1 <- f_DE_limma(rppa, rowInfo, Ct1, Tt1, trend=F)
rownames(rppa) <- rowInfo$AGID
save(r1, rppa, file = 'PRAD_TCPA_DE.rdata')

TCGAbiolinks下载蛋白质组数据
https://b.limour.top/2020.html
Author
Limour
Posted on
September 10, 2022
Licensed under