绘制富集系统聚类图

发布于 2022-09-09  127 次阅读


糖糖家的老张教程使用记录

安装补充包

计算差异基因

f_DESeq2f_dedup_IQR

r1 <- f_DESeq2(cts_b[keep,], geneInfo[keep,], Ct1, Tt1)
DEG <- subset(r1, !grepl('pseudogene', gene_type) & baseMean > quantile(baseMean)['25%'])
maxbaseMean <- function(x){
    x[['baseMean']]
}
DEG <- f_dedup_IQR(df = DEG, rowNn = DEG$gene_name, select_func = maxbaseMean)
FC_up <- with(DEG, log2FoldChange[log2FoldChange>0])
FC_up <- mean(FC_up) + 3*sd(FC_up)
FC_down <- with(DEG, log2FoldChange[log2FoldChange<0])
FC_down <- mean(FC_down) - 3*sd(FC_down)
saveRDS(subset(DEG, padj<0.05 & ((log2FoldChange > FC_up) | (log2FoldChange < FC_down))), 'DEG_filer.rds')

绘制差异热图

x <- readRDS('DEG_filer.rds')
tcga_predict <- readRDS('tcga.predict.rds')
tcga <- readRDS('../../../COXPH/data/PRAD.rds')
mat <- as.matrix(tcga$data[rownames(x)])
rownames(mat) <- NULL
mat <- t(mat)
rownames(mat) <- NULL
 
library(circlize)
library(ComplexHeatmap)
col_fun <- colorRamp2(
  c(-4, 0, 4), 
  c("#99CCCCAA", "white", "#BC3C29AA")
  )
#定义注释信息
ha <- HeatmapAnnotation(`Risk group` = tcga_predict$group,
                        `Risk Score` = tcga_predict$Risk_Score,
                        col = list(
                            `Risk group` = c("Low Risk" = "#99CCFFAA", 'High Risk' = '#FF6666AA')
                        ),
#                         annotation_name_gp = gpar(fontsize = 14),
                        show_annotation_name = TRUE)
p <- Heatmap(mat, name = 'Relative Expression', top_annotation = ha, column_order = order(tcga_predict$Risk_Score),
        col=col_fun)
pdf(file = 'C_Heatmap.pdf', width = 8, height = 8);print(p);dev.off()

通路富集

DEG <- readRDS('DEG_filer.rds')
KEGG <- readRDS('../../../GSEA/kk_SYMBOL.rds')
require(tidyverse)
require(enrichplot)
require(DOSE)
require(stringr)
library(clusterProfiler)
set.seed(3)
res <- enricher(gene = rownames(DEG), 
             pAdjustMethod = "fdr",
             pvalueCutoff = 0.05,
#              universe=universe,
             TERM2GENE = KEGG$TERM2GENE,
             TERM2NAME = KEGG$TERM2NAME)
write.csv(res@result, 'KEGG.csv')
saveRDS(res, 'DEG_KEGG.rds')

数据清洗

genedata <- data.frame(ID=DEG$gene_name,logFC=DEG$log2FoldChange)
GOplotIn_KEGG <- res[,c('ID','Description','p.adjust','geneID')]
GOplotIn_KEGG$geneID <- str_replace_all(GOplotIn_KEGG$geneID,'/',',') #把GeneID列中的’/’替换成‘,’
names(GOplotIn_KEGG) <- c('ID','Term','adj_pval','Genes')#修改列名,后面弦图绘制的时候需要这样的格式
GOplotIn_KEGG$Category = "KEGG"#分类信息
circ_KEGG <- GOplot::circle_dat(GOplotIn_KEGG, genedata) #GOplot导入数据格式整理
chord <- GOplot::chord_dat(data = circ_KEGG,genes = genedata) #生成含有选定基因的数据框

系统聚类图

pdf('D_KEGG.pdf', width = 12, height = 8);{
    GOplot::GOCluster(circ_KEGG, GOplotIn_KEGG$Term) #系统聚类图
};dev.off()

一枚爱好探索的医学生