1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
| # 0、加载程辑包 library(Seurat) library(dplyr) library(patchwork)
# 加载 SingleR library(SingleR) library(SummarizedExperiment) library(scater) library(BiocParallel) # 并行计算加速
# 配置数据路径 root_path = "~/zlliu/R_data/21.07.15.IntegrateData"
# 配置结果保存路径 output_path = "~/zlliu/R_output/21.09.20.SingleR" if (!file.exists(output_path)){dir.create(output_path)}
# 设置工作目录,输出文件将保存在此目录下 setwd(output_path) getwd()
# 1、读取数据和参考数据集 scRNAs = readRDS(file.path(root_path, 'hBLA_scRNAs.rds')) if (!("hM1.se" %in% ls())){ hM1.se <- readRDS("/home/rqzhang/zlliu/R_data/human_M1_10x/hM1.se.rds") } if (!("hmca" %in% ls())){ hmca <- readRDS("/home/rqzhang/zlliu/R_data/Human_Multiple_Cortical_Areas_SMART-seq/hmca.rds") }
# 1.1、裁剪数据集 f_noNull <- function(lc_se, lc_col){ if(any(lc_se$meta["sample_name"] != colnames(lc_se))){ return } lc_idx = which(lc_se$meta[lc_col] == "") lc_res = lc_se[, -lc_idx] lc_res$meta = lc_se$meta[-lc_idx,] lc_res }
hmca <- f_noNull(hmca, "subclass_label")
# 2、构造预测方法 f_get_pred <- function(scRNA, lc_i){ # 如果已经计算过了,就不再重复计算,直接返回上一次的结果 if(file.exists(sprintf( "%02d_hM1_subclass.txt", lc_i))){ result_main_hpca <- read.table(sprintf( "%02d_hM1_subclass.txt", lc_i), stringsAsFactors = F) return (result_main_hpca) } # 导出原始counts矩阵 test.count=as.data.frame(scRNA[["RNA"]]@counts) # 保留共同基因 common_hpca <- intersect(rownames(test.count), c(rownames(hM1.se), rownames(hmca))) test.count_forhpca <- test.count[common_hpca,] tp_idx = na.omit(match(common_hpca, rownames(hM1.se))) lc_hM1.se <- hM1.se[rownames(hM1.se)[tp_idx], ] tp_idx = na.omit(match(common_hpca, rownames(hmca))) lc_hmca <- hmca[rownames(hmca)[tp_idx], ]
gc() # 生成test数据集 test.count_forhpca.se <- SummarizedExperiment(assays=list(counts=test.count_forhpca)) test.count_forhpca.se <- logNormCounts(test.count_forhpca.se) # 进行分类预测 大概一小时 pred.main.hpca <- SingleR(test = test.count_forhpca.se, ref = list(m1=lc_hM1.se, mca=lc_hmca), labels = list(hM1.se$meta$subclass_label, hmca$meta$subclass_label), BPPARAM=MulticoreParam(32)) # 32CPUs
gc() #构造返回结果 result_main_hpca <- as.data.frame(pred.main.hpca$labels) result_main_hpca$CB <- rownames(pred.main.hpca) colnames(result_main_hpca) <- c('hM1_subclass', 'CB') write.table(result_main_hpca, sprintf( "%02d_hM1_subclass.txt", lc_i)) #保存下来,方便以后调用 result_main_hpca }
# 3、进行预测 tp_sc <- scRNAs # rm(x10s) gc() tp_sc_st <- 1 tp_sc_len <- length(tp_sc) for (lc_i in tp_sc_st:tp_sc_len) { f_get_pred(tp_sc[[lc_i]], lc_i) }
# 4、读取预测信息的函数 f_set_pred <- function(scRNA, result_main_hpca){ scRNA@meta.data$CB <- rownames(scRNA@meta.data) # print(scRNA@meta.data$CB) scRNA@meta.data=merge(scRNA@meta.data,result_main_hpca,by="CB") # print(result_main_hpca$CB) rownames(scRNA@meta.data)=scRNA@meta.data$CB scRNA }
for (lc_i in tp_sc_st:tp_sc_len) { tp_sc[[lc_i]] <- f_set_pred(tp_sc[[lc_i]], f_get_pred(tp_sc[[lc_i]], lc_i)) }
# 5、查找marker f_findMarkers <- function(lc_scRNA, lc_group.by){ Idents(lc_scRNA) <- lc_scRNA@meta.data[,lc_group.by] lc_markers <- FindAllMarkers(lc_scRNA, only.pos = TRUE, group.by = lc_group.by, min.pct = 0.25, logfc.threshold = 0.25) lc_markers <- subset(lc_markers, subset = p_val_adj<0.05) lc_markers }
tp_markers <- list() for (lc_i in 1:length(tp_sc)) { tp_markers[[lc_i]] <- f_findMarkers(tp_sc[[lc_i]], 'hM1_subclass') }
gl_MarkerGene <- read.csv(file.path("~/zlliu/R_data/hBLA", "cellType_Markers.csv"), header = F)
f_matchKownMarker <- function(lc_markers_all, lc_markers_kown){ lc_CellType <- lc_markers_kown[match(rownames(lc_markers_all), lc_markers_kown[,1]),2] cbind(lc_markers_all, lc_CellType) }
for (lc_i in 1:length(tp_sc)) { write.csv(f_matchKownMarker(tp_markers[[lc_i]], gl_MarkerGene), sprintf( "%02d_significant_markers_%s.csv", lc_i, names(tp_sc)[lc_i])) }
gl_MarkerGene <- read.csv("~/zlliu/R_data/hBLA/TableS3.csv", header = T)
|