SingleR (二) 使用参考数据集预测分群

第一步构建 21.09.20.SingleR.R

# 0、加载程辑包
library(Seurat)
library(dplyr)
library(patchwork)

# 加载 SingleR
library(SingleR)
library(SummarizedExperiment)
library(scater)
library(BiocParallel) # 并行计算加速

# 配置数据路径
root_path = "~/zlliu/R_data/21.07.15.IntegrateData"

# 配置结果保存路径
output_path = "~/zlliu/R_output/21.09.20.SingleR"
if (!file.exists(output_path)){dir.create(output_path)}

# 设置工作目录，输出文件将保存在此目录下
setwd(output_path)
getwd()

# 1、读取数据和参考数据集
scRNAs = readRDS(file.path(root_path, 'hBLA_scRNAs.rds'))
if (!("hM1.se" %in% ls())){
    hM1.se <- readRDS("/home/rqzhang/zlliu/R_data/human_M1_10x/hM1.se.rds")
}
if (!("hmca" %in% ls())){
    hmca <- readRDS("/home/rqzhang/zlliu/R_data/Human_Multiple_Cortical_Areas_SMART-seq/hmca.rds")
}

# 1.1、裁剪数据集
f_noNull <- function(lc_se, lc_col){
    if(any(lc_se$meta["sample_name"] != colnames(lc_se))){
        return
    }
    lc_idx = which(lc_se$meta[lc_col] == "")
    lc_res = lc_se[, -lc_idx]
    lc_res$meta = lc_se$meta[-lc_idx,]
    lc_res
}

hmca <- f_noNull(hmca, "subclass_label")

# 2、构造预测方法
f_get_pred <- function(scRNA, lc_i){
    
    # 如果已经计算过了，就不再重复计算，直接返回上一次的结果
    if(file.exists(sprintf( "%02d_hM1_subclass.txt", lc_i))){
        result_main_hpca <- read.table(sprintf( "%02d_hM1_subclass.txt", lc_i), stringsAsFactors = F)
        return (result_main_hpca)
    }
    
    # 导出原始counts矩阵
    test.count=as.data.frame(scRNA[["RNA"]]@counts)
    
    # 保留共同基因
    common_hpca <- intersect(rownames(test.count), c(rownames(hM1.se), rownames(hmca)))
    test.count_forhpca <- test.count[common_hpca,]
    tp_idx = na.omit(match(common_hpca, rownames(hM1.se)))
    lc_hM1.se <- hM1.se[rownames(hM1.se)[tp_idx], ]
    tp_idx = na.omit(match(common_hpca, rownames(hmca)))
    lc_hmca <- hmca[rownames(hmca)[tp_idx], ]

    gc()
    
    # 生成test数据集
    test.count_forhpca.se <- SummarizedExperiment(assays=list(counts=test.count_forhpca))
    test.count_forhpca.se <- logNormCounts(test.count_forhpca.se)
    
    # 进行分类预测 大概一小时
    pred.main.hpca <- SingleR(test = test.count_forhpca.se, ref = list(m1=lc_hM1.se, mca=lc_hmca), labels = list(hM1.se$meta$subclass_label, hmca$meta$subclass_label), BPPARAM=MulticoreParam(32)) # 32CPUs

    gc()
    
    #构造返回结果
    result_main_hpca <- as.data.frame(pred.main.hpca$labels)
    result_main_hpca$CB <- rownames(pred.main.hpca)
    colnames(result_main_hpca) <- c('hM1_subclass', 'CB')
    write.table(result_main_hpca, sprintf( "%02d_hM1_subclass.txt", lc_i)) #保存下来，方便以后调用
    result_main_hpca
}

# 3、进行预测
tp_sc <- scRNAs
# rm(x10s)
gc()
tp_sc_st <- 1
tp_sc_len <- length(tp_sc)
for (lc_i in tp_sc_st:tp_sc_len) {
    f_get_pred(tp_sc[[lc_i]], lc_i)
}

# 4、读取预测信息的函数
f_set_pred <- function(scRNA, result_main_hpca){
    scRNA@meta.data$CB <- rownames(scRNA@meta.data)
#     print(scRNA@meta.data$CB)
    scRNA@meta.data=merge(scRNA@meta.data,result_main_hpca,by="CB")
#     print(result_main_hpca$CB)
    rownames(scRNA@meta.data)=scRNA@meta.data$CB
    scRNA
}

for (lc_i in tp_sc_st:tp_sc_len) {
    tp_sc[[lc_i]] <- f_set_pred(tp_sc[[lc_i]], f_get_pred(tp_sc[[lc_i]], lc_i))
}

# 5、查找marker
f_findMarkers <- function(lc_scRNA, lc_group.by){
    Idents(lc_scRNA) <- lc_scRNA@meta.data[,lc_group.by]
    lc_markers <- FindAllMarkers(lc_scRNA, only.pos = TRUE, group.by = lc_group.by, min.pct = 0.25, logfc.threshold = 0.25)
    lc_markers <- subset(lc_markers, subset = p_val_adj<0.05)
    lc_markers
}

tp_markers  <- list()
for (lc_i in 1:length(tp_sc)) {
    tp_markers[[lc_i]] <- f_findMarkers(tp_sc[[lc_i]], 'hM1_subclass')
}

gl_MarkerGene <- read.csv(file.path("~/zlliu/R_data/hBLA", "cellType_Markers.csv"), header = F)

f_matchKownMarker <- function(lc_markers_all, lc_markers_kown){
    lc_CellType <- lc_markers_kown[match(rownames(lc_markers_all), lc_markers_kown[,1]),2]
    cbind(lc_markers_all, lc_CellType)
}

for (lc_i in 1:length(tp_sc)) {
    write.csv(f_matchKownMarker(tp_markers[[lc_i]], gl_MarkerGene), sprintf( "%02d_significant_markers_%s.csv", lc_i, names(tp_sc)[lc_i]))
}

gl_MarkerGene <- read.csv("~/zlliu/R_data/hBLA/TableS3.csv", header = T)

第二步构建 21.09.20.SingleR.pbs 并提交

#!/bin/bash
#PBS -q fat
#PBS -V
#PBS -o /home/rqzhang/zlliu/PBS/SingleR/SingleR.out
#PBS -e /home/rqzhang/zlliu/PBS/SingleR/SingleR.err
#PBS -l nodes=1:ppn=32
#PBS -r y
cd /home/rqzhang
Rscript /home/rqzhang/zlliu/PBS/SingleR/21.07.15.10x.hM1.se.R
echo $HOME

提交job：qsub /home/rqzhang/zlliu/PBS/SingleR/21.09.20.SingleR.pbs
查看所有任务：qstat

生物信息学 Seurat教程

SingleR (二) 使用参考数据集预测分群

https://b.limour.top/678.html

Author

Limour

Posted on

September 20, 2021

Licensed under

SingleR (三) 分群结果可视化 Previous

SingleR (一) 参考数据集的构建方法 Next

SingleR (二) 使用参考数据集预测分群

第一步 构建 21.09.20.SingleR.R

第二步 构建 21.09.20.SingleR.pbs 并提交

第一步构建 21.09.20.SingleR.R

第二步构建 21.09.20.SingleR.pbs 并提交