Seurat (九) hclust聚类分析 (二) by 细胞比例

第一步 读入数据

Seurat (十) 同群细胞在不同脑区的DEGs (二)第二步

第二步 构造相关计算函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
f_ggplot2_ti <- function(p, title, subtitle=''){
if (subtitle == ''){
(p + ggtitle(title) + theme(plot.title = element_text(hjust = 0.5)))
}else{
(p + ggtitle(title, subtitle=subtitle) +
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)))
}
}

library(ggdendro)
f_ggdendrogram <- function(hc){
ggdendrogram(hc$h, rotate = T, size = 3)+theme(axis.text = element_text(size=14,face = "bold"))
}

f_hc <- function(lc_counts){
d <- dist(t(lc_counts))
h <- hclust(d)
list(d=d, h=h)
}

f_hc_cop <- function(hc){
cop <- cophenetic(hc$h)
cor(cop, hc$d)
}


f_cluster_averages <- function(lc_scRNA, lc_metaN='ident'){
# 切分出Clusters
lc_clusters <- SplitObject(lc_scRNA, split.by = lc_metaN)
for (lc_i in 1:length(lc_clusters)){
lc_clusters[[lc_i]] <- lc_clusters[[lc_i]][[lc_clusters[[lc_i]]@active.assay]]@scale.data
}
for (lc_i in 1:length(lc_clusters)){
lc_clusters[[lc_i]] <- apply(lc_clusters[[lc_i]],1,mean)
}
lc_clusters <- data.frame(lc_clusters)
scale(lc_clusters)
}

f_br_cluster_f <- function(sObject, lc_groupN){
lc_filter <- unlist(unique(sObject[[lc_groupN]]))
lc_filter <- lc_filter[!is.na(lc_filter)]
lc_filter
}

f_br_cluster <- function(sObject, lc_groupN, lc_labelN, lc_prop = F){
lc_all <- unique(sObject[[lc_labelN]])
rownames(lc_all) <- lc_all[[1]]
colnames(lc_all) <- "CB"
# lc_tp <- SplitObject(subset(x = sObject, !!sym(lc_groupN)%in%f_br_cluster_f(sObject, lc_groupN)), split.by = lc_groupN)
lc_tp <- SplitObject(sObject, split.by = lc_groupN)
for(lc_i in 1:length(lc_tp)){
if(lc_prop){
lc_tp[[lc_i]] <- prop.table(table(lc_tp[[lc_i]][[lc_labelN]]))
}else{
lc_tp[[lc_i]] <- table(lc_tp[[lc_i]][[lc_labelN]])
}
}
for(lc_name in names(lc_tp)){
lc_all[[lc_name]] = 0
lc_all[names(lc_tp[[lc_name]]), lc_name] = lc_tp[[lc_name]]
}
lc_all[,-1]
}
  • 相关性验证的代码
1
2
3
4
5
6
7
library(Hmisc)
library(ggcorrplot)
f_corrplot <- function(lc_scdata){
lc_cor <- rcorr(lc_scdata)
lc_cor$P[is.na(lc_cor$P)]=0
ggcorrplot(lc_cor$r, type="full",hc.order = T, lab = T, p.mat = lc_cor$P)
}

第三步 (可选)筛选感兴趣的细胞群

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
n_ExN <- c('L4 IT','L5 IT','L5 ET','IT','L6b','L5/6 IT Car3','L6 IT','L2/3 IT','L5/6 NP','L6 IT Car3','L6 CT')

n_InN <- c('Lamp5','Pvalb','Sst','Vip','Sncg')

n_NoN <- c('Astro','PAX6','Endo','Micro-PVM','OPC','Oligo','Pericyte','VLMC')

n_groups <- list(NoN=n_NoN, ExN=n_ExN, InN=n_InN)

f_listUpdateRe <- function(lc_obj, lc_bool, lc_item){
lc_obj[lc_bool] <- rep(lc_item,times=sum(lc_bool))
lc_obj
}

f_grouplabel <- function(lc_meta.data, lc_groups){
res <- lc_meta.data[[1]]
for(lc_g in names(lc_groups)){
lc_bool = (res %in% lc_groups[[lc_g]])
for(c_n in colnames(lc_meta.data)){
lc_bool = lc_bool (lc_meta.data[[c_n]] %in% lc_groups[[lc_g]])
}
res <- f_listUpdateRe(res, lc_bool, lc_g)
}
names(res) <- rownames(lc_meta.data)
res
}

scRNA_split[['n_groups']] <- f_grouplabel(scRNA_split[[c("hM1_hmca_class")]], n_groups)

sc_Neuron <- subset(x = scRNA_split, n_groups %in% c("InN", "ExN"))

第四步 进行计算

  • by gene
1
2
3
4
5
6
7
8
9
10
11
12
test_N <- SplitObject(sc_Neuron, split.by = 'orig.ident')
test_N[['all']] <- sc_Neuron

listPN <- list()
for (n in names(test_N)){
test = f_cluster_averages(test_N[[n]], "Region")
hc <- f_hc(test)
cop <- paste(n, hc %>% f_hc_cop())
print(cop)
listPN[[n]] <- (f_hc(test) %>% f_ggdendrogram() %>% f_ggplot2_ti(paste0('hclust by gene in ', n), subtitle = cop)) +
(f_corrplot(test) %>% f_ggplot2_ti(paste0('cor by gene in ', n)))
}
  • by proportion of cells
1
2
3
4
5
6
7
8
9
10
11
12
test_s <- SplitObject(scRNA_split, split.by = 'orig.ident')
test_s[['all']] <- scRNA_split

listP <- list()
for (n in names(test_s)){
test = as.matrix(f_br_cluster(test_s[[n]], 'Region', 'hM1_hmca_class', lc_prop = T))
hc <- f_hc(test)
cop <- paste(n, hc %>% f_hc_cop())
print(cop)
listP[[n]] <- (f_hc(test) %>% f_ggdendrogram() %>% f_ggplot2_ti(paste0('hclust by proportion of cells in ', n), subtitle = cop)) +
(f_corrplot(test) %>% f_ggplot2_ti(paste0('cor by proportion of cells of ', n)))
}

第五步 输出计算结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# 配置数据和mark基因表的路径
root_path = "~/zlliu/R_data/hBLA"

# 配置结果保存路径
output_path = "~/zlliu/R_output/21.12.17.pcell_hclust"
if (!file.exists(output_path)){dir.create(output_path)}

# 设置工作目录,输出文件将保存在此目录下
setwd(output_path)
getwd()

library(export)
f_ppt_output <- function(fileName, image, lc_aspectr=1.5){
for (p in image){
graph2ppt(x=p, file=fileName, width=12, aspectr=lc_aspectr, append = TRUE, vector.graphic=F)
}
}

f_ppt_output('split.pptx', listP, lc_aspectr = 2)
f_ppt_output('sc_Neuron.pptx', listPN, lc_aspectr = 2)

Seurat (九) hclust聚类分析 (二) by 细胞比例
https://b.limour.top/1352.html
Author
Limour
Posted on
December 17, 2021
Licensed under