Seurat (四) 差异基因分析

第一步 火山图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Idents(sc_Neuron) <- sc_Neuron[['hmca_class']]
all_markers <- FindAllMarkers(sc_Neuron, min.pct = 0.25, logfc.threshold = 0.25)
significant_markers <- subset(all_markers, subset = p_val_adj<0.05)

f_DEG_Volcano <- function(lc_logFC, lc_p, lc_gene, Threshold_logFC = 1, Threshold_p = 0.05, lc_rep=1:10){
col_vector = rep(rgb(108, 200, 228, maxColorValue = 255), length(lc_logFC))
col_vector[lc_p < Threshold_p & lc_logFC > Threshold_logFC] = rgb(226, 61, 75, maxColorValue = 255)
col_vector[lc_p < Threshold_p & lc_logFC < -Threshold_logFC] = rgb(232, 168, 71, maxColorValue = 255)
lc_p[lc_p < 1e-10] = 1e-10
lc_p[lc_p > 1 is.na(lc_p)] = 1
df = data.frame(logFC <- lc_logFC, `-log10(P)` <- -log10(lc_p), col <- col_vector, gene <- lc_gene)
colnames(df) <- c('logFC', '-log10(P)', "col", "gene")
lc_tp_logFC <- df$logFC
lc_tp_logFC[lc_p>=Threshold_p] = 0
lc_idx <- order(lc_tp_logFC)[c(lc_rep, length(lc_gene)+1-lc_rep)]
df$logFC[df$logFC > 10] = 10
df$logFC[df$logFC < -10] = -10
res <- ggplot() + geom_point(aes(logFC, `-log10(P)`, col=I(col)), data = df)
res <- res + theme_bw() + theme(panel.grid=element_line(colour=NA))
res <- res + geom_hline(yintercept=-log10(Threshold_p), linetype="longdash")
res <- res + geom_vline(xintercept=c(Threshold_logFC, -Threshold_logFC), linetype="longdash")
res <- res + geom_text_repel(data=df[lc_idx,],aes(logFC,`-log10(P)`,label=gene), force=T, max.overlaps=Inf)
res
}

第二步 热图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
f_cluster_averages <- function(lc_scRNA, lc_metaN='ident'){
# 切分出Clusters
lc_clusters <- SplitObject(lc_scRNA, split.by = lc_metaN)
for (lc_i in 1:length(lc_clusters)){
lc_clusters[[lc_i]] <- lc_clusters[[lc_i]][[lc_clusters[[lc_i]]@active.assay]]@scale.data
}
for (lc_i in 1:length(lc_clusters)){
lc_clusters[[lc_i]] <- apply(lc_clusters[[lc_i]],1,mean)
}
lc_clusters <- data.frame(lc_clusters)
scale(lc_clusters)
}

library(clusterProfiler)
library(pheatmap)
library(ggdendro)
f_DEG_hclust <- function(lc_counts){
ggdendrogram(hclust(dist(t(lc_counts))), rotate = T, size = 3)+theme(axis.text = element_text(size=14,face = "bold"))
}

tp_d <- f_cluster_averages(sc_Neuron, "orig.ident")
f_DEG_hclust(tp_d)

f_DEG_pheatmap_choose_matrix <- function(lc_tp_d, lc_significant_markers, lc_n = 120){
res <- subset(lc_significant_markers, abs(avg_log2FC) > 1)
res <- res[order(abs(res$avg_log2FC), decreasing = T),]
res <- head(unique(res$gene), n = lc_n)
res <- lc_tp_d[res,]
res
}

Idents(sc_Neuron) <- sc_Neuron[['orig.ident']]
all_markers <- FindAllMarkers(sc_Neuron, min.pct = 0.25, logfc.threshold = 0.25)
significant_markers <- subset(all_markers, subset = p_val_adj<0.05)

f_DEG_Volcano(all_markers$avg_log2FC, all_markers$p_val_adj, all_markers$gene)

第三步 一些奇怪的图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
f_br_cluster <- function(sObject, lc_groupN, lc_labelN, lc_prop = F){
lc_all <- unique(sObject[[lc_labelN]])
rownames(lc_all) <- lc_all[[1]]
colnames(lc_all) <- "CB"
lc_tp <- SplitObject(sObject, split.by = lc_groupN)
for(lc_i in 1:length(lc_tp)){
if(lc_prop){
lc_tp[[lc_i]] <- prop.table(table(lc_tp[[lc_i]][[lc_labelN]]))
}else{
lc_tp[[lc_i]] <- table(lc_tp[[lc_i]][[lc_labelN]])
}
}
for(lc_name in names(lc_tp)){
lc_all[[lc_name]] = 0
lc_all[names(lc_tp[[lc_name]]), lc_name] = lc_tp[[lc_name]]
}
lc_all[,-1]
}

f_q2l <- function(lc_q){
res <- NULL
for(lc_c in colnames(lc_q)){
for(lc_r in rownames(lc_q)){
res <- rbind(res, c(group=lc_c, label=lc_r, value=lc_q[lc_r,lc_c]))
}
}
res <- data.frame(res)
res$value = as.numeric(res$value)
res
}

library(RColorBrewer)
library(ggplot2)
col_Paired <- colorRampPalette(brewer.pal(12, "Paired"))
f_q_frequnency <- function(lc_q){
ggplot(f_q2l(lc_q),mapping = aes(group,value,fill=label))+
geom_bar(stat='identity',position='fill') + scale_fill_manual(values= col_Paired(nrow(lc_q)))+
labs(x = 'group',y = 'frequnency') +
theme(axis.title =element_text(size = 16),axis.text =element_text(size = 14, color = 'black'))+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+coord_flip()
}

options(repr.plot.width=9, repr.plot.height=5)
tp_test <- f_br_cluster(sc_Neuron, 'orig.ident', 'hmca_class')
f_q_frequnency(tp_test)
friedman.test(as.matrix(tp_test))
chisq.test(as.matrix(tp_test), simulate.p.value = TRUE)
fisher.test(as.matrix(tp_test), simulate.p.value = TRUE)

f_DoHeatmap <- function(sObject, lc_significant_markers, lc_groupN){
lc_significant_markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = abs(avg_log2FC)) -> top10
DoHeatmap(sc_Neuron, group.by = lc_groupN, features = top10$gene) + scale_fill_gradientn(colors = c('#440154', '#25848D', "yellow"))
}


Seurat (四) 差异基因分析
https://b.limour.top/873.html
Author
Limour
Posted on
October 3, 2021
Licensed under