Seurat绘制小提琴图与箱型图的叠加图

第一步 准备测试数据

1
2
3
4
5
6
7
8
library(Seurat)
library(SeuratData)
library(ggplot2)
library(patchwork)
library(dplyr)
InstallData('pbmc3k')
pbmc3k
Idents(pbmc3k) <- pbmc3k[['seurat_annotations']]

第二步 尝试拼图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
require(ggplot2)
require(ggfun)
require(ggplotify)
require(aplot)
require(cowplot)
f_VlnBoxPlot_0 <- function(scRNAo, feature, ...){
vp <- VlnPlot(scRNAo, features = feature, slot = "counts", log = TRUE, ...) + NoLegend() # 获取数据
colnames(vp$data) <- c('GENE', 'ident') # 统一mapping的名称
p <- ggplot(vp$data, aes(x=ident,y=GENE,fill=ident)) + vp$theme # 重构绘图
p <- p + geom_violin() + scale_y_continuous(trans = "log10") # 绘制小提琴图,对y轴进行log10缩放
p <- p + stat_ydensity(trim = TRUE, scale = 'width', adjust = 1) # 绘制小提琴图
p <- p + geom_boxplot(width=0.618) # 绘制箱型图
p <- p + stat_summary(fun="mean",geom="point",color='white') # 添加均值点
p <- p + theme(plot.title=element_blank()) # 删除总标题
p <- p + labs(x=NULL, y=feature) # 删除x轴标题,y标题改成基因名
p <- p + theme(axis.title.y=element_text(vjust=0.5,angle=0)) # y标题垂直居中、水平显示
p <- p + theme(axis.text.x=element_blank()) # 删除x轴刻度
p <- p + theme(panel.background = element_rect(colour="black")) # 添加框线
p <- p + theme(plot.margin = unit(c(0,0,0,0), "cm")) # 尽量删除图外空白
return(p)
}
f_VlnBoxPlot_1 <- function(scRNAo, feature_type, features, isLast, ...){
feature = features[1]
p <- f_VlnBoxPlot_0(scRNAo, feature, ...)
if (length(features) > 1){
for (feature in features[2:length(features)]){
p <- p / f_VlnBoxPlot_0(scRNAo, feature, ...)
}
}
if (isLast){
p <- p + theme(axis.text.x=element_text(hjust=0.5,angle=45)) # 添加x轴坐标
}
p <- as.ggplot(p) + facet_set(feature_type, side = "r") # 添加基因集说明
p <- p + theme(strip.text.y = element_text(size = 15)) # 设置说明文字大小
return(p)
}
f_VlnBoxPlot_2 <- function(markers){
markers <- markers[order(markers$celltype),]
feature_types <- unique(markers$celltype)
res <- list()
for (feature_type in feature_types){
res[[feature_type]] = NULL
}
for (lc_i in 1:nrow(markers)){
feature_type <- markers[lc_i, 'celltype']
feature <- markers[lc_i, 'marker']
res[[feature_type]] <- c(res[[feature_type]], feature)
}
return(res)
}
pushR = function(foo, bar){
foo[[length(foo)+1]] <- bar
foo
}
popR = function(foo){
foo[[length(foo)]]
}
f_VlnBoxPlot <- function(scRNAo, lc_markers, ...){
markers <- f_VlnBoxPlot_2(lc_markers)
feature_types <- names(markers)
last_feature_type <- feature_types[length(feature_types)]
feature_type <- feature_types[1]
p <- f_VlnBoxPlot_1(scRNAo, feature_type, markers[[feature_type]], feature_type==last_feature_type)
# plist <- list()
# plist <- pushR(plist, p)
# p1 <- p # 作为参考标准图
if (length(feature_types) > 1){
for (feature_type in feature_types[2:length(feature_types)]){
p2 <- f_VlnBoxPlot_1(scRNAo, feature_type, markers[[feature_type]], feature_type==last_feature_type)
# plist <- pushR(plist, p2)
# p2 <- p2 + xlim2(p1) # x轴与p1对齐
p <- p / p2
}
}
# p <- as.ggplot(p + theme(axis.text.x=element_text(hjust=0.5,angle=45))) # 添加x轴坐标
# p <- plot_grid(plotlist = plist, ncol = 1, align = "v")
return(p)
}
markers <- read.csv(header = T, text = '
celltype,marker
Naive CD4+ T,IL7R
Naive CD4+ T,CCR7
CD14+ Mono,CD14
CD14+ Mono,LYZ
Memory CD4+,S100A4
B,MS4A1
CD8+ T,CD8A
FCGR3A+ Mono,FCGR3A
FCGR3A+ Mono,MS4A7
NK,GNLY
NK,NKG7
DC,FCER1A
DC,CST3
Platelet,PPBP
')
c <- f_VlnBoxPlot(pbmc3k, markers)
c

第三步 尝试分面

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
f_VlnBoxPlot_18 <- function(features){
tb <- data.frame(table(features))
tb <- tb[tb$Freq > 1,]
return(tb)
}

f_VlnBoxPlot_19 <- function(tmp, features, groupN){
dupl <- f_VlnBoxPlot_18(features)
if(nrow(dupl) < 1){
return(tmp)
}
for(lc_i in 1:nrow(dupl)){
dupfeature <- dupl[lc_i, 'features']
idx <- which(features == dupfeature)
for (lc_j in 2:dupl[lc_i, 'Freq']){
newfeature <- paste0(dupfeature,'.',lc_j)
# features[[idx[lc_j]]] <- newfeature
# tmp[[newfeature]] <- tmp[[dupfeature]]
colnames(tmp)[[idx[lc_j]]] <- newfeature
}
}
# tmp <- tmp[,c(features, groupN)]
return(tmp)
}

f_VlnBoxPlot_20 <- function(scRNAo, features, groupN){
tmp <- log1p(scRNAo[['RNA']]@counts)
# tmp <- scRNAo[['RNA']]@counts
tmp <- as.data.frame(t(as.matrix(tmp[features,])))
types <- scRNAo[[groupN]]
types[[groupN]] <- factor(types[[groupN]]) # 确保是因子
newLevels <- levels(types[[groupN]])
newLevels <- newLevels[order(newLevels)]
types[[groupN]] <- factor(types[[groupN]], levels=newLevels) # 排序
tmp <- cbind(tmp, types)
tmp <- f_VlnBoxPlot_19(tmp, features, groupN)
return(tmp)
}

f_setRowName <- function(lc_df, lc_colName){
lc_df <- lc_df[order(lc_df[[lc_colName]]),]
tp_index <- duplicated(lc_df[[lc_colName]])
lc_df <- lc_df[!tp_index,]
rownames(lc_df) <- lc_df[[lc_colName]]
lc_df
}

f_VlnBoxPlot_25 <- function(markers){
dupl <- f_VlnBoxPlot_18(markers$marker)
if(nrow(dupl) >= 1){
for(lc_i in 1:nrow(dupl)){
dupfeature <- dupl[lc_i, 'features']
idx <- which(markers$marker == dupfeature)
for (lc_j in 2:dupl[lc_i, 'Freq']){
newfeature <- paste0(dupfeature,'.',lc_j)
markers[idx[lc_j], 'marker'] <- newfeature
}
}
}
markers <- f_setRowName(markers, 'marker') # 准备marker到类型的映射
markers[['celltype']] <- factor(markers[['celltype']], ordered = T)
col <- f_VlnBoxPlot_col(length(levels(markers[['celltype']])))
markers[['colour']] <- col[as.numeric(markers[['celltype']])]
return(markers)
}

require(reshape2)
f_VlnBoxPlot_21 <- function(scRNAo, markers, groupN){
data <- f_VlnBoxPlot_20(scRNAo, markers[['marker']], groupN) # 获取宽数据
data <- melt(data, id.vars = groupN) # 宽数据变长数据

markers <- f_VlnBoxPlot_25(markers) # 准备marker到颜色的映射
data[['facet_fill_color']] <- markers[as.character(data[['variable']]), 'colour'] # 进行映射
newLevels <- data[['facet_fill_color']][!duplicated(data[['facet_fill_color']])] # 确保顺序
data[['facet_fill_color']] <- factor(data[['facet_fill_color']], levels=newLevels)

return(data)
}

f_VlnBoxPlot_23 <- function(scRNAo, markers, groupN){
markers <- markers[order(markers$celltype),] # 确保顺序正确
theme_s <- VlnPlot(scRNAo, features = markers[1,'marker'], slot = "counts", log = TRUE, group.by = groupN) # 借用 Seurat 的主题
data <- f_VlnBoxPlot_21(scRNAo, markers, groupN) # 准备好数据
p <- ggplot(data, aes(x=!!sym(groupN), y=value, fill= !!sym(groupN)))
p <- p + theme_s$theme + NoLegend() # 借用 Seurat 的主题
p <- p + geom_violin() # 绘制小提琴图
p <- p + stat_ydensity(trim = TRUE, scale = 'width', adjust = 1) # 绘制小提琴图
p <- p + geom_boxplot(width=0.618) # 绘制箱型图
p <- p + stat_summary(fun="mean",geom="point",color='white') # 添加均值点
p <- p + theme(plot.title=element_blank()) # 删除总标题
p <- p + labs(x=NULL, y=NULL) # 删除xy轴标题
p <- p + scale_y_continuous('', breaks = floor(max(layer_scales(p)$y$range$range))) # 美化y轴刻度
# p <- p + theme(panel.background = element_rect(colour="black")) # 添加框线
p <- p + theme(plot.margin = unit(c(0,0,0,0), "cm")) # 尽量删除图外空白
p <- p + facet_grid(variable ~ .) # 添加分页
nx <- length(unlist(unique(scRNAo[[groupN]])))
p <- p + scale_fill_manual(values=f_VlnBoxPlot_col(nx))
x_r <- layer_scales(p)$x$range$range # 保存x轴刻度
p <- p + theme(axis.text.x=element_blank()) # 删除x轴刻度
p <- f_VlnBoxPlot_26(data, p)
# layer_scales(p)$x$range$range <- x_r
nr <- length(x_r) + 2
p <- p + scale_x_continuous( # 恢复x轴刻度
limits = c(1/nr/2,1-1/nr/2),
breaks = seq(0,1,length.out=nr)[2:(nr-1)],
label = x_r
)
p <- p + theme(axis.text.x=element_text(hjust = 0.5, angle = 45))
return(p)
}

require(treeio)
require(ggplot2)
require(ggtree)
require(tidytree)
# require(aplot)
f_VlnBoxPlot_24 <- function(markers){
markers <- markers[order(markers$celltype),]
# pstandard <- ggplot(markers, aes(y = marker)) # 创建标准y轴顺序
feature_types <- unique(markers$celltype) # 创建根节点
feature_types <- as.data.frame(feature_types)
feature_types[['root']] = ' '
feature_types <- feature_types[, c('root', 'feature_types')]
colnames(feature_types) <- colnames(markers)

y = as.phylo(rbind(feature_types, markers)) # 转换成树

yy = as_tibble(y) %>% # 统一叶子节点的颜色
mutate(cat = ifelse(node %in% parent, 1, parent))
yy$cat[rootnode(y)] = 0

p <- ggtree(as.treedata(yy), ladderize=F, layout='roundrect') + # 通过 ggtree 绘图
geom_nodelab(aes(x=x*0.5, label=label,
fill=factor(cat)), hjust=0.5, geom='text', angle=90) +
geom_tiplab(aes(label=label, fill=factor(cat)), geom='text', angle=45, hjust=0.5) +
scale_y_reverse() +
hexpand(.2) + hexpand(.06, -1) +
theme(legend.position = 'none')
# p <- p + ylim2(pstandard) + scale_y_reverse()
p <- p + theme(plot.margin = unit(c(0,0,0,0), "cm")) # 尽量删除图外空白
return(p)
}

require(ggplotify)
require(gtable)
require(grid)
gtable_select <- function (x, ...)
{
matches <- c(...)
x$layout <- x$layout[matches, , drop = FALSE]
x$grobs <- x$grobs[matches]
x
}
gtable_stack <- function(g1, g2){
g1$grobs <- c(g1$grobs, g2$grobs)
g1$layout <- rbind(g1$layout, g2$layout)
g1
}
f_VlnBoxPlot_26 <- function(dat, p){
p <- p + theme(strip.background=element_blank()) # 去掉背景
dummy <- p # 魔改出facet的填充
dummy$layers <- NULL
dummy <- dummy + geom_rect(data=dat, xmin=-Inf, ymin=-Inf, xmax=Inf, ymax=Inf,
aes(fill = facet_fill_color))
# print(dummy)
g1 <- ggplotGrob(p)
g2 <- ggplotGrob(dummy)
panels <- grepl(pattern="panel", g2$layout$name)
strips <- grepl(pattern="strip-right", g2$layout$name)
g2$grobs[strips] <- replicate(sum(strips), nullGrob(), simplify = FALSE)
g2$layout$l[panels] <- g2$layout$l[panels] + 1
g2$layout$r[panels] <- g2$layout$r[panels] + 2
new_strips <- gtable_select(g2, panels strips)
# print(as.ggplot(new_strips))
new_plot <- gtable_stack(g1, new_strips)
p <- as.ggplot(new_plot)
return(p)
}

require(RColorBrewer)
col_Paired <- colorRampPalette(brewer.pal(12, "Paired"))
f_VlnBoxPlot_col <- function(nx){
if (nx <= 12){
return(brewer.pal(nx, "Paired"))
}else{
return(col_Paired(nx))
}
}

f_VlnBoxPlot <- function(scRNAo, markers, groupN){
p2 <- f_VlnBoxPlot_24(markers)
p <- f_VlnBoxPlot_23(scRNAo, markers , groupN)
p2 + p + plot_layout(ncol = 2, widths = c(1, 5))
}

f_VlnBoxPlot_simple <- function(scRNAo, markers, groupN){
markers <- markers[order(markers$celltype),] # 确保顺序正确
theme_s <- VlnPlot(scRNAo, features = markers[1,'marker'], slot = "counts", log = TRUE, group.by = groupN) # 借用 Seurat 的主题
data <- f_VlnBoxPlot_21(scRNAo, markers, groupN) # 准备好数据
p <- ggplot(data, aes(x=!!sym(groupN), y=value, fill= !!sym(groupN)))
p <- p + theme_s$theme + NoLegend() # 借用 Seurat 的主题
p <- p + geom_violin() # 绘制小提琴图
p <- p + stat_ydensity(trim = TRUE, scale = 'width', adjust = 1) # 绘制小提琴图
p <- p + geom_boxplot(width=0.618) # 绘制箱型图
p <- p + stat_summary(fun="mean",geom="point",color='white') # 添加均值点
p <- p + theme(plot.title=element_blank()) # 删除总标题
p <- p + labs(x=NULL, y=NULL) # 删除xy轴标题
p <- p + scale_y_continuous('', breaks = floor(max(layer_scales(p)$y$range$range))) # 美化y轴刻度
# p <- p + theme(panel.background = element_rect(colour="black")) # 添加框线
p <- p + theme(plot.margin = unit(c(0,0,0,0), "cm")) # 尽量删除图外空白
p <- p + facet_grid(variable ~ .) # 添加分页
nx <- length(unlist(unique(scRNAo[[groupN]])))
p <- p + scale_fill_manual(values=f_VlnBoxPlot_col(nx))
p <- f_VlnBoxPlot_26(data, p)
return(p)
}

第四步 最终效果展示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
markers <- read.csv(header = T, text = '
celltype,marker
Naive CD4+ T,IL7R
Naive CD4+ T,CCR7
CD14+ Mono,CD14
CD14+ Mono,LYZ
Memory CD4+,IL7R
Memory CD4+,S100A4
B,MS4A1
CD8+ T,CD8A
FCGR3A+ Mono,FCGR3A
FCGR3A+ Mono,MS4A7
NK,GNLY
NK,NKG7
DC,FCER1A
DC,CST3
Platelet,PPBP
')

f_VlnBoxPlot(pbmc3k, markers , 'seurat_annotations')


Seurat绘制小提琴图与箱型图的叠加图
https://b.limour.top/1428.html
Author
Limour
Posted on
January 19, 2022
Licensed under