Seurat (三) scRNA-seq数据全流程分析 (一)

第一步 查看数据构成

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
library(RColorBrewer)
library(ggplot2)
blank_theme <- theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.text.x=element_blank(),
axis.title.y = element_blank(),
axis.text.y=element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold",hjust = 0.5)
)
f_pie <- function(lc_x, lc_main){
lc_cols <- brewer.pal(length(lc_x), "Paired")
lc_v <- as.vector(100*lc_x)
lc_percent = sprintf('%0.2f%%',lc_v)
lc_df <- data.frame(type = names(lc_x), nums = lc_v)
lc_df$pos <- with(lc_df, 100-cumsum(nums)+nums/2)
# print(lc_df)
lc_pie <- ggplot(data = lc_df, mapping = aes(x = 1, y = nums, fill = type)) + geom_bar(stat = 'identity')
lc_pie <- lc_pie + coord_polar("y", start=0, direction = 1) + scale_fill_manual(values=lc_cols) + blank_theme
lc_pie <- lc_pie + geom_text(aes(x = 1.3, y=pos,label= lc_percent))
lc_pie <- lc_pie + labs(title = lc_main)
lc_pie
}

options(repr.plot.width=6, repr.plot.height=6)
options(ggrepel.max.overlaps = Inf)
f_pie(prop.table(table(Idents(scRNA))), "Proportion of each brain region")

第二步 QC

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")

options(repr.plot.width=18, repr.plot.height=6)
options(ggrepel.max.overlaps = Inf)
VlnPlot(scRNA,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3)
plot1 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

x10s <- SplitObject(scRNA, split.by = 'ident')
# 6.3、分别对各样本进行QC
x10s$BA213 <- subset(x10s$BA213,
subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA04 <- subset(x10s$BA04,
subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 10)
x10s$BA09 <- subset(x10s$BA09,
subset = nFeature_RNA > 200 & nFeature_RNA < 3500 & percent.mt < 10)
x10s$BA21 <- subset(x10s$BA21,
subset = nFeature_RNA > 200 & nFeature_RNA < 4500 & percent.mt < 10)
x10s$BA22 <- subset(x10s$BA22,
subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 10)
x10s$BA39 <- subset(x10s$BA39,
subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA40 <- subset(x10s$BA40,
subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA44 <- subset(x10s$BA44,
subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA45 <- subset(x10s$BA45,
subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10)

f_IntegrateData <- function(olist){
# 8、鉴定用于多样本数据整合的anchors
tp_anchors <- FindIntegrationAnchors(object.list = olist, dims = 1:30,
reduction = "cca")
gc()
# 9、整合多样本数据集 注意 内存需求大于16g!!!,不足请设置足够的swap空间再运行(总内存32g是够的)
tp_int <- IntegrateData(anchorset = tp_anchors)
gc()
tp_int
}

scRNA <- f_IntegrateData(x10s)

第三步 PCA降维

1
2
3
lc_all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = lc_all.genes)

第四步 UMPA降维

1
2
3
4
scRNA <- RunUMAP(scRNA, dims = 1:pca_dim)
lc_reduction = "umap"
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(scRNA, reduction = lc_reduction, group.by = 'orig.ident', label = T, repel = T, label.size = 6) + labs(title = "UMAP reduction of brain regions")

Seurat (三) scRNA-seq数据全流程分析 (一)
https://b.limour.top/792.html
Author
Limour
Posted on
October 1, 2021
Licensed under