第一步 下载基因长度信息
https://file-cdn.limour.top/bix/22.02.18.tcga_rowRanges.csv.gz
第二步 读取基因长度信息
1
   | grg <- read.csv('22.02.18.tcga_rowRanges.csv')
 
  | 
 
第三步 构建counts转fpkm函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   | f_counts2fpkm <- function(ct, grg, gidN='ensembl_gene_id', rowN=NULL){     if(is.null(rowN)){         rowN <- rownames(ct)     }     grg <- grg[c(gidN, 'width')]     grg['width'] <- grg['width']/1000      idx <- match(rowN, grg[[1]])     idx_f <- !is.na(idx)      ct <- ct[idx_f,]      idx <- idx[idx_f]     grg <- grg[idx,]      lc_rpk <- ct/grg[[2]]     lc_fpkm <- t(t(lc_rpk)/colSums(ct) * 10^6)     lc_fpkm }
 
  | 
 
第四步 计算并转换基因名
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
   | f_id2name <- function(lc_exp, lc_db){     if(!is.data.frame(lc_db)){         lc_ids <- toTable(lc_db)     }else{         lc_ids <- lc_db     }     res <- lc_exp[rownames(lc_exp) %in% lc_ids[[1]],]     lc_ids=lc_ids[match(rownames(res),lc_ids[[1]]),]     lc_tmp = by(res,          lc_ids[[2]],          function(x) rownames(x)[which.max(rowMeans(x))])     lc_probes = as.character(lc_tmp)     res = res[rownames(res) %in% lc_probes,]     rownames(res)=lc_ids[match(rownames(res),lc_ids[[1]]),2]     res }
 
  | 
 
1 2
   | d2 <- f_counts2fpkm(d, grg) d2 <- f_id2name(d2, grg[c('ensembl_gene_id', 'external_gene_name')])
 
  |