035 Rna Seq差异分析结果gsekegg可视化

目的

在RNA-seq数据分析中,差异分析结果的GSE(Gene Set Enrichment)和KEGG(Kyoto Encyclopedia of Genes and Genomes)可视化,是通过对差异表达基因(DEGs,Differentially Expressed Genes)的功能富集分析和通路分析结果进行直观展示的过程。

具体代码

remove(list = ls()) ##清空当前环境
setwd("D:/生信代码复现/RNA-seq差异分析结果gseKEGG可视化") ##💚💚💚设置路径

library(clusterProfiler)
library(ggplot2)
Sys.setenv(LANGUAGE="en")#显示英文报错信息
options(stringsAsFactors=FALSE)#禁止chr转成factor
#library(org.Hs.eg.db)

do_gseKEGG<-function(data,
                       gene_col="Gene",
                       value_col="Log2FoldChange",
                       OrgDb="org.Hs.eg.db",
                       organism="hsa",
                       pvalueCutoff=1,
                       pAdjustMethod="BH"
){
  data<-data[,c(gene_col,value_col)]
  data<-data[order(data[[value_col]],decreasing=TRUE),]
  colnames(data)<-c("SYMBOL","logFC")
  
  gsym.id<-bitr(data$SYMBOL,fromType="SYMBOL",toType="ENTREZID",OrgDb=OrgDb)
  gsym.fc.id<-merge(data,gsym.id,by="SYMBOL",all=F)
  
  #按照foldchange排序
  gsym.fc.id.sorted<-gsym.fc.id[order(gsym.fc.id$logFC,decreasing=T),]
  
  #获得ENTREZID、foldchange列表,做为GSEA的输入
  id.fc<-gsym.fc.id.sorted$logFC
  names(id.fc)<-gsym.fc.id.sorted$ENTREZID
  
  res<-gseKEGG(
    id.fc,#根据logFC排序的基因集
    organism=organism,#人的拉丁名缩写
    pvalueCutoff=pvalueCutoff,
    pAdjustMethod=pAdjustMethod
  )
  
  #把ENTREZID转为genesymbol,便于查看通路里的基因
  kk.gsym<-setReadable(res,OrgDb,#物种
                         'ENTREZID')
  #按照enrichmentscore从高到低排序,便于查看富集的通路
  #kk.gsym<-kk.gsym[order(kk.gsym$NES,decreasing=F),]
  #sortkk<-kk.gsym[reorder(kk.gsym$p.adjust,decreasing=TRUE),]
  
  return(kk.gsym)
}

gseplot1<-function(data,xlabel="NESofTreat/ControlgseKEGG"){
  p<-ggplot(data,aes(x=NES,y=reorder(Description,NES,decreasing=FALSE),color=p.adjust))+
    geom_segment(aes(x=0,xend=NES,y=reorder(Description,NES,decreasing=FALSE),yend=Description),size=1)+
    geom_point(size=7,shape=16,aes(color=p.adjust))+
    geom_text(aes(x=0,y=reorder(Description,NES,decreasing=FALSE),label=Description),
              size=5,
              hjust=0.5,#Updatedhjust
              vjust=-0.6,
              size=3,
    )+
    #geom_text_repel(data=df_label,aes(NES,Description,label=label))+
    scale_colour_gradientn(colours=c("dodgerblue1","#44c1f0","lightgoldenrod","#e20612",'#cc340c'),trans="reverse")+
    #scale_color_viridis_c(trans="reverse")+
    theme_minimal()+
    
    labs(x=xlabel,y="",color="p.adjust")+
    theme(axis.text.x=element_text(size=13),
          axis.text.y=element_blank(),#去掉y轴标签
          plot.margin=margin(1,1,1,1,"cm"))#调整图形的左边距
  
  
  return(p)
}

gseplot2<-function(data,xlabel="NESofTreat/ControlgseKEGG"){
  p<-ggplot(data,aes(x=NES,y=reorder(Description,NES,decreasing=FALSE),color=p.adjust))+
    geom_segment(aes(x=0,xend=NES,y=reorder(Description,NES,decreasing=FALSE),yend=Description),size=1)+
    geom_point(size=7,shape=16,aes(color=p.adjust))+
    geom_text(aes(x=0,y=reorder(Description,NES,decreasing=FALSE),label=Description),
              size=5,hjust=ifelse(data$NES<=0,0,1),vjust=0.5,
              #angle=ifelse(df$NES<=0,0,180)
    )+
    #geom_text_repel(data=df_label,aes(NES,Description,label=label))+
    scale_colour_gradientn(colours=c("dodgerblue1","#44c1f0","lightgoldenrod","#e20612",'#cc340c'),trans="reverse")+
    #scale_color_viridis_c(trans="reverse")+
    theme_minimal()+
    labs(x=xlabel,y="",color="p.adjust",size=10)+
    theme(axis.text.x=element_text(size=15),
          axis.text.y=element_blank(),#去掉y轴标签
          plot.margin=margin(1,1,1,1,"cm"))#调整图形的左边距
  
  return(p)
}
#####下面的代码是读取RNA-seq差异表达数据
df<-read.csv("treat_control.DE.xls",header=TRUE,sep="\t")  
gse_kk<-do_gseKEGG(df)  

#按照NES排序  
kk.gsym_sort<-gse_kk[order(gse_kk$NES,decreasing=F),]  
#获取排名前6的观测值  
top_6<-kk.gsym_sort[1:6,]  
#获取排名末尾6的观测值  
bottom_6<-kk.gsym_sort[(nrow(kk.gsym_sort)-5):nrow(kk.gsym_sort),]  
df_top<-rbind(top_6,bottom_6)  
#write.table(gse_kk,"all.gseKEGG.tsv",sep="\t")
#######下面的代码是做gseKEGG富集分析
df<-read.csv("treat_control.DE.xls",header=TRUE,sep="\t")
gse_kk<-do_gseKEGG(df)

#按照NES排序
kk.gsym_sort<-gse_kk[order(gse_kk$NES,decreasing=F),]
#获取排名前6的观测值
top_6<-kk.gsym_sort[1:6,]
#获取排名末尾6的观测值
bottom_6<-kk.gsym_sort[(nrow(kk.gsym_sort)-5):nrow(kk.gsym_sort),]
df_top<-rbind(top_6,bottom_6)
#write.table(gse_kk,"all.gseKEGG.tsv",sep="\t")

######下面的代码是进行可视化
p<-gseplot1(df_top)  
options(repr.plot.width=9,repr.plot.height=6)  
p  
#ggsave("abs_top6.allGeneFC.gseKEGG.pdf",p,w=9,h=6)
p<-gseplot2(df_top)
p


#####下面的代码是对单个的KEGG信号通路的基因富集做可视化
#devtools::install_github('junjunlab/GseaVis')
library(GseaVis)

p<-gseaNb(object=gse_kk,
            geneSetID='hsa04978',
            markTopgene=TRUE,
            kegg=T,
            addGene=c("ATP1A2","SLC9A3"),
            addPval=T,
            pvalX=0.55,pvalY=0.8,
            pCol='black',
            pHjust=0,subPlot=2)

p

# 生成图
p <- gseplot1(df_top)

# 保存图像为 PDF
output_file1 <- "gseplot1_top6_bottom6.pdf"  # 保存文件名
ggsave(output_file1, plot = p, width = 9, height = 6)

# 生成图
p <- gseplot2(df_top)

# 保存图像为 PDF
output_file2 <- "gseplot2_top6_bottom6.pdf"  # 保存文件名
ggsave(output_file2, plot = p, width = 9, height = 6)

# 生成图
p <- gseaNb(object = gse_kk,
            geneSetID = 'hsa04978',
            markTopgene = TRUE,
            kegg = TRUE,
            addGene = c("ATP1A2", "SLC9A3"),
            addPval = TRUE,
            pvalX = 0.55, pvalY = 0.8,
            pCol = 'black',
            pHjust = 0, subPlot = 2)

# 保存图像为 PDF
output_file3 <- "gseaNb_hsa04978.pdf"  # 保存文件名
ggsave(output_file3, plot = p, width = 10, height = 8)






图片

图片

图片

最终出图

  • 一幅点图(排名前后6条通路,gseplot1)。
  • 一幅类似的点图(不同布局,gseplot2)。
  • 一幅单通路的富集分析图(gseaNb)。