目的
在RNA-seq数据分析中,差异分析结果的GSE(Gene Set Enrichment)和KEGG(Kyoto Encyclopedia of Genes and Genomes)可视化,是通过对差异表达基因(DEGs,Differentially Expressed Genes)的功能富集分析和通路分析结果进行直观展示的过程。
具体代码
remove(list = ls()) ##清空当前环境
setwd("D:/生信代码复现/RNA-seq差异分析结果gseKEGG可视化") ##💚💚💚设置路径
library(clusterProfiler)
library(ggplot2)
Sys.setenv(LANGUAGE="en")#显示英文报错信息
options(stringsAsFactors=FALSE)#禁止chr转成factor
#library(org.Hs.eg.db)
do_gseKEGG<-function(data,
gene_col="Gene",
value_col="Log2FoldChange",
OrgDb="org.Hs.eg.db",
organism="hsa",
pvalueCutoff=1,
pAdjustMethod="BH"
){
data<-data[,c(gene_col,value_col)]
data<-data[order(data[[value_col]],decreasing=TRUE),]
colnames(data)<-c("SYMBOL","logFC")
gsym.id<-bitr(data$SYMBOL,fromType="SYMBOL",toType="ENTREZID",OrgDb=OrgDb)
gsym.fc.id<-merge(data,gsym.id,by="SYMBOL",all=F)
#按照foldchange排序
gsym.fc.id.sorted<-gsym.fc.id[order(gsym.fc.id$logFC,decreasing=T),]
#获得ENTREZID、foldchange列表,做为GSEA的输入
id.fc<-gsym.fc.id.sorted$logFC
names(id.fc)<-gsym.fc.id.sorted$ENTREZID
res<-gseKEGG(
id.fc,#根据logFC排序的基因集
organism=organism,#人的拉丁名缩写
pvalueCutoff=pvalueCutoff,
pAdjustMethod=pAdjustMethod
)
#把ENTREZID转为genesymbol,便于查看通路里的基因
kk.gsym<-setReadable(res,OrgDb,#物种
'ENTREZID')
#按照enrichmentscore从高到低排序,便于查看富集的通路
#kk.gsym<-kk.gsym[order(kk.gsym$NES,decreasing=F),]
#sortkk<-kk.gsym[reorder(kk.gsym$p.adjust,decreasing=TRUE),]
return(kk.gsym)
}
gseplot1<-function(data,xlabel="NESofTreat/ControlgseKEGG"){
p<-ggplot(data,aes(x=NES,y=reorder(Description,NES,decreasing=FALSE),color=p.adjust))+
geom_segment(aes(x=0,xend=NES,y=reorder(Description,NES,decreasing=FALSE),yend=Description),size=1)+
geom_point(size=7,shape=16,aes(color=p.adjust))+
geom_text(aes(x=0,y=reorder(Description,NES,decreasing=FALSE),label=Description),
size=5,
hjust=0.5,#Updatedhjust
vjust=-0.6,
size=3,
)+
#geom_text_repel(data=df_label,aes(NES,Description,label=label))+
scale_colour_gradientn(colours=c("dodgerblue1","#44c1f0","lightgoldenrod","#e20612",'#cc340c'),trans="reverse")+
#scale_color_viridis_c(trans="reverse")+
theme_minimal()+
labs(x=xlabel,y="",color="p.adjust")+
theme(axis.text.x=element_text(size=13),
axis.text.y=element_blank(),#去掉y轴标签
plot.margin=margin(1,1,1,1,"cm"))#调整图形的左边距
return(p)
}
gseplot2<-function(data,xlabel="NESofTreat/ControlgseKEGG"){
p<-ggplot(data,aes(x=NES,y=reorder(Description,NES,decreasing=FALSE),color=p.adjust))+
geom_segment(aes(x=0,xend=NES,y=reorder(Description,NES,decreasing=FALSE),yend=Description),size=1)+
geom_point(size=7,shape=16,aes(color=p.adjust))+
geom_text(aes(x=0,y=reorder(Description,NES,decreasing=FALSE),label=Description),
size=5,hjust=ifelse(data$NES<=0,0,1),vjust=0.5,
#angle=ifelse(df$NES<=0,0,180)
)+
#geom_text_repel(data=df_label,aes(NES,Description,label=label))+
scale_colour_gradientn(colours=c("dodgerblue1","#44c1f0","lightgoldenrod","#e20612",'#cc340c'),trans="reverse")+
#scale_color_viridis_c(trans="reverse")+
theme_minimal()+
labs(x=xlabel,y="",color="p.adjust",size=10)+
theme(axis.text.x=element_text(size=15),
axis.text.y=element_blank(),#去掉y轴标签
plot.margin=margin(1,1,1,1,"cm"))#调整图形的左边距
return(p)
}
#####下面的代码是读取RNA-seq差异表达数据
df<-read.csv("treat_control.DE.xls",header=TRUE,sep="\t")
gse_kk<-do_gseKEGG(df)
#按照NES排序
kk.gsym_sort<-gse_kk[order(gse_kk$NES,decreasing=F),]
#获取排名前6的观测值
top_6<-kk.gsym_sort[1:6,]
#获取排名末尾6的观测值
bottom_6<-kk.gsym_sort[(nrow(kk.gsym_sort)-5):nrow(kk.gsym_sort),]
df_top<-rbind(top_6,bottom_6)
#write.table(gse_kk,"all.gseKEGG.tsv",sep="\t")
#######下面的代码是做gseKEGG富集分析
df<-read.csv("treat_control.DE.xls",header=TRUE,sep="\t")
gse_kk<-do_gseKEGG(df)
#按照NES排序
kk.gsym_sort<-gse_kk[order(gse_kk$NES,decreasing=F),]
#获取排名前6的观测值
top_6<-kk.gsym_sort[1:6,]
#获取排名末尾6的观测值
bottom_6<-kk.gsym_sort[(nrow(kk.gsym_sort)-5):nrow(kk.gsym_sort),]
df_top<-rbind(top_6,bottom_6)
#write.table(gse_kk,"all.gseKEGG.tsv",sep="\t")
######下面的代码是进行可视化
p<-gseplot1(df_top)
options(repr.plot.width=9,repr.plot.height=6)
p
#ggsave("abs_top6.allGeneFC.gseKEGG.pdf",p,w=9,h=6)
p<-gseplot2(df_top)
p
#####下面的代码是对单个的KEGG信号通路的基因富集做可视化
#devtools::install_github('junjunlab/GseaVis')
library(GseaVis)
p<-gseaNb(object=gse_kk,
geneSetID='hsa04978',
markTopgene=TRUE,
kegg=T,
addGene=c("ATP1A2","SLC9A3"),
addPval=T,
pvalX=0.55,pvalY=0.8,
pCol='black',
pHjust=0,subPlot=2)
p
# 生成图
p <- gseplot1(df_top)
# 保存图像为 PDF
output_file1 <- "gseplot1_top6_bottom6.pdf" # 保存文件名
ggsave(output_file1, plot = p, width = 9, height = 6)
# 生成图
p <- gseplot2(df_top)
# 保存图像为 PDF
output_file2 <- "gseplot2_top6_bottom6.pdf" # 保存文件名
ggsave(output_file2, plot = p, width = 9, height = 6)
# 生成图
p <- gseaNb(object = gse_kk,
geneSetID = 'hsa04978',
markTopgene = TRUE,
kegg = TRUE,
addGene = c("ATP1A2", "SLC9A3"),
addPval = TRUE,
pvalX = 0.55, pvalY = 0.8,
pCol = 'black',
pHjust = 0, subPlot = 2)
# 保存图像为 PDF
output_file3 <- "gseaNb_hsa04978.pdf" # 保存文件名
ggsave(output_file3, plot = p, width = 10, height = 8)
图片
最终出图
- 一幅点图(排名前后6条通路,
gseplot1)。 - 一幅类似的点图(不同布局,
gseplot2)。 - 一幅单通路的富集分析图(
gseaNb)。