一、目的
二、代码
remove(list = ls())
setwd("E:/微生物-赵建组/课题进展/生信挖掘相关基因/03.TCGA胃癌数据/02.1三包差异分析")
library(edgeR)
library(data.table)
library(tidyverse)
library(ggsignif)
library(RColorBrewer)
library(limma)
library(ggplot2)
library(ggpubr)
library(beepr)
library(gplots)
library(pheatmap)
library("DESeq2")
library(VennDiagram)
padj = 0.05
foldChange= 2
rt1=read.table("combined_RNAseq_FPKM.txt",sep="\t",header=T,check.names=F)
rt1=as.matrix(rt1)
rownames(rt1)=rt1[,1]
exp1=rt1[,2:ncol(rt1)]
dimnames=list(rownames(exp1),colnames(exp1))
data1=matrix(as.numeric(as.matrix(exp1)),nrow=nrow(exp1),dimnames=dimnames)
data1=avereps(data1)
data1=data1[rowMeans(data1)>0,]
data1=as.data.frame(data1)
exp1_data_T = data1%>% dplyr::select(str_which(colnames(.), "-01A"))
nT = ncol(exp1_data_T)
exp1_data_N = data1%>% dplyr::select(str_which(colnames(.), "-11A"))
nN = ncol(exp1_data_N)
rt1= cbind(exp1_data_N, exp1_data_T)
rt1=normalizeBetweenArrays(rt1)
group1=sapply(strsplit(colnames(rt1),"\\-"), "[", 4)
group1=sapply(strsplit(group1,""), "[", 1)
group1=gsub("2", "1", group1)
conNum1=length(group1[group1==1])
treatNum1=length(group1[group1==0])
class <- c(rep("con",conNum1),rep("treat",treatNum1))
design <- model.matrix(~factor(class)+0)
colnames(design) <- c("con","treat")
df.fit <- lmFit(rt1,design)
df.matrix<- makeContrasts(con - treat,levels=design)
fit<- contrasts.fit(df.fit,df.matrix)
fit2 <- eBayes(fit)
allDEG1 = topTable(fit2,coef=1,n=Inf,adjust="BH")
allDEG1 = na.omit(allDEG1)
padj = 0.05
foldChange= 2
diff_signif1 = allDEG1[(allDEG1$adj.P.Val < padj &
(allDEG1$logFC>foldChange | allDEG1$logFC<(-foldChange))),]
diff_signif1 = diff_signif1[order(diff_signif1$logFC),]
save(diff_signif1, file = 'limma_diff1.Rdata')
rt <- read.table( "combined_RNAseq_counts.txt",header=T,sep="\t",comment.char="",check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>1,]
data2=as.data.frame(data)
exp_data_T = data2%>% dplyr::select(str_which(colnames(.), "-01A$"))
nT = ncol(exp_data_T)
exp_data_N = data2%>% dplyr::select(ends_with("-11A"))
nN = ncol(exp_data_N)
data= cbind(exp_data_N, exp_data_T)
group1=sapply(strsplit(colnames(data),"\\-"), "[", 4)
group1=sapply(strsplit(group1,""), "[", 1)
group1=gsub("2", "1", group1)
conNum=length(group1[group1==1])
treatNum=length(group1[group1==0])
count <- floor(data)
countData <- count[apply(count, 1, sum) > 0 , ]
data=colnames(countData)
Type=c(rep(1,conNum), rep(2,treatNum))
exp=cbind(data, Type)
exp=as.data.frame(exp)
colnames(exp)=c("id", "Type")
exp$Type=ifelse(exp$Type==1, "Normal", "Tumor")
exp=as.matrix(exp)
rownames(exp)=exp[,1]
exp=exp[,2:ncol(exp)]
exp=as.data.frame(exp)
colnames(exp)=c("condition")
write.table(exp, file="group.txt",sep="\t",quote=F)
colData <- read.table( "group.txt",header=T,sep="\t",row.names=1,comment.char="",check.names=F)
dds <- DESeqDataSetFromMatrix(countData = countData,colData = colData,design = ~ condition)
dds$condition <- relevel(dds$condition, ref = "Normal")
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
dds <- nbinomWaldTest(dds)
dds <- DESeq(dds)
allDEG2 <- as.data.frame(results(dds))
padj = 0.05
foldChange= 2
diff_signif2 = allDEG2[(allDEG2$padj < padj &
(allDEG2$log2FoldChange>foldChange | allDEG2$log2FoldChange<(-foldChange))),]
diff_signif2 = diff_signif2[order(diff_signif2$log2FoldChange),]
save(diff_signif2, file = 'DESeq2_diff2.Rdata')
rt=read.table("combined_RNAseq_counts.txt",sep="\t",header=T,check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>1,]
data2=as.data.frame(data)
exp_data_T = data2%>% dplyr::select(str_which(colnames(.), "-01A$"))
nT = ncol(exp_data_T)
exp_data_N = data2%>% dplyr::select(ends_with("-11A"))
nN = ncol(exp_data_N)
data= cbind(exp_data_N, exp_data_T)
group1=sapply(strsplit(colnames(data),"\\-"), "[", 4)
group1=sapply(strsplit(group1,""), "[", 1)
group1=gsub("2", "1", group1)
conNum=length(group1[group1==1])
treatNum=length(group1[group1==0])
group=c(rep("normal",conNum),rep("tumor",treatNum))
design <- model.matrix(~group)
y <- DGEList(counts=data,group=group)
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
et <- exactTest(y,pair = c("normal","tumor"))
topTags(et)
ordered_tags <- topTags(et, n=100000)
allDiff=ordered_tags$table
allDiff=allDiff[is.na(allDiff$FDR)==FALSE,]
padj = 0.05
foldChange= 1
diff_signif = allDiff[(allDiff$FDR < padj & (allDiff$logFC>foldChange | allDiff$logFC<(-foldChange))),]
diff_signif = diff_signif[order(diff_signif$logFC),]
save(diff_signif, file = 'edger_diff.Rdata')
edgeR = rownames(diff_signif)
dim(diff_signif)
limma = rownames(diff_signif1)
dim(diff_signif1)
DESeq2 = rownames(diff_signif2)
dim(diff_signif2)
venn.diagram(
x = list(
'edgeR' = edgeR,
'limma' = limma,
'DESeq2' = DESeq2
),
filename = 'VN.png',
col = "black",
fill = c("dodgerblue", "goldenrod1", "darkorange1"),
alpha = 0.5,
cex = 0.8,
cat.col = 'black',
cat.cex = 0.8,
cat.fontface = "bold",
margin = 0.05,
main = "三种包的差异表达基因比较",
main.cex = 1.2
)
edgeR=as.data.frame(edgeR)
limma=as.data.frame(limma)
DESeq2=as.data.frame(DESeq2)
sameSample=intersect(edgeR$edgeR, limma$limma)
sameSample=as.data.frame(sameSample)
sameSample=intersect(DESeq2$DESeq2,sameSample$sameSample)
sameSample1=as.data.frame(sameSample)
write.table(sameSample1, file="merge_genes.xls",sep="\t",quote=F)
data1=exp[sameSample,,drop=F]
write.table(data1, file="merge_counts.xls",sep="\t",quote=F)
data2=exp1[sameSample,,drop=F]
write.table(data2, file="merge_FPKM.xls",sep="\t",quote=F)