实验目的
实验代码
###加载R包
library(readxl)
library(tidyverse)
library(GEOquery)
library(tidyverse)
library(GEOquery)
library(limma)
library(affy)
library(stringr)
library(FactoMineR)
library(factoextra)
library(sva)
### Download the GEO series; files already present in destdir are read
### from disk instead of being downloaded again.

# Dataset 1: GSE205185
gset <- getGEO("GSE205185", destdir = ".", AnnotGPL = TRUE, getGPL = TRUE)
class(gset)
# Show the first element of the object returned by getGEO().
gset[[1]]
# Inspect the `data_processing` column of the sample metadata.
gset[["GSE205185_series_matrix.txt.gz"]]@phenoData@data[["data_processing"]]

# Dataset 2: GSE29431
gset2 <- getGEO("GSE29431", destdir = ".", AnnotGPL = TRUE, getGPL = TRUE)
class(gset2)
gset2[[1]]
gset2[["GSE29431_series_matrix.txt.gz"]]@phenoData@data[["data_processing"]]

# Dataset 3: GSE20711
gset3 <- getGEO("GSE20711", destdir = ".", AnnotGPL = TRUE, getGPL = TRUE)
class(gset3)
gset3[[1]]
gset3[["GSE20711_series_matrix.txt.gz"]]@phenoData@data[["data_processing"]]
# Read the `annotation` slot of the first element of each downloaded series
# (presumably the platform/GPL identifier -- confirm against the printed value).
plf1 <- gset[[1]]@annotation
plf2 <- gset2[[1]]@annotation
plf3 <- gset3[[1]]@annotation
# Load the platform files from the working directory and extract their tables.
GPL_data <- getGEO(filename = "GPL21185.soft.gz", AnnotGPL = TRUE)
GPL_data_11 <- Table(GPL_data)

GPL_data1 <- getGEO(filename = "GPL570.annot.gz", AnnotGPL = TRUE)
GPL_data_22 <- Table(GPL_data1)

# The third platform object comes from the same file as GPL_data1
# ("GPL570.annot.gz"), so reuse the parsed object instead of reading it twice.
GPL_data2 <- GPL_data1
GPL_data_33 <- Table(GPL_data2)
### Dataset 1: extract the expression matrix from the first element of gset.
# NOTE(review): the variable `exp` shares its name with base::exp(); the name
# is kept because code outside this section may refer to it.
exp <- exprs(gset[[1]])

### Inspect the value distribution of each column with a boxplot.
boxplot(exp)

### Decide whether the values still need a log2 transform.
ex <- exp
# qx holds the 0%, 25%, 50%, 75%, 99% and 100% quantiles of all values.
qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
# Heuristic on the quantiles (appears to follow the GEO2R auto-detect rule --
# TODO confirm): a large 99th percentile, a wide positive range, or a very
# narrow inter-quartile band flags the data as not yet log-transformed.
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) {
  # log2 is undefined for non-positive values; mark them NaN first.
  ex[which(ex <= 0)] <- NaN
  exprSet <- log2(ex)
  print("需要取log2")
} else {
  # NOTE(review): exprSet is only assigned on the branch above.
  print("无需取log2")
}
probe_name <- rownames(exp)
### Dataset 2: extract the expression matrix from the first element of gset2.
exp2 <- exprs(gset2[[1]])

### Inspect the value distribution of each column with a boxplot.
boxplot(exp2)

### Decide whether the values still need a log2 transform.
# NOTE(review): ex, qx, LogC and exprSet are shared scratch variables; the
# assignments below overwrite whatever an earlier check left in them.
ex <- exp2
# qx holds the 0%, 25%, 50%, 75%, 99% and 100% quantiles of all values.
qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
# Heuristic on the quantiles (appears to follow the GEO2R auto-detect rule --
# TODO confirm).
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) {
  # log2 is undefined for non-positive values; mark them NaN first.
  ex[which(ex <= 0)] <- NaN
  exprSet <- log2(ex)
  print("需要取log2")
} else {
  # NOTE(review): exprSet is not assigned on this branch, so it may still hold
  # a value from an earlier dataset.
  print("无需取log2")
}
probe_name2 <- rownames(exp2)
### Dataset 3: extract the expression matrix from the first element of gset3.
exp3 <- exprs(gset3[[1]])

### Inspect the value distribution of each column with a boxplot.
boxplot(exp3)

### Decide whether the values still need a log2 transform.
# NOTE(review): ex, qx, LogC and exprSet are shared scratch variables; the
# assignments below overwrite whatever an earlier check left in them.
ex <- exp3
# qx holds the 0%, 25%, 50%, 75%, 99% and 100% quantiles of all values.
qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
# Heuristic on the quantiles (appears to follow the GEO2R auto-detect rule --
# TODO confirm).
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) {
  # log2 is undefined for non-positive values; mark them NaN first.
  ex[which(ex <= 0)] <- NaN
  exprSet <- log2(ex)
  print("需要取log2")
} else {
  # NOTE(review): exprSet is not assigned on this branch, so it may still hold
  # a value from an earlier dataset.
  print("无需取log2")
}
probe_name3 <- rownames(exp3)
# Change the working directory for the second analysis; the getGEO() call
# below uses destdir = ".", so files are read from / written to this folder.
# NOTE(review): absolute, machine-specific path -- the script fails on any
# machine where this directory does not exist.
setwd("F:/001微生物课题/公共数据库生信挖掘/02.Hp感染24小时后AGS的差异基因/001.判断GEO是否需要log2")
library(readxl)
library(tidyverse)
library(GEOquery)
library(tidyverse)
library(GEOquery)
library(limma)
library(affy)
library(stringr)
library(FactoMineR)
library(factoextra)
library(sva)
# Download GSE70394 (files already present in destdir are read from disk).
gset <- getGEO("GSE70394", destdir = ".", AnnotGPL = TRUE, getGPL = TRUE)
class(gset)
# Show the first element of the object returned by getGEO().
gset[[1]]
# Inspect the `data_processing` column of the sample metadata. Index by
# position: the previous lookup used the name of a different series
# ("GSE74577_series_matrix.txt.gz"), which does not match the accession
# downloaded above.
gset[[1]]@phenoData@data[["data_processing"]]
# Read the `annotation` slot (presumably the platform/GPL identifier -- confirm).
plf1 <- gset[[1]]@annotation
# Expression matrix of the first element of gset, plus its row names.
exp <- exprs(gset[[1]])
probe_name <- rownames(exp)

# Boxplot of the expression values, one box per column.
boxplot(exp)

# Work on a copy so `exp` itself stays untouched.
ex <- exp

# 0%, 25%, 50%, 75%, 99% and 100% quantiles of all values, ignoring NA.
qx <- as.numeric(
  quantile(ex, probs = c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE)
)

# The data are flagged as needing log2 when any one of three quantile-based
# conditions holds.
LogC <- any(
  qx[5] > 100,
  all(qx[6] - qx[1] > 50, qx[2] > 0),
  all(qx[2] > 0, qx[2] < 1, qx[4] > 1, qx[4] < 2)
)

if (LogC) {
  # Non-positive entries become NaN before the transform.
  ex <- replace(ex, which(ex <= 0), NaN)
  exprSet <- log2(ex)
  print("需要取log2")
} else {
  print("不需要取log2")
}
最终的结果
