setwd("D:\\小杜的生信笔记\\2024\\20241217_系统发育树专栏二")
rm(list=ls())
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(tidytree)
library(ggstar)
library(ggplot2)
library(ggnewscale)
tree <- read.tree("data/kegg.nwk")
dt1 <- read.csv("data/tippoint_attr.csv")
dt2 <- read.csv("data/firstring_attr.csv")
dt3 <- read.csv("data/secondring_attr.csv")
dt4 <- read.csv("data/barplot_attr.csv")
head(dt1)
head(dt2)
head(dt3)
head(dt4)
dt1 <- aggregate(.~ID, dt1, paste0, collapse="/")
dt1$Phyla <- factor(dt1$Phyla, levels=c("Actinobacteria","Aquificae","Bacteroidetes",
"Chlamydiae","Chlorobi","Chloroflexi","Crenarchaeota",
"Cyanobacteria","Euryarchaeota","Firmicutes",
"Spirochaetes/Proteobacteria",
"Tenericutes","Thermi","Thermotogae","Other"))
dt3$Type2 <- factor(dt3$Type2, levels=c("FA synth init", "FA synth elong",
"acyl-CoA synth", "beta-Oxidation",
"Ketone biosynth"))
nodelab <- tree$node.label[nchar(tree$node.label)>0]
nodeids <- nodeid(tree, nodelab)
textex <- c(1.0, 0.4, 0.2, 1.4, 1.4, 0.4, 1.4, 1.4, 0.4, 0.4,
0.8, 1, 0.6, 0.6, 0.4, 0.3, 0, 0.4, 0.1, 0.25,
0.2, 0.3, 0.8, 0.8, 0.8, 0.6, 2.4)
cladelabels <- mapply(function(x, y, z){geom_cladelabel(node=x, label=y, barsize=NA, extend=0.3,
offset.text=z, fontsize=1.3, angle="auto",
hjust=0.5, horizontal=FALSE, fontface="italic")},
nodeids, nodelab, textex, SIMPLIFY=FALSE)
fills <- c("#808080", "#808080", "#808080", "#808080", "#808080",
"#191970", "#87CEFA", "#FFC125", "#B0171F", "#B0171F",
"#B0171F", "#B0171F", "#B0171F", "#B0171F", "#B0171F",
"#B0171F", "#B0171F", "#B0171F", "#B0171F", "#B0171F",
"#B0171F", "#B0171F", "#9ACD32", "#9ACD32", "#9ACD32",
"#006400", "#800000")
highlights <- mapply(function(x, y){geom_hilight(node=x, extendto=5.8, alpha=0.3,
fill=y, color=y, size=0.05)},
nodeids, fills, SIMPLIFY=FALSE)
colors <- c("#9ACD32", "#EE6A50", "#87CEFA", "#FFC125", "#D15FEE", "#8DEEEE", "#800000",
"#006400", "#800080", "#808080", "#B0171F", "#191970", "#7B68EE",
"#00CD00", "Black")
p1 <- ggtree(
tree,
layout="circular",
size=0.1
)
p1
ggsave("./output/fig1.jpg",width = 6, height = 6)
p1 <- p1 +
highlights
p1
ggsave("./output/fig2.jpg",width = 6, height = 6)
p2 <- p1 %<+% dt1 +
geom_tippoint(
mapping=aes(
fill=Phyla
),
shape = 21,
size = 1.2,
stroke = 0.05,
position = "identity",
show.legend = FALSE
)+
scale_fill_manual(values=colors)
p2
ggsave("./output/fig3.jpg",width = 6, height = 6)
p2 <- p2 +
cladelabels +
new_scale_fill()
p2
ggsave("./output/fig3-2.jpg", plot = p2, width = 6, height = 6)
p3 <- p2 +
geom_fruit(
data = dt2,
geom = geom_tile,
mapping = aes(
y = ID,
x = ring,
fill = Type1
),
offset = 0.01,
pwidth = 0.14
) +
scale_fill_manual(
name = "ATP synthesis",
values = c("#339933", "#dfac03"),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 1
)
) +
new_scale_fill()
p3
ggsave("./output/fig4.jpg", plot = p3, width = 6, height = 6)
p4 <- p3 +
geom_fruit(
data = dt3,
geom = geom_tile,
mapping = aes(
y = ID,
alpha = Abundance,
x = Type2,
fill = Type2
),
offset = 0.001,
pwidth = 0.18
) +
scale_fill_manual(
name = "Fatty Acid metabolism",
values = c("#b22222", "#005500", "#0000be", "#9f1f9f", "#793a07"),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 2
)
) +
scale_alpha_continuous(
range = c(0, 0.4),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 3
)
) +
new_scale_fill()
p4
ggsave("./output/fig5.jpg", width = 7, height = 6)
p5 <- p4 +
geom_fruit(
data = dt4,
geom = geom_bar,
mapping = aes(
y = ID,
x = Length,
fill = Phyla
),
stat = "identity",
orientation = "y",
pwidth = 0.3,
position = position_dodgex()
) +
scale_fill_manual(
values = colors,
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 4
)
) +
geom_treescale(
fontsize = 1.2,
linesize = 0.3
) +
theme(
legend.position = c(0.93, 0.76),
legend.background = element_rect(fill = NA),
legend.title = element_text(size = 6),
legend.text = element_text(size = 4.5),
legend.spacing.y = unit(0.02, "cm")
)
p5
ggsave("./output/fig6.jpg", plot = p5, width = 8, height = 6)
# 设置工作目录
setwd("F:\\生信代码复现\\系统发育树")
rm(list=ls())
# 安装必要的包
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("DECIPHER", "Biostrings", "ggtree", "ggtreeExtra", "treeio", "ggmsa"))
# 加载所有需要的包
library(DECIPHER)
library(Biostrings)
library(ape)
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(tidytree)
library(ggstar)
library(ggplot2)
library(ggnewscale)
library(ggmsa)
# 1. 处理FASTA文件生成进化树
seqs <- readDNAStringSet("进化树.fa") # 替换为您的FASTA文件路径
# 清理序列名称
names(seqs) <- gsub(" ", "_", names(seqs))
names(seqs) <- make.unique(names(seqs))
# 序列比对
alignment <- AlignSeqs(seqs,
iterations = 2,
refinements = 2,
processors = 1,
gapOpening = -18,
gapExtension = -3)
# 转换为DNAbin对象
aligned_seqs <- as.DNAbin(alignment)
# 构建进化树
dist_matrix <- dist.dna(aligned_seqs, model = "K80")
tree <- nj(dist_matrix)
# 添加bootstrap支持率
tree_boot <- boot.phylo(tree, aligned_seqs,
function(x) nj(dist.dna(x)),
B = 1000)
# 保存进化树
write.tree(tree, file = "data/kegg.nwk")
# 2. 创建必要的CSV文件
# tippoint_attr.csv
tip_data <- data.frame(
ID = tree$tip.label,
Phyla = "Ascomycota",
Class = "Leotiomycetes",
Order = "Helotiales",
Family = "Helotiaceae",
Genus = "Lunulospora"
)
write.csv(tip_data, "data/tippoint_attr.csv", row.names = FALSE)
# firstring_attr.csv
first_ring <- data.frame(
ID = tree$tip.label,
ring = rep(1:2, length.out = length(tree$tip.label)),
Type1 = sample(c("Environmental", "Clinical"),
length(tree$tip.label), replace = TRUE)
)
write.csv(first_ring, "data/firstring_attr.csv", row.names = FALSE)
# secondring_attr.csv
second_ring <- data.frame(
ID = tree$tip.label,
Type2 = sample(c("Morphotype A", "Morphotype B", "Morphotype C"),
length(tree$tip.label), replace = TRUE),
Abundance = runif(length(tree$tip.label))
)
write.csv(second_ring, "data/secondring_attr.csv", row.names = FALSE)
# barplot_attr.csv
barplot_data <- data.frame(
ID = tree$tip.label,
Length = sapply(seqs, length),
Phyla = "Ascomycota"
)
write.csv(barplot_data, "data/barplot_attr.csv", row.names = FALSE)
# 3. 数据处理
dt1 <- read.csv("data/tippoint_attr.csv")
dt2 <- read.csv("data/firstring_attr.csv")
dt3 <- read.csv("data/secondring_attr.csv")
dt4 <- read.csv("data/barplot_attr.csv")
dt1 <- aggregate(.~ID, dt1, paste0, collapse="/")
# 4. 设置颜色和其他视觉参数
colors <- c("#4DAF4A") # 单一物种的颜色
# 提取支系层的节点标签
nodelab <- tree$node.label[nchar(tree$node.label)>0]
nodeids <- nodeid(tree, nodelab)
# 支系标签的位置
textex <- rep(0.5, length(nodeids)) # 简化的标签位置
# 设置层级标签的属性
cladelabels <- mapply(function(x, y, z){
geom_cladelabel(node=x,
label=y,
barsize=NA,
extend=0.3,
offset.text=z,
fontsize=1.3,
angle="auto",
hjust=0.5,
horizontal=FALSE,
fontface="italic")
}, nodeids, nodelab, textex, SIMPLIFY=FALSE)
# 5. 绘图
# 基础树
p1 <- ggtree(tree,
layout="circular",
size=0.1,
aes(color=tree_boot)) +
scale_color_gradient(
low="red",
high="blue",
name="Bootstrap support"
)
# 保存基础树
ggsave("./output/fig1.jpg", p1, width = 6, height = 6)
# 添加分类信息
p2 <- p1 %<+% dt1 +
geom_tippoint(
mapping=aes(fill=Phyla),
shape = 21,
size = 1.2,
stroke = 0.05,
position = "identity",
show.legend = FALSE
) +
scale_fill_manual(values=colors)
# 保存带分类信息的树
ggsave("./output/fig2.jpg", p2, width = 6, height = 6)
# 添加标签
p3 <- p2 +
cladelabels +
new_scale_fill()
# 保存带标签的树
ggsave("./output/fig3.jpg", p3, width = 6, height = 6)
# 添加第一环
p4 <- p3 +
geom_fruit(
data = dt2,
geom = geom_tile,
mapping = aes(
y = ID,
x = ring,
fill = Type1
),
offset = 0.01,
pwidth = 0.14
) +
scale_fill_manual(
name = "Sample Source",
values = c("#339933", "#dfac03"),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 1
)
) +
new_scale_fill()
# 保存带第一环的树
ggsave("./output/fig4.jpg", p4, width = 7, height = 6)
# 添加第二环
p5 <- p4 +
geom_fruit(
data = dt3,
geom = geom_tile,
mapping = aes(
y = ID,
alpha = Abundance,
x = Type2,
fill = Type2
),
offset = 0.001,
pwidth = 0.18
) +
scale_fill_manual(
name = "Morphotype",
values = c("#b22222", "#005500", "#0000be"),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 2
)
) +
scale_alpha_continuous(
range = c(0, 0.4),
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 3
)
) +
new_scale_fill()
# 保存带第二环的树
ggsave("./output/fig5.jpg", p5, width = 7, height = 6)
# 最终图形:添加序列长度条形图
p6 <- p5 +
geom_fruit(
data = dt4,
geom = geom_bar,
mapping = aes(
y = ID,
x = Length,
fill = Phyla
),
stat = "identity",
orientation = "y",
pwidth = 0.3,
position = position_dodgex()
) +
scale_fill_manual(
values = colors,
guide = guide_legend(
keywidth = 0.35,
keyheight = 0.35,
order = 4
)
) +
geom_treescale(
fontsize = 1.2,
linesize = 0.3
) +
theme(
legend.position = "right",
legend.background = element_rect(fill = NA),
legend.title = element_text(size = 8),
legend.text = element_text(size = 6),
legend.spacing.y = unit(0.02, "cm")
)
# 保存最终图形
ggsave("./output/fig6.jpg", p6, width = 8, height = 6)
# 6. 添加序列比对可视化(可选)
aln_plot <- ggmsa(alignment, font = NULL, color = "Chemistry_NT") +
theme_minimal()
# 保存序列比对图
ggsave("./output/alignment.jpg", aln_plot, width = 12, height = 6)