010 渐变火山图的绘制

目的

渐变火山图是在火山图(Volcano Plot)的基础上加入颜色渐变的数据可视化方法:横轴为 log2 倍数变化(log2FC),纵轴为 -log10(P 值),并用连续的颜色渐变表示显著性的强弱。它主要用于生物信息学、统计学和数据分析领域,帮助从高维数据(例如差异表达分析结果)中快速识别变化幅度大且统计显著的条目。

普通火山图代码

####这个是我已经复现出来的
# Start from an empty workspace.
rm(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("D:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)

# Read the first sheet of the input table and preview it.
data <- read_excel("data.xls", sheet = 1)
head(data)

# Cut-offs: fold change (linear scale) and p-value.
FC <- 1.5
P.Value_threshold <- 0.05

# Tag every row as "up", "down" or "stable" according to the two cut-offs.
data <- mutate(
  data,
  change = case_when(
    (P.Value < P.Value_threshold) & (logFC > log2(FC)) ~ "up",
    (P.Value < P.Value_threshold) & (logFC < -log2(FC)) ~ "down",
    TRUE ~ "stable"
  )
)

# Count the rows in each class.
table(data$change)

# Non-stable rows with the largest absolute logFC (top 10 and top 100).
top10 <- slice_max(filter(data, change != "stable"), abs(logFC), n = 10)

top100 <- slice_max(filter(data, change != "stable"), abs(logFC), n = 100)

# Create the output folder on first use.
if (!dir.exists("./figure")) dir.create("./figure")

# Base volcano plot: points coloured by class plus the three cut-off lines.
huoshan <- ggplot(data, aes(x = logFC, y = -log10(P.Value))) +
  geom_point(aes(color = change), size = 3.5, alpha = 0.4) +
  labs(y = "-log10(P-value)") +
  scale_color_manual(
    values = c(down = "blue4", stable = "grey", up = "red3")
  ) +
  geom_vline(
    xintercept = c(-log2(FC), log2(FC)),
    color = "black", linetype = 4, linewidth = 0.8
  ) +
  geom_hline(
    yintercept = -log10(P.Value_threshold),
    color = "black", linetype = 4, linewidth = 0.8
  ) +
  theme_bw()

# Write the unlabelled plot.
ggsave(
  filename = "./figure/huoshan.png",
  plot = huoshan,
  device = "png",
  width = 8
)

# Same plot with repelled text labels for the top-10 rows.
huoshanlabel <- huoshan +
  geom_text_repel(
    data = top10,
    mapping = aes(x = logFC, y = -log10(P.Value), label = Gene),
    size = 3,
    color = 'black',
    force = 20,
    point.padding = 0.5,
    segment.size = 0.3,
    segment.color = "black"
  )

# Write the labelled plot.
ggsave(
  filename = "./figure/huoshanlabel.png",
  plot = huoshanlabel,
  device = "png",
  width = 8
)


huoshanlabel.png

双曲线火山图代码

# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("D:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)

# Read the first sheet of data.xls (must be in the working directory).
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script.
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene)

# Working copy of the table. The name `c` shadows base::c as a variable only;
# calls such as c(1, 2) still find the function.
c <- data

# Cut-offs: fold change (linear and log2 scale) and p-value.
FC <- 1.5
log2_FC <- log2(FC)
P.Value <- 0.05

# Add a `change` column holding the class of each row.
k1 <- (c$p_val < P.Value) & (c$avg_log2FC < -log2_FC)  # significant and down
k2 <- (c$p_val < P.Value) & (c$avg_log2FC > log2_FC)   # significant and up
c <- mutate(c, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
head(c)

# Ten "up" rows with the largest avg_log2FC.
top10_up <- c %>%
  filter(change == "up") %>%
  arrange(desc(avg_log2FC)) %>%
  slice_head(n = 10)

# Ten "down" rows with the smallest avg_log2FC.
top10_down <- c %>%
  filter(change == "down") %>%
  arrange(avg_log2FC) %>%
  slice_head(n = 10)

# Show the selected rows.
print(top10_up)
print(top10_down)

# Rows to label in the plot: both selections stacked.
top10_combined <- bind_rows(top10_up, top10_down)

# Additional packages.
# NOTE(review): nothing in the visible part of this script appears to call
# these two packages -- confirm before removing them.
library(ggVolcano)
library(RColorBrewer)


# Build the two mirrored branches of the hyperbolic cut-off curve.
#
# Each branch follows y = 1 / t + (-log10(p_cutoff)) for t on a regular grid
# running from `step` up to `x`. The right branch is drawn at t + log2_fc and
# the left branch is its mirror image at -(t + log2_fc).
#
# Args:
#   x:        Upper end of the grid t; a single number >= `step`.
#   log2_fc:  Horizontal offset of the branches. Defaults to the script-level
#             variable `log2_FC`, so existing calls such as f(4) keep working.
#   p_cutoff: P-value cut-off that sets the horizontal asymptote. Defaults to
#             the script-level variable `P.Value`.
#   step:     Grid spacing, which is also the first grid value.
#
# Returns:
#   A data.frame with columns `x` and `y`: all right-branch rows followed by
#   all left-branch rows.
f <- function(x, log2_fc = log2_FC, p_cutoff = P.Value, step = 0.0001) {
  stopifnot(
    is.numeric(step), length(step) == 1L, step > 0,
    is.numeric(x), length(x) == 1L, x >= step
  )
  inputx <- seq(step, x, by = step)
  y <- 1 / inputx + (-log10(p_cutoff))
  rbind(
    data.frame(x = inputx + log2_fc, y = y),
    data.frame(x = -(inputx + log2_fc), y = y)
  )
}

# Curve data for the hyperbolic cut-off (grid upper end 4).
dff_curve <- f(4)
head(dff_curve)

# Shared theme: classic look with larger axis and legend text.
mytheme <- theme_classic() +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 14),
    legend.text = element_text(size = 14)
  )

# Volcano plot with the hyperbolic cut-off curve and top-10 labels.
# NOTE(review): the fixed axis limits (-5..5 and 0..300) drop any point that
# falls outside them -- confirm they suit the data being plotted.
huoshan <- ggplot(data = c,
                  aes(x = avg_log2FC, y = -log10(p_val))) +
  geom_point(size = 2.2, aes(color = change)) +
  scale_x_continuous(limits = c(-5, 5), breaks = seq(-5, 5, by = 2.5)) +
  scale_y_continuous(expand = expansion(add = c(2, 0)),
                     limits = c(0, 300), breaks = seq(0, 300, by = 100)) +
  # Named values bind each colour to its class explicitly. An unnamed vector
  # is matched by position, so colours would shift if a class were absent.
  scale_colour_manual(values = c(down = "#4A1985",
                                 stable = "#d8d8d8",
                                 up = "#F8B606")) +
  # Group by branch so the left and right curves are never joined to each
  # other; `linewidth` replaces the deprecated `size` for line geoms.
  geom_line(data = dff_curve,
            aes(x = x, y = y, group = x > 0),
            color = "black", linetype = "dashed", linewidth = 0.7) +
  geom_text_repel(data = top10_combined,
                  aes(x = avg_log2FC, y = -log10(p_val), label = GeneName),
                  size = 4, box.padding = 0.4, max.overlaps = 15) +
  labs(
    title = "双曲线火山图(显著性 top10 上调和下调)",
    x = "log2(FC)",
    y = "-log10(p-value)"
  ) +
  mytheme

# Create the output folder on first use.
if (!dir.exists("./figure")) dir.create("./figure")

# Write the plot.
ggsave("./figure/huoshan_top10_up_down_labels.png", plot = huoshan, device = "png", width = 8, height = 6, dpi = 300)

出图

huoshan_top10_up_down_labels.png

渐变火山图

代码

# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("F:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)
library(RColorBrewer)

# Read the first sheet of data.xls (must be in the working directory).
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script.
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene)

# Add the `regulate` class by hand (stands in for an `add_regulate` helper).
data <- data %>%
  mutate(
    regulate = case_when(
      p_val < 0.05 & avg_log2FC > log2(1.5) ~ "up",     # significant, up
      p_val < 0.05 & avg_log2FC < -log2(1.5) ~ "down",  # significant, down
      TRUE ~ "stable"                                   # everything else
    )
  )

# Drop rows with missing values, then rows whose p-value is not positive
# (-log10() is not finite for them).
data <- data %>%
  filter(!is.na(avg_log2FC) & !is.na(p_val)) %>%
  filter(p_val > 0)

# Store `regulate` as a factor with a fixed level order.
data <- data %>%
  mutate(
    regulate = factor(regulate, levels = c("stable", "up", "down"))
  )

# One literal colour per class, used as-is by the plot below.
data <- data %>%
  mutate(color = case_when(
    regulate == "stable" ~ "grey30",
    regulate == "up" ~ "#00CD6C",
    regulate == "down" ~ "#FF1F5B"
  ))

# Class-coloured volcano plot without labels.
volcano_plot <- ggplot(data, aes(x = avg_log2FC, y = -log10(p_val), color = color)) +
  geom_point(alpha = 0.8, size = 2) +
  scale_color_identity() +  # use the colour strings stored in the data
  theme_minimal() +
  labs(x = "Log2 Fold Change", y = "-Log10 p-value")

# Create the output folder on first use; this script runs in a different
# working directory from the earlier ones, so it may not exist yet.
if (!dir.exists("./figure")) dir.create("./figure")

# Write the class-coloured plot.
ggsave("./figure/huoshan4_custom.png", plot = volcano_plot, device = "png", width = 8, height = 6, dpi = 300)


# From here on `data` has the columns avg_log2FC, p_val and GeneName.
# Flag significant rows.
# NOTE(review): this uses |log2FC| > 1 while `regulate` above uses log2(1.5);
# confirm the two cut-offs are meant to differ.
data <- data %>%
  mutate(
    significance = case_when(
      p_val < 0.05 & abs(avg_log2FC) > 1 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Pre-compute -log10(p_val) for the y axis and the colour gradient.
data <- data %>%
  mutate(log10_pval = -log10(p_val))

# Rows to label: the ten significant rows with the smallest p-values.
top_genes <- data %>%
  filter(significance == "Significant") %>%
  arrange(p_val) %>%
  slice_head(n = 10)

# Gradient volcano plot: point colour follows -log10(p_val).
gradual_huoshan <- ggplot(data, aes(x = avg_log2FC, y = log10_pval)) +
  geom_point(aes(color = log10_pval), alpha = 0.8, size = 2) +
  scale_color_gradientn(colors = c("blue", "yellow", "red")) +  # gradient colours
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black", linewidth = 0.5) +  # fold-change cut-offs
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black", linewidth = 0.5) +  # p-value cut-off
  labs(
    x = "Log2 Fold Change",
    y = "-Log10 P-value",
    color = "-Log10 P-value",
    title = "渐变火山图"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  ) +
  geom_text_repel(
    data = top_genes,
    aes(label = GeneName),
    size = 4,
    color = "black",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = 10
  )

# Show the plot.
print(gradual_huoshan)

# Write the plot.
ggsave("./figure/gradual_huoshan.png", plot = gradual_huoshan, width = 10, height = 8, dpi = 300)



最终出图

gradual_huoshan.png

# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("F:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)
library(RColorBrewer)

# Read the first sheet of the input table and preview it.
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script.
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene)

# Add the `regulate` class.
data <- data %>%
  mutate(
    regulate = case_when(
      p_val < 0.05 & avg_log2FC > log2(1.5) ~ "up",
      p_val < 0.05 & avg_log2FC < -log2(1.5) ~ "down",
      TRUE ~ "stable"
    )
  )

# Drop rows with missing values, then rows whose p-value is not positive.
data <- data %>%
  filter(!is.na(avg_log2FC) & !is.na(p_val)) %>%
  filter(p_val > 0)

# Store `regulate` as a factor with a fixed level order.
data <- data %>%
  mutate(
    regulate = factor(regulate, levels = c("stable", "up", "down"))
  )

# Flag significant rows.
# NOTE(review): this uses |log2FC| > 1 while `regulate` above uses log2(1.5);
# confirm the two cut-offs are meant to differ.
data <- data %>%
  mutate(
    significance = case_when(
      p_val < 0.05 & abs(avg_log2FC) > 1 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Pre-compute -log10(p_val) for the y axis and the colour gradient.
data <- data %>%
  mutate(log10_pval = -log10(p_val))

# Rows to label: the ten significant rows with the smallest p-values.
top_genes <- data %>%
  filter(significance == "Significant") %>%
  arrange(p_val) %>%
  slice_head(n = 10)

# Three-colour gradient schemes; the middle colour is the shared pink.
# Scheme 1: blue - pink - purple
color_scheme_1 <- c(
  "#4B9CD3",  # blue
  "#EAA4B8",  # pink (theme colour)
  "#663366"   # purple
)

# Scheme 2: green - pink - purple
color_scheme_2 <- c(
  "#66C2A5",  # green
  "#EAA4B8",  # pink (theme colour)
  "#9966CC"   # purple
)

# Scheme 3: orange - pink - blue
color_scheme_3 <- c(
  "#FF9966",  # orange
  "#EAA4B8",  # pink (theme colour)
  "#6699CC"   # blue
)

# Build a gradient volcano plot for one colour scheme.
#
# Args:
#   data:         Table with columns avg_log2FC and log10_pval.
#   top_genes:    Rows to label; needs avg_log2FC, log10_pval and GeneName.
#   color_scheme: Three colours for the gradient, placed at 0, 0.5 and 1 of
#                 the colour range.
#   scheme_name:  Text appended to the plot title.
#
# Returns:
#   The ggplot object.
create_volcano_plot <- function(data, top_genes, color_scheme, scheme_name) {
  # Colour shared by the cut-off lines, the labels and their segments.
  accent <- "#8B4B6F"

  # Continuous three-colour gradient with the middle colour at the midpoint.
  gradient_scale <- scale_color_gradientn(
    name = "-Log10 P-value",
    colors = color_scheme,
    values = c(0, 0.5, 1)
  )

  # Dashed cut-off lines at log2FC = -1 / 1 and p = 0.05.
  cutoff_lines <- list(
    geom_vline(xintercept = c(-1, 1), linetype = "dashed",
               color = accent, linewidth = 0.5),
    geom_hline(yintercept = -log10(0.05), linetype = "dashed",
               color = accent, linewidth = 0.5)
  )

  # Repelled gene-name labels for the selected rows.
  gene_labels <- geom_text_repel(
    data = top_genes,
    aes(label = GeneName),
    size = 4,
    color = accent,
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = 10,
    segment.color = accent,
    segment.alpha = 0.6
  )

  # Minimal theme with white backgrounds and a light grid.
  look <- theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      axis.title = element_text(size = 14),
      axis.text = element_text(size = 12),
      panel.grid = element_line(color = "#F0F0F0", linewidth = 0.2),
      legend.position = "right",
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 10),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA)
    )

  # Layer order (points, lines, labels) matches the drawing order.
  ggplot(data, aes(x = avg_log2FC, y = log10_pval)) +
    geom_point(aes(color = log10_pval), alpha = 0.8, size = 2) +
    gradient_scale +
    cutoff_lines +
    gene_labels +
    labs(
      x = "Log2 Fold Change",
      y = "-Log10 P-value",
      title = paste("Volcano Plot -", scheme_name)
    ) +
    look
}

# Create the output folder on first use; this script runs in a fresh
# workspace and working directory, so it may not exist yet.
if (!dir.exists("./figure")) dir.create("./figure")

# Build and save one volcano plot per colour scheme.
# Scheme 1: blue - pink - purple
volcano_1 <- create_volcano_plot(data, top_genes, color_scheme_1, "Blue-Pink-Purple")
ggsave("./figure/volcano_blue_pink_purple.png", plot = volcano_1, width = 10, height = 8, dpi = 300)

# Scheme 2: green - pink - purple
volcano_2 <- create_volcano_plot(data, top_genes, color_scheme_2, "Green-Pink-Purple")
ggsave("./figure/volcano_green_pink_purple.png", plot = volcano_2, width = 10, height = 8, dpi = 300)

# Scheme 3: orange - pink - blue
volcano_3 <- create_volcano_plot(data, top_genes, color_scheme_3, "Orange-Pink-Blue")
ggsave("./figure/volcano_orange_pink_blue.png", plot = volcano_3, width = 10, height = 8, dpi = 300)

# Show all plots.
print(volcano_1)
print(volcano_2)
print(volcano_3)


volcano_green_pink_purple.png
volcano_blue_pink_purple.png
volcano_orange_pink_blue.png

# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("F:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)
library(RColorBrewer)

# Read the first sheet of the input table and preview it.
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script.
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene)

# Add the `regulate` class.
data <- data %>%
  mutate(
    regulate = case_when(
      p_val < 0.05 & avg_log2FC > log2(1.5) ~ "up",
      p_val < 0.05 & avg_log2FC < -log2(1.5) ~ "down",
      TRUE ~ "stable"
    )
  )

# Drop rows with missing values, then rows whose p-value is not positive.
data <- data %>%
  filter(!is.na(avg_log2FC) & !is.na(p_val)) %>%
  filter(p_val > 0)

# Store `regulate` as a factor with a fixed level order.
data <- data %>%
  mutate(
    regulate = factor(regulate, levels = c("stable", "up", "down"))
  )

# Flag significant rows.
# NOTE(review): this uses |log2FC| > 1 while `regulate` above uses log2(1.5);
# confirm the two cut-offs are meant to differ.
data <- data %>%
  mutate(
    significance = case_when(
      p_val < 0.05 & abs(avg_log2FC) > 1 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Pre-compute -log10(p_val) for the y axis and the colour gradient.
data <- data %>%
  mutate(log10_pval = -log10(p_val))

# Rows to label: the ten significant rows with the smallest p-values.
top_genes <- data %>%
  filter(significance == "Significant") %>%
  arrange(p_val) %>%
  slice_head(n = 10)

# More vivid three-colour gradient schemes; the middle colour is the shared pink.
# Scheme 1: bright blue - pink - deep purple (strong contrast)
color_scheme_1 <- c(
  "#1E90FF",  # bright blue
  "#EAA4B8",  # pink (theme colour)
  "#800080"   # deep purple
)

# Scheme 2: emerald green - pink - deep purple (natural transition)
color_scheme_2 <- c(
  "#00B386",  # emerald green
  "#EAA4B8",  # pink (theme colour)
  "#8B008B"   # deep purple
)

# Scheme 3: orange-red - pink - navy (strong contrast)
color_scheme_3 <- c(
  "#FF4500",  # orange-red
  "#EAA4B8",  # pink (theme colour)
  "#000080"   # navy
)

# Build a gradient volcano plot with an emphasised colour scale.
#
# Args:
#   data:         Table with columns avg_log2FC and log10_pval.
#   top_genes:    Rows to label; needs avg_log2FC, log10_pval and GeneName.
#   color_scheme: Three colours for the gradient, placed at 0, 0.3 and 1 of
#                 the colour range.
#   scheme_name:  Text appended to the plot title.
#
# Returns:
#   The ggplot object.
create_volcano_plot <- function(data, top_genes, color_scheme, scheme_name) {
  # Five evenly spaced legend ticks from 0 to the largest -log10(p) value.
  max_log10_pval <- max(data$log10_pval)
  legend_ticks <- seq(0, max_log10_pval, length.out = 5)

  # Gradient with the middle colour pulled down to 30 % of the range.
  gradient_scale <- scale_color_gradientn(
    name = "-Log10 P-value",
    colors = color_scheme,
    values = c(0, 0.3, 1),
    breaks = legend_ticks,
    labels = round(legend_ticks, 1)
  )

  # Dashed cut-off lines at log2FC = -1 / 1 and p = 0.05.
  line_colour <- "#666666"
  cutoff_lines <- list(
    geom_vline(xintercept = c(-1, 1), linetype = "dashed",
               color = line_colour, linewidth = 0.6),
    geom_hline(yintercept = -log10(0.05), linetype = "dashed",
               color = line_colour, linewidth = 0.6)
  )

  # Bold, repelled gene-name labels for the selected rows.
  gene_labels <- geom_text_repel(
    data = top_genes,
    aes(label = GeneName),
    size = 4,
    fontface = "bold",
    color = "#333333",
    box.padding = 0.6,
    point.padding = 0.5,
    max.overlaps = 10,
    segment.color = line_colour,
    segment.alpha = 0.8,
    force = 10
  )

  # Minimal theme with white backgrounds, bold titles and a light grid.
  look <- theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text = element_text(size = 12, color = "black"),
      panel.grid.major = element_line(color = "#DDDDDD", linewidth = 0.3),
      panel.grid.minor = element_line(color = "#EEEEEE", linewidth = 0.2),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA)
    )

  # Layer order (points, lines, labels) matches the drawing order.
  ggplot(data, aes(x = avg_log2FC, y = log10_pval)) +
    geom_point(aes(color = log10_pval), alpha = 0.9, size = 2) +
    gradient_scale +
    cutoff_lines +
    gene_labels +
    labs(
      x = "Log2 Fold Change",
      y = "-Log10 P-value",
      title = paste("Volcano Plot -", scheme_name)
    ) +
    look
}

# Create the output folder on first use; this script runs in a fresh
# workspace and working directory, so it may not exist yet.
if (!dir.exists("./figure")) dir.create("./figure")

# Build and save one volcano plot per colour scheme.
# Scheme 1: blue - pink - purple
volcano_1 <- create_volcano_plot(data, top_genes, color_scheme_1, "Blue-Pink-Purple")
ggsave("./figure/volcano_blue_pink_purple_enhanced.png",
       plot = volcano_1,
       width = 10,
       height = 8,
       dpi = 300)

# Scheme 2: green - pink - purple
volcano_2 <- create_volcano_plot(data, top_genes, color_scheme_2, "Green-Pink-Purple")
ggsave("./figure/volcano_green_pink_purple_enhanced.png",
       plot = volcano_2,
       width = 10,
       height = 8,
       dpi = 300)

# Scheme 3: orange - pink - blue
volcano_3 <- create_volcano_plot(data, top_genes, color_scheme_3, "Orange-Pink-Blue")
ggsave("./figure/volcano_orange_pink_blue_enhanced.png",
       plot = volcano_3,
       width = 10,
       height = 8,
       dpi = 300)

# Show all plots.
print(volcano_1)
print(volcano_2)
print(volcano_3)


volcano_blue_pink_purple_enhanced.png
volcano_green_pink_purple_enhanced.png
volcano_orange_pink_blue_enhanced.png

# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("F:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)
library(RColorBrewer)

# Read the first sheet of the input table and preview it.
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script.
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene)

# Add the `regulate` class.
data <- data %>%
  mutate(
    regulate = case_when(
      p_val < 0.05 & avg_log2FC > log2(1.5) ~ "up",
      p_val < 0.05 & avg_log2FC < -log2(1.5) ~ "down",
      TRUE ~ "stable"
    )
  )

# Drop rows with missing values, then rows whose p-value is not positive.
data <- data %>%
  filter(!is.na(avg_log2FC) & !is.na(p_val)) %>%
  filter(p_val > 0)

# Store `regulate` as a factor with a fixed level order.
data <- data %>%
  mutate(
    regulate = factor(regulate, levels = c("stable", "up", "down"))
  )

# Flag significant rows.
# NOTE(review): this uses |log2FC| > 1 while `regulate` above uses log2(1.5);
# confirm the two cut-offs are meant to differ.
data <- data %>%
  mutate(
    significance = case_when(
      p_val < 0.05 & abs(avg_log2FC) > 1 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Pre-compute -log10(p_val) for the y axis and the colour gradient.
data <- data %>%
  mutate(log10_pval = -log10(p_val))

# Rows to label: the ten significant rows with the smallest p-values.
top_genes <- data %>%
  filter(significance == "Significant") %>%
  arrange(p_val) %>%
  slice_head(n = 10)

# Six three-colour gradient schemes; the middle colour is the shared pink.
# Scheme 1: blue - pink - purple (classic)
color_scheme_1 <- c(
  "#4B9CD3",  # blue
  "#EAA4B8",  # pink (theme colour)
  "#663366"   # purple
)

# Scheme 2: green - pink - purple (natural transition)
color_scheme_2 <- c(
  "#66C2A5",  # green
  "#EAA4B8",  # pink
  "#9966CC"   # purple
)

# Scheme 3: orange - pink - blue (strong contrast)
color_scheme_3 <- c(
  "#FF9966",  # orange
  "#EAA4B8",  # pink
  "#6699CC"   # blue
)

# Scheme 4: cyan - pink - red (fresh)
color_scheme_4 <- c(
  "#40E0D0",  # cyan
  "#EAA4B8",  # pink
  "#FF69B4"   # labelled red by the author; the hex value is a hot pink
)

# Scheme 5: gold - pink - navy (rich)
color_scheme_5 <- c(
  "#FFD700",  # gold
  "#EAA4B8",  # pink
  "#000080"   # navy
)

# Scheme 6: mint - pink - violet (soft)
color_scheme_6 <- c(
  "#98FF98",  # mint green
  "#EAA4B8",  # pink
  "#9370DB"   # violet
)

# Build a gradient volcano plot with boxed, arrowed gene labels.
#
# Args:
#   data:         Table with columns avg_log2FC and log10_pval.
#   top_genes:    Rows to label; needs avg_log2FC, log10_pval and GeneName.
#   color_scheme: Three colours for the gradient, placed at 0, 0.3 and 1 of
#                 the colour range.
#   scheme_name:  Text appended to the plot title.
#
# Returns:
#   The ggplot object.
create_volcano_plot <- function(data, top_genes, color_scheme, scheme_name) {
  # Ticks every 50 units from 0 up to the first multiple of 50 at or above
  # the largest -log10(p) value; shared by the colour bar and the y axis.
  max_log10_pval <- max(data$log10_pval)
  tick_breaks <- seq(0, ceiling(max_log10_pval / 50) * 50, by = 50)

  # Open arrow head drawn at the end of each label segment.
  current_arrow <- arrow(
    length = unit(0.2, "cm"),
    type = "open",
    ends = "last",
    angle = 30
  )

  ggplot(data, aes(x = avg_log2FC, y = log10_pval)) +
    # Points coloured by -log10(p).
    geom_point(aes(color = log10_pval), alpha = 0.9, size = 2) +
    # Three-colour gradient with the middle colour at 30 % of the range.
    scale_color_gradientn(
      colors = color_scheme,
      name = "-Log10 P-value",
      values = c(0, 0.3, 1),
      breaks = tick_breaks,
      labels = tick_breaks
    ) +
    # Dashed cut-off lines at log2FC = -1 / 1 and p = 0.05.
    geom_vline(xintercept = c(-1, 1), linetype = "dashed",
               color = "#666666", linewidth = 0.6) +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed",
               color = "#666666", linewidth = 0.6) +
    # Boxed labels, nudged away from x = 0 and upwards.
    geom_label_repel(
      data = top_genes,
      aes(label = GeneName),
      size = 4,
      fontface = "bold",
      color = "#333333",
      box.padding = 1.5,
      point.padding = 0.5,
      max.overlaps = 10,
      arrow = current_arrow,
      segment.size = 0.4,
      segment.color = "#666666",
      segment.alpha = 0.8,
      segment.curvature = -0.1,
      force = 10,
      direction = "both",
      nudge_x = ifelse(top_genes$avg_log2FC > 0, 1, -1),
      nudge_y = 1,
      fill = "white",
      label.size = 0.5,
      alpha = 0.8,
      # Labels are placed when the plot is drawn, so the seed is given to the
      # layer itself. This keeps every render identical and leaves the
      # caller's global random-number state untouched.
      seed = 123
    ) +
    labs(
      x = "Log2 Fold Change",
      y = "-Log10 P-value",
      title = paste("Volcano Plot -", scheme_name)
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text = element_text(size = 12, color = "black"),
      panel.grid.major = element_line(color = "#DDDDDD", linewidth = 0.3),
      panel.grid.minor = element_line(color = "#EEEEEE", linewidth = 0.2),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA)
    ) +
    # Let labels extend past the panel, and pad both axes to make room.
    coord_cartesian(clip = "off") +
    scale_x_continuous(expand = expansion(mult = c(0.2, 0.2))) +
    scale_y_continuous(
      expand = expansion(mult = c(0.1, 0.2)),
      breaks = tick_breaks
    )
}

# Create the output folder on first use; this script runs in a fresh
# workspace and working directory, so it may not exist yet.
if (!dir.exists("./figure")) dir.create("./figure")

# Build and save one volcano plot per colour scheme.
# NOTE(review): the first three file names match those written by the
# three-scheme script earlier in this document, so those files are
# overwritten when both are run in the same folder.
# Scheme 1: blue - pink - purple
volcano_1 <- create_volcano_plot(data, top_genes, color_scheme_1, "Blue-Pink-Purple")
ggsave("./figure/volcano_blue_pink_purple.png", plot = volcano_1, width = 12, height = 10, dpi = 300)

# Scheme 2: green - pink - purple
volcano_2 <- create_volcano_plot(data, top_genes, color_scheme_2, "Green-Pink-Purple")
ggsave("./figure/volcano_green_pink_purple.png", plot = volcano_2, width = 12, height = 10, dpi = 300)

# Scheme 3: orange - pink - blue
volcano_3 <- create_volcano_plot(data, top_genes, color_scheme_3, "Orange-Pink-Blue")
ggsave("./figure/volcano_orange_pink_blue.png", plot = volcano_3, width = 12, height = 10, dpi = 300)

# Scheme 4: cyan - pink - red
volcano_4 <- create_volcano_plot(data, top_genes, color_scheme_4, "Cyan-Pink-Pink")
ggsave("./figure/volcano_cyan_pink_pink.png", plot = volcano_4, width = 12, height = 10, dpi = 300)

# Scheme 5: gold - pink - navy
volcano_5 <- create_volcano_plot(data, top_genes, color_scheme_5, "Gold-Pink-Navy")
ggsave("./figure/volcano_gold_pink_navy.png", plot = volcano_5, width = 12, height = 10, dpi = 300)

# Scheme 6: mint - pink - violet
volcano_6 <- create_volcano_plot(data, top_genes, color_scheme_6, "Mint-Pink-Violet")
ggsave("./figure/volcano_mint_pink_violet.png", plot = volcano_6, width = 12, height = 10, dpi = 300)

# Show all plots.
print(volcano_1)
print(volcano_2)
print(volcano_3)
print(volcano_4)
print(volcano_5)
print(volcano_6)


image.png

# Analysis of lncRNA changes in GES-1 cells after Hp infection (GEO data).
# Start from an empty workspace.
remove(list = ls())

# Work inside the tutorial folder so the relative paths below resolve.
setwd("F:/生信代码复现/渐变火山图")

# Packages used by this script.
library(dplyr)
library(ggplot2)
library(ggrepel)
library(readxl)
library(RColorBrewer)

# Read the first sheet of the input table and preview it.
data <- read_excel("data.xls", sheet = 1)
head(data)

# Validate the raw column names BEFORE renaming: rename() itself aborts on a
# missing column, so a check placed after it could never report the problem.
if (!all(c("logFC", "P.Value", "Gene") %in% colnames(data))) {
  stop("数据中缺少必需的列,请检查文件结构!")
}

# Rename to the column names used by the rest of this script, and coerce the
# two numeric columns (values that cannot be parsed become NA and are dropped
# by the NA filter below).
data <- data %>%
  rename(avg_log2FC = logFC, p_val = `P.Value`, GeneName = Gene) %>%
  mutate(
    avg_log2FC = as.numeric(avg_log2FC),
    p_val = as.numeric(p_val)
  )

# Add the `regulate` class.
data <- data %>%
  mutate(
    regulate = case_when(
      p_val < 0.05 & avg_log2FC > log2(1.5) ~ "up",
      p_val < 0.05 & avg_log2FC < -log2(1.5) ~ "down",
      TRUE ~ "stable"
    )
  )

# Drop rows with missing values, then rows whose p-value is not positive.
data <- data %>%
  filter(!is.na(avg_log2FC) & !is.na(p_val)) %>%
  filter(p_val > 0)

# Store `regulate` as a factor with a fixed level order.
data <- data %>%
  mutate(
    regulate = factor(regulate, levels = c("stable", "up", "down"))
  )

# Flag significant rows.
# NOTE(review): this uses |log2FC| > 1 while `regulate` above uses log2(1.5);
# confirm the two cut-offs are meant to differ.
data <- data %>%
  mutate(
    significance = case_when(
      p_val < 0.05 & abs(avg_log2FC) > 1 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Pre-compute -log10(p_val) for the y axis and the colour gradient.
data <- data %>%
  mutate(log10_pval = -log10(p_val))

# Rows to label: the ten significant rows with the smallest p-values.
top_genes <- data %>%
  filter(significance == "Significant") %>%
  arrange(p_val) %>%
  slice_head(n = 10)

# Six three-colour gradient schemes; the middle colour is the shared pink.
# Scheme 1: blue - pink - purple (classic)
color_scheme_1 <- c(
  "#4B9CD3",  # blue
  "#EAA4B8",  # pink (theme colour)
  "#663366"   # purple
)

# Scheme 2: green - pink - purple (natural transition)
color_scheme_2 <- c(
  "#66C2A5",  # green
  "#EAA4B8",  # pink
  "#9966CC"   # purple
)

# Scheme 3: orange - pink - blue (strong contrast)
color_scheme_3 <- c(
  "#FF9966",  # orange
  "#EAA4B8",  # pink
  "#6699CC"   # blue
)

# Scheme 4: cyan - pink - red (fresh)
color_scheme_4 <- c(
  "#40E0D0",  # cyan
  "#EAA4B8",  # pink
  "#FF69B4"   # labelled red by the author; the hex value is a hot pink
)

# Scheme 5: gold - pink - navy (rich)
color_scheme_5 <- c(
  "#FFD700",  # gold
  "#EAA4B8",  # pink
  "#000080"   # navy
)

# Scheme 6: mint - pink - violet (soft)
color_scheme_6 <- c(
  "#98FF98",  # mint green
  "#EAA4B8",  # pink
  "#9370DB"   # violet
)

# Build a gradient volcano plot with boxed, arrowed gene labels.
#
# Args:
#   data:         Table with columns avg_log2FC and log10_pval.
#   top_genes:    Rows to label; needs avg_log2FC, log10_pval and GeneName.
#   color_scheme: Three colours for the gradient, placed at 0, 0.3 and 1 of
#                 the colour range.
#   scheme_name:  Text appended to the plot title.
#
# Returns:
#   The ggplot object.
create_volcano_plot <- function(data, top_genes, color_scheme, scheme_name) {
  # Ticks every 50 units from 0 up to the first multiple of 50 at or above
  # the largest -log10(p) value; shared by the colour bar and the y axis.
  max_log10_pval <- max(data$log10_pval)
  tick_breaks <- seq(0, ceiling(max_log10_pval / 50) * 50, by = 50)

  # Open arrow head drawn at the end of each label segment.
  current_arrow <- arrow(
    length = unit(0.2, "cm"),
    type = "open",
    ends = "last",
    angle = 30
  )

  ggplot(data, aes(x = avg_log2FC, y = log10_pval)) +
    # Points coloured by -log10(p).
    geom_point(aes(color = log10_pval), alpha = 0.9, size = 2) +
    # Three-colour gradient with the middle colour at 30 % of the range.
    scale_color_gradientn(
      colors = color_scheme,
      name = "-Log10 P-value",
      values = c(0, 0.3, 1),
      breaks = tick_breaks,
      labels = tick_breaks
    ) +
    # Dashed cut-off lines at log2FC = -1 / 1 and p = 0.05.
    geom_vline(xintercept = c(-1, 1), linetype = "dashed",
               color = "#666666", linewidth = 0.6) +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed",
               color = "#666666", linewidth = 0.6) +
    # Boxed labels, nudged away from x = 0 and upwards.
    geom_label_repel(
      data = top_genes,
      aes(label = GeneName),
      size = 4,
      fontface = "bold",
      color = "#333333",
      box.padding = 1.5,
      point.padding = 0.5,
      max.overlaps = 10,
      arrow = current_arrow,
      segment.size = 0.4,
      segment.color = "#666666",
      segment.alpha = 0.8,
      segment.curvature = -0.1,
      force = 10,
      direction = "both",
      nudge_x = ifelse(top_genes$avg_log2FC > 0, 1, -1),
      nudge_y = 1,
      fill = "white",
      label.size = 0.5,
      alpha = 0.8,
      # Labels are placed when the plot is drawn, so the seed is given to the
      # layer itself. This keeps every render identical and leaves the
      # caller's global random-number state untouched.
      seed = 123
    ) +
    labs(
      x = "Log2 Fold Change",
      y = "-Log10 P-value",
      title = paste("Volcano Plot -", scheme_name)
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text = element_text(size = 12, color = "black"),
      panel.grid.major = element_line(color = "#DDDDDD", linewidth = 0.3),
      panel.grid.minor = element_line(color = "#EEEEEE", linewidth = 0.2),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA)
    ) +
    # Let labels extend past the panel, and pad both axes to make room.
    coord_cartesian(clip = "off") +
    scale_x_continuous(expand = expansion(mult = c(0.2, 0.2))) +
    scale_y_continuous(
      expand = expansion(mult = c(0.1, 0.2)),
      breaks = tick_breaks
    )
}

# Create the output folder on first use; this script runs in a fresh
# workspace and working directory, so it may not exist yet.
if (!dir.exists("./figure")) dir.create("./figure")

# Build and save one volcano plot per colour scheme.
# NOTE(review): these file names are also written by the earlier scripts in
# this document, so those files are overwritten when run in the same folder.
# Scheme 1: blue - pink - purple
volcano_1 <- create_volcano_plot(data, top_genes, color_scheme_1, "Blue-Pink-Purple")
ggsave("./figure/volcano_blue_pink_purple.png", plot = volcano_1, width = 12, height = 10, dpi = 300)

# Scheme 2: green - pink - purple
volcano_2 <- create_volcano_plot(data, top_genes, color_scheme_2, "Green-Pink-Purple")
ggsave("./figure/volcano_green_pink_purple.png", plot = volcano_2, width = 12, height = 10, dpi = 300)

# Scheme 3: orange - pink - blue
volcano_3 <- create_volcano_plot(data, top_genes, color_scheme_3, "Orange-Pink-Blue")
ggsave("./figure/volcano_orange_pink_blue.png", plot = volcano_3, width = 12, height = 10, dpi = 300)

# Scheme 4: cyan - pink - red
volcano_4 <- create_volcano_plot(data, top_genes, color_scheme_4, "Cyan-Pink-Pink")
ggsave("./figure/volcano_cyan_pink_pink.png", plot = volcano_4, width = 12, height = 10, dpi = 300)

# Scheme 5: gold - pink - navy
volcano_5 <- create_volcano_plot(data, top_genes, color_scheme_5, "Gold-Pink-Navy")
ggsave("./figure/volcano_gold_pink_navy.png", plot = volcano_5, width = 12, height = 10, dpi = 300)

# Scheme 6: mint - pink - violet
volcano_6 <- create_volcano_plot(data, top_genes, color_scheme_6, "Mint-Pink-Violet")
ggsave("./figure/volcano_mint_pink_violet.png", plot = volcano_6, width = 12, height = 10, dpi = 300)

# Show all plots.
print(volcano_1)
print(volcano_2)
print(volcano_3)
print(volcano_4)
print(volcano_5)
print(volcano_6)


image.png