/**
* jQuery Plugin: Sticky Tabs
*
* @author Aidan Lister
// Set the correct tab when the page loads showStuffFromHash(context);
// Set the correct tab when a user uses their back/forward button $(window).on('hashchange', function() { showStuffFromHash(context); });
// Change the URL when tabs are clicked $('a', context).on('click', function(e) { history.pushState(null, null, this.href); showStuffFromHash(context); });
return this; }; }(jQuery));
window.buildTabsets = function(tocID) {
// build a tabset from a section div with the .tabset class function buildTabset(tabset) {
// check for fade and pills options var fade = tabset.hasClass("tabset-fade"); var pills = tabset.hasClass("tabset-pills"); var navClass = pills ? "nav-pills" : "nav-tabs";
// determine the heading level of the tabset and tabs var match = tabset.attr('class').match(/level(\d) /); if (match === null) return; var tabsetLevel = Number(match[1]); var tabLevel = tabsetLevel + 1;
// find all subheadings immediately below var tabs = tabset.find("div.section.level" + tabLevel); if (!tabs.length) return;
// create tablist and tab-content elements var tabList = $('
'); $(tabs[0]).before(tabList); var tabContent = $('
'); $(tabs[0]).before(tabContent);
// build the tabset var activeTab = 0; tabs.each(function(i) {
// get the tab div var tab = $(tabs[i]);
// get the id then sanitize it for use with bootstrap tabs var id = tab.attr('id');
// see if this is marked as the active tab if (tab.hasClass('active')) activeTab = i;
// remove any table of contents entries associated with // this ID (since we'll be removing the heading element) $("div#" + tocID + " li a[href='#" + id + "']").parent().remove();
// sanitize the id for use with bootstrap tabs id = id.replace(/[.\/?&!#<>]/g, '').replace(/\s/g, '_'); tab.attr('id', id);
// get the heading element within it, grab it's text, then remove it var heading = tab.find('h' + tabLevel + ':first'); var headingText = heading.html(); heading.remove();
// build and append the tab list item var a = $('' + headingText + ''); a.attr('href', '#' + id); a.attr('aria-controls', id); var li = $('
'); li.append(a); tabList.append(li);
// set it's attributes tab.attr('role', 'tabpanel'); tab.addClass('tab-pane'); tab.addClass('tabbed-pane'); if (fade) tab.addClass('fade');
// move it into the tab content div tab.detach().appendTo(tabContent); });
// set active tab $(tabList.children('li')[activeTab]).addClass('active'); var active = $(tabContent.children('div.section')[activeTab]); active.addClass('active'); if (fade) active.addClass('in');
if (tabset.hasClass("tabset-sticky")) tabset.rmarkdownStickyTabs(); }
// convert section divs with the .tabset class to tabsets var tabsets = $("div.section.tabset"); tabsets.each(function(i) { buildTabset($(tabsets[i])); }); };
ComplexHeatmap Tutorial: create publication-ready heatmaps in R
In this easy step-by-step tutorial we will learn how to create and customise a heatmap with R package ComplexHeatmap.
ComplexHeatmap is one of the most powerful and flexible R packages for creating publication-quality heatmaps. It allows you to add multiple layers of annotations, customize every aspect of your visualisation, and create complex multi-panel figures. You will probably have seen a lot of beautiful and professional-looking figures in publications that are created using this package in R.
But before we start… what is a heatmap?
A heatmap is basically a plot that uses colour to represent the intensity or magnitude of values, making it easy to visualise patterns. For example, it is a great way to display the expression of many genes across many samples. But you can use it to show any kind of quantitative data! If you are not that familiar with heatmaps, you might want to check my other post on how to interpret a heatmap. I go over the basic concepts and then put theory into practice with a ‘real-life’ published heatmap. Otherwise, keep reading and follow my step-by-step guide to create your own heatmap!
If you are a beginner in R, don’t be overwhelmed! This tutorial will go step-by-step and I will explain (almost!) every line of code so you know what is happening at each point of the workflow. Just have fun with it!
So if you are ready… let’s dive in!
|
Check out my YouTube tutorial and follow along with me to create publication-ready heatmaps in R with ComplexHeatmap! |
Installing ComplexHeatmap
For this tutorial you will need R, or Rstudio, and you will need to install the following packages:
# Install BiocManager only when it is not available yet.
# requireNamespace() checks for the package without attaching it to the
# search path; the installer is then called through its namespace.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("ComplexHeatmap")
Setting Up the Environment
First, let’s set up our R environment and load the necessary packages. We’ll be using simulated patient data throughout this tutorial to demonstrate real-world applications; but feel free to use your own dataset!
# Clean environment and optimize settings
# Remove every object in the current environment, including hidden
# (dot-prefixed) names. Attached packages and options() are not touched.
rm(list = ls(all.names = TRUE))
# Run a garbage collection; at top level its memory summary is printed.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 621645 33.2 1414794 75.6 719562 38.5
## Vcells 1162728 8.9 8388608 64.0 1876272 14.4
# Session-wide options: lift the print limit, avoid scientific notation, keep
# strings as character vectors, and silence dplyr's summarise() message.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
options(
  max.print = .Machine$integer.max,
  scipen = 999,
  stringsAsFactors = FALSE,
  dplyr.summarise.inform = FALSE
)
# Load required libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(dendextend)
library(dplyr)
# Set seed for reproducibility
set.seed(123)
SquidTip! Always clean your environment and set a seed to ensure reproducible results.
Creating sample patient data
For this tutorial, we’ll create a realistic dataset simulating gene expression data from cancer patients. Feel free to skip this section if you already have your own dataset.
# Generate sample data for 50 patients and 100 genes
n_patients <- 50
n_genes <- 100

# Create meaningful names.
# seq_len() is used instead of 1:n so that a count of zero would give an
# empty vector rather than c(1, 0).
gene_names <- paste0("Gene_", seq_len(n_genes))
patient_ids <- paste0("Patient_", sprintf("%02d", seq_len(n_patients)))

# Simulate gene expression data (log2 fold changes):
# genes are rows, patients are columns.
expression_data <- matrix(
  rnorm(n_patients * n_genes, mean = 0, sd = 1.5),
  nrow = n_genes,
  ncol = n_patients,
  dimnames = list(gene_names, patient_ids)
)

# Add biological structure to make heatmap more interesting:
# shift three gene-by-patient blocks away from the background.
expression_data[1:20, 1:15] <- expression_data[1:20, 1:15] + 2 # Upregulated cluster
expression_data[21:40, 16:30] <- expression_data[21:40, 16:30] - 2 # Downregulated cluster
expression_data[41:60, 31:45] <- expression_data[41:60, 31:45] + 1.5 # Moderate upregulation
Let’s have a look at the dimensions of our dataset.
dim(expression_data)
## [1] 100 50
What does it look like? As you can see we have a matrix where columns are sample IDs (or patient IDs), and the rows are gene IDs. This is the main data we are trying to plot as a heatmap: the gene expression values will be colour coded, but each column in our heatmap will be a patient/sample, and each row will be a gene.
If you want it the other way around, just transpose your matrix with t(), and make sure you interchange “columns” and “rows” as we go through this tutorial.
head(expression_data[1:5, 1:5])
## Patient_01 Patient_02 Patient_03 Patient_04 Patient_05
## Gene_1 1.159287 0.9343902 5.298216 0.9271367 1.8896660
## Gene_2 1.654734 2.3853256 3.968619 0.8709665 0.2470229
## Gene_3 4.338062 1.6299622 1.602282 0.5921919 1.0478776
## Gene_4 2.105763 1.4786861 2.814791 0.4212301 1.9567377
## Gene_5 2.193932 0.5725721 1.378490 1.3442607 3.0060440
Important! If you start off from a data.frame, use the function as.matrix() to convert it to a matrix. You can check if it worked by calling class() on your data object:
class(expression_data)
## [1] "matrix" "array"
Apart from the main data, you might want to add annotations to your heatmap. For example, to track information about the patients like their age or cancer stage; or to mark gene information - for example if certain genes are related to specific pathways.
So let’s create comprehensive patient annotation data that includes both continuous and categorical variables:
# Simulated clinical metadata, one row per patient.
# The columns are generated in the order listed, so the random number
# stream is consumed in the same sequence on every seeded run.
patient_annotations <- data.frame(
  Patient_ID = patient_ids,

  # Numeric covariates
  Age = round(rnorm(n = n_patients, mean = 65, sd = 12)),
  BMI = round(rnorm(n = n_patients, mean = 25, sd = 4), digits = 1),
  Tumor_Size = round(rnorm(n = n_patients, mean = 3.5, sd = 1.2), digits = 1),

  # Categorical covariates
  Gender = sample(
    x = c("Male", "Female"),
    size = n_patients, replace = TRUE, prob = c(0.6, 0.4)
  ),
  Stage = sample(
    x = c("I", "II", "III", "IV"),
    size = n_patients, replace = TRUE, prob = c(0.2, 0.3, 0.3, 0.2)
  ),
  Treatment = sample(
    x = c("Surgery", "Chemo", "Radiation", "Combined"),
    size = n_patients, replace = TRUE
  ),
  Response = sample(
    x = c("Complete", "Partial", "Stable", "Progressive"),
    size = n_patients, replace = TRUE
  ),

  stringsAsFactors = FALSE
)

# Clamp the numeric covariates to plausible ranges
patient_annotations <- transform(
  patient_annotations,
  Age = pmax(18, pmin(90, Age)),
  BMI = pmax(15, pmin(45, BMI)),
  Tumor_Size = pmax(0.5, pmin(10, Tumor_Size))
)
head(patient_annotations)
## Patient_ID Age BMI Tumor_Size Gender Stage Treatment Response
## 1 Patient_01 59 15.0 3.8 Male IV Combined Progressive
## 2 Patient_02 79 25.0 3.8 Female III Chemo Partial
## 3 Patient_03 51 25.4 4.4 Male III Radiation Stable
## 4 Patient_04 83 25.0 4.4 Female III Surgery Complete
## 5 Patient_05 76 25.6 3.5 Female IV Combined Progressive
## 6 Patient_06 69 20.3 3.0 Male III Surgery Stable
Nice! As you can see, each row of the annotation dataset is one patient, identified in the Patient_ID column - make sure these IDs match the column names of your data matrix, and that they are in the same order.
SquidTip!
If you want to use Seurat normalised or raw expression values, you can use the function GetAssayData()
Create a basic heatmap with ComplexHeatmap
Nice! Now that we have our sample dataset, we’re ready to go!
Let’s start with a simple heatmap to understand the basics. Below you can see the main arguments the function Heatmap() takes. Don’t forget you can use ?Heatmap to find out more.
The first argument is our expression data, our matrix. “name” refers to the name of the heatmap. By default the heatmap name is used as the title of the heatmap legend. I decided to show column names, but not row names - and you can control the size (and other text properties like colour, face, … etc with gp). Finally, I gave the heatmap a title.
This created a Heatmap object, but to visualise it we need to call draw(). Easy!
# Minimal heatmap: only the matrix and a few display options are set,
# everything else is left at the Heatmap() defaults.
basic_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  column_title = "Patient Gene Expression Heatmap",
  show_column_names = TRUE,
  column_names_gp = gpar(fontsize = 8),
  show_row_names = FALSE
)

# Building the object does not plot anything; draw() renders it.
draw(basic_heatmap)
Note how the heatmap is clustered, meaning that rows and columns are grouped by their similarity in gene expression values. We’ll see in a minute how you can control how this grouping is done, or disable it completely. The dendrograms show relationships between samples and features (genes in this case).
The default color scheme uses a blue-white-red gradient, which we can also edit, of course!
Changing the colours of your heatmap
Color choice is crucial for effective data visualization. Let’s explore different color schemes and when to use them. You can use the function colorRamp2() from the circlize package to define colour palettes - it is very handy when you want continuous colour palettes, for example, going from blue to red, or green to orange.
Note that you define the minimum, midpoint and maximum with the vector - you might need to adjust this to cover the range of values in your dataset!
Essentially, the colour of the heatmap is defined by the argument col.
We’ll also edit the heatmap legend and place it in the bottom, horizontally. The argument heatmap_legend_param controls the heatmap legend. Note that we haven’t added annotations yet - we’re just talking about the main values in the heatmap!
You can change the title, direction, size, colour… basically anything.
The position of the legend is defined by “heatmap_legend_side” inside draw().
# Two diverging palettes: each break point (minimum, midpoint, maximum)
# is mapped to the colour in the same position.
col_fun1 <- colorRamp2(
  breaks = c(-3, 0, 3),
  colors = c("darkgreen", "white", "orange3")
)
col_fun2 <- colorRamp2(
  breaks = c(-4, 0, 4),
  colors = c("navy", "lightgray", "darkred")
)

# Same matrix as before, now coloured with col_fun2 and with a
# horizontal legend.
custom_color_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  column_title = "Custom Color Heatmap",
  col = col_fun2,
  heatmap_legend_param = list(
    title = "Log2 FC",
    legend_direction = "horizontal",
    legend_width = unit(6, "cm")
  ),
  show_column_names = FALSE,
  show_row_names = FALSE
)

# The legend position is chosen at draw time, not in Heatmap()
draw(custom_color_heatmap, heatmap_legend_side = "bottom")
- Use diverging colours for data with meaningful zero point (like fold changes)
- Use sequential colours for data with natural ordering (for example, tumour size, or age)
- Use qualitative colours for categorical data (for example, cancer stage)
- Always consider colorblind-friendly palettes
These are some useful resources for colours in R:
paletteer package
flatuicolors.com
Clustering
ComplexHeatmap allows different options for clustering. The clustering system allows independent control over rows and columns, supports mixed distance metrics, and provides extensive customization for both the clustering algorithm and visual appearance of the resulting dendrograms.
You can control whether to cluster columns or rows (or not) with the arguments cluster_columns, cluster_rows.
There are different distance metrics:
- euclidean: Standard straight-line distance between points
- manhattan: Sum of absolute differences (city-block distance)
- pearson/spearman: Correlation-based distances for expression data
- binary: For presence/absence data
- maximum/canberra: Alternative distance calculations for specific data types
Different clustering methods are available such as complete (tight, spherical clusters), single (can create elongated clusters), average (uses mean distance), and ward.D2 (minimizes within-cluster variance, often preferred for gene expression).
Additional options include k-means pre-clustering (row_km/column_km), manual grouping with split variables (row_split/column_split), and dendrogram customization through graphics parameters and callback functions. Check ?Heatmap for complete documentation!
# Clustering
# Rows and columns are both clustered on Spearman correlation distance
# with Ward (ward.D2) linkage.
annotated_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  # Clustering parameters
  clustering_distance_rows = "spearman",
  clustering_method_rows = "ward.D2",
  clustering_distance_columns = "spearman",
  clustering_method_columns = "ward.D2",
  col = col_fun2,
  show_row_names = FALSE,
  show_column_names = FALSE
)
Adding column annotations
Nice! Now that we have the basics, it’s time to add some annotations to our heatmap! This is where ComplexHeatmap truly shines - there’s so many customisation options!
We’ll start by adding annotations to our samples / patients.
First, I’ll create a colour list, setting the colours for all of our annotations. This is not necessary - if you don’t specify colours, ComplexHeatmap will choose a palette for you - but it might change every time you run the heatmap.
# Colour mappings for every patient annotation, keyed by annotation name.
# Continuous variables get a white-to-magenta ramp over their clamped range;
# categorical variables get one named colour per level.
colour_list <- list(
  # Continuous
  Age = colorRamp2(breaks = c(18, 90), colors = c("white", "magenta4")),
  BMI = colorRamp2(breaks = c(15, 45), colors = c("white", "magenta4")),
  Tumor_Size = colorRamp2(breaks = c(0.5, 10), colors = c("white", "magenta4")),

  # Categorical
  Gender = c(Male = "aquamarine3", Female = "darkgoldenrod2"),
  Stage = c(
    I = "white",
    II = "aquamarine3",
    III = "deepskyblue2",
    IV = "deepskyblue4"
  ),
  Treatment = c(
    Surgery = "white",
    Chemo = "lightblue",
    Radiation = "lightpink",
    Combined = "darkred"
  ),
  Response = c(
    Complete = "white",
    Partial = "lightblue",
    Stable = "lightpink",
    Progressive = "darkred"
  )
)
The important part is creating a HeatmapAnnotation
object. This is separate to our main Heatmap()
function.
-
First, we define annotation tracks, one for each variable we want to plot (in this case, four: age, BMI, tumour size and gender). These are vectors of values (numeric or categorical) corresponding to each row or column of your heatmap (depending on whether the annotation is row or column based). HeatmapAnnotation() by default creates column annotations — you can use
HeatmapAnnotation(…, which = “row”)
for row annotations. -
colour_list
is a named list of color mappings, which we created earlier. na_col specifies the color to use when a value is missing (NA), which is quite useful if you have missing values in your data as you won’t need to specify it for each variable you want to plot. -
gp controls graphical parameters for the annotation blocks. Here,
col = “white”
likely sets the border color of the annotation cells to white. -
annotation_name_gp
also controls graphical parameters, but for the annotation labels. It essentially controls the appearance (e.g., font size, color) of the annotation labels (the names like “Age”, “BMI” on the annotation bar). In this case, sets the font size to 10 pt. -
Finally,
annotation_legend_param
customizes the legends for each annotation. You can customise practically everything, for each variable separately. Check out some examples here.
Once you’re happy with your HeatmapAnnotation, just pass it to the arguments top_annotation, bottom_annotation, right_annotation, and left_annotation
in the main Heatmap()
function.
Let’s do it!
Just to make it a bit more interesting, I’m going to create two different HeatmapAnnotation objects, one for the top and one for the bottom.
# Top annotation: the three continuous variables plus gender.
# Colours are looked up by annotation name in colour_list; missing values
# are drawn in gray and cells get a white border.
top_ann <- HeatmapAnnotation(
  Age = patient_annotations[["Age"]],
  BMI = patient_annotations[["BMI"]],
  Tumor_Size = patient_annotations[["Tumor_Size"]],
  Gender = patient_annotations[["Gender"]],
  col = colour_list,
  na_col = "gray",
  gp = gpar(col = "white"),
  annotation_name_gp = gpar(fontsize = 10),
  # Custom legend titles for the continuous tracks
  annotation_legend_param = list(
    Age = list(title = "Age (years)"),
    BMI = list(title = "BMI"),
    Tumor_Size = list(title = "Tumor Size (cm)")
  )
)

# Bottom annotation: stage, treatment and response
bottom_ann <- HeatmapAnnotation(
  Stage = patient_annotations[["Stage"]],
  Treatment = patient_annotations[["Treatment"]],
  Response = patient_annotations[["Response"]],
  col = colour_list,
  gp = gpar(col = "white"),
  annotation_name_gp = gpar(fontsize = 10)
)
Nice! Now let’s put it all together:
# Clustered heatmap with the patient annotations attached above and below
annotated_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  column_title = "Gene Expression with Patient Annotations",
  col = col_fun2,

  # Annotation tracks
  top_annotation = top_ann,
  bottom_annotation = bottom_ann,

  # Clustering: Spearman distance, Ward linkage, on both axes
  clustering_distance_rows = "spearman",
  clustering_distance_columns = "spearman",
  clustering_method_rows = "ward.D2",
  clustering_method_columns = "ward.D2",

  # Hide axis labels
  show_column_names = FALSE,
  show_row_names = FALSE
)

draw(annotated_heatmap)
Row Annotations and Data Splitting
Of course, the same applies to rows. We can also add annotations to our genes.
I’m just going to create some categories for the genes related to pathways and their function. Just like with our sample/patient data, you just have to make sure that the rows of the annotation dataframe (in this case called gene_categories) are in the same order as the rownames of your matrix - here the Gene column holds those names.
# Per-gene metadata for the row annotations: every gene is randomly
# assigned one pathway and one functional class.
gene_categories <- data.frame(
  Gene = gene_names,
  Pathway = sample(
    x = c("Metabolism", "Cell Cycle", "Apoptosis", "DNA Repair"),
    size = n_genes,
    replace = TRUE
  ),
  Function = sample(
    x = c("Oncogene", "Tumor Suppressor", "Metabolic", "Structural"),
    size = n_genes,
    replace = TRUE
  ),
  stringsAsFactors = FALSE
)
head(gene_categories)
## Gene Pathway Function
## 1 Gene_1 Apoptosis Oncogene
## 2 Gene_2 DNA Repair Structural
## 3 Gene_3 DNA Repair Oncogene
## 4 Gene_4 Metabolism Tumor Suppressor
## 5 Gene_5 Cell Cycle Tumor Suppressor
## 6 Gene_6 DNA Repair Structural
Note that here I am using rowAnnotation
- I could have used columnAnnotation
before when I created an annotation object for our samples.
Again, I list the different annotation blocks, and customise the appearance. Then pass it to left_annotation
, because the genes are rows - so annotations can only be left or right. Easy!
# Row annotation: one track for the pathway and one for the gene function.
# Every level within a track has its own colour so that categories can be
# told apart in the heatmap and in the legend.
row_annotation <- rowAnnotation(
  Pathway = gene_categories$Pathway,
  Function = gene_categories$Function,
  col = list(
    Pathway = c(
      "Metabolism" = "cyan2", "Cell Cycle" = "gold3",
      "Apoptosis" = "magenta4", "DNA Repair" = "green4"
    ),
    Function = c(
      "Oncogene" = "gold3", "Tumor Suppressor" = "magenta4",
      "Metabolic" = "cyan2", "Structural" = "green4"
    )
  ),
  annotation_name_gp = gpar(fontsize = 10),
  annotation_legend_param = list(
    Pathway = list(title = "Biological Pathway"),
    Function = list(title = "Gene Function")
  )
)
# Full heatmap with row and column annotations plus splitting
full_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "complete",
  clustering_distance_columns = "euclidean",
  clustering_method_columns = "complete",
  col = col_fun2,
  top_annotation = top_ann,
  bottom_annotation = bottom_ann,
  left_annotation = row_annotation,
  show_row_names = FALSE,
  show_column_names = TRUE,
  column_names_rot = 45,
  column_names_side = "top",
  # gpar() takes the font style as `fontface`
  column_names_gp = gpar(fontsize = 8, fontface = "bold"),
  column_title = "Gene Expression with Patient Annotations",
  # Splitting creates visual groups
  column_split = patient_annotations$Stage,
  row_split = gene_categories$Pathway,
  row_title_rot = 0,
  column_title_gp = gpar(fontsize = 14, fontface = "bold"),
  # Gaps between splits
  column_gap = unit(2, "mm"),
  row_gap = unit(2, "mm"),
  # Borders and spacing
  rect_gp = gpar(col = "white", lwd = 0.5),
  border = TRUE
)
draw(full_heatmap)
Cool!
But… wait a minute! Why is the heatmap split into sections?
You’ll notice how I used column_split
and row_split
to split the values by both patient cancer stage and gene pathways. Note that in this case the clustering is done for each group.
You can control the spacing of the gaps between groups, and also add a border.
SquidTip! If you already know groupings (e.g., patient subtypes, treatment arms, tissue types), splitting lets you show those categories explicitly and cluster within each group. This is particularly powerful for showing treatment groups, disease stages, or functional gene categories.
Remember that splitting overrides clustering across the full axis and instead clusters within each split group. So when you want to discover natural groupings via unsupervised clustering - do not split!
Advanced Annotations: Barplots and Compositions
One of the best things about ComplexHeatmap is that it allows you to add all kinds of plots to the main heatmap as annotations. For example, it supports complex annotation types like barplots, which are perfect for showing compositional data.
Let’s create a dataframe with the proportions of cell types for each tumour sample:
# Simulated cell-type composition per patient. Raw values are drawn from
# uniform ranges and then rescaled so that each patient's four cell types
# add up to 100.
cell_composition <- data.frame(
  Patient_ID = patient_ids,
  Tumor_cells = runif(n = n_patients, min = 20, max = 70),
  T_cells = runif(n = n_patients, min = 10, max = 40),
  Macrophages = runif(n = n_patients, min = 5, max = 25),
  B_cells = runif(n = n_patients, min = 2, max = 15),
  stringsAsFactors = FALSE
)
row.names(cell_composition) <- cell_composition$Patient_ID

# Columns 2 to 5 hold the four cell types; divide each row by its total
row_sums <- rowSums(cell_composition[2:5])
cell_composition[2:5] <- cell_composition[2:5] / row_sums * 100
head(cell_composition)
## Patient_ID Tumor_cells T_cells Macrophages B_cells
## Patient_01 Patient_01 60.53756 18.80622 9.057229 11.598986
## Patient_02 Patient_02 51.15594 13.32668 22.955958 12.561423
## Patient_03 Patient_03 39.62236 37.66656 20.281257 2.429825
## Patient_04 Patient_04 53.77020 17.74485 19.275656 9.209295
## Patient_05 Patient_05 46.22138 19.27910 23.374464 11.125062
## Patient_06 Patient_06 42.47161 41.79977 10.972697 4.755916
Now, we can add it as a barplot annotation, on top of our previous “top_ann”:
# One fill colour per cell type; the names double as the column names of
# cell_composition that are plotted below.
cell_colors <- c(
  Tumor_cells = "#E74C3C",
  T_cells = "#3498DB",
  Macrophages = "#F39C12",
  B_cells = "#27AE60"
)

# Barplot annotation built from the four cell-type columns (one bar per
# patient)
cell_barplot_ann <- HeatmapAnnotation(
  Cell_Composition = anno_barplot(
    cell_composition[, names(cell_colors)],
    gp = gpar(fill = cell_colors, col = "white"),
    bar_width = 0.8,
    height = unit(2, "cm")
  ),
  annotation_name_gp = gpar(fontsize = 10, fontface = "bold")
)

# Expression heatmap with the barplot stacked under the existing top
# annotation
heatmap_with_cells <- Heatmap(
  expression_data,
  name = "Expression",
  col = col_fun2,

  # Titles
  column_title = "Gene Expression with Cell Composition Data",
  column_title_gp = gpar(fontsize = 14, fontface = "bold"),

  # Annotations
  top_annotation = c(top_ann, cell_barplot_ann),
  left_annotation = row_annotation,

  # Clustering
  clustering_distance_rows = "spearman",
  clustering_distance_columns = "spearman",
  clustering_method_rows = "ward.D2",
  clustering_method_columns = "ward.D2",

  # Axis labels
  show_row_names = FALSE,
  show_column_names = TRUE,
  column_names_side = "bottom",
  column_names_rot = 90,
  column_names_gp = gpar(fontsize = 8),

  # Splits and the gaps between them
  column_split = patient_annotations$Stage,
  row_split = gene_categories$Pathway,
  column_gap = unit(2, "mm"),
  row_gap = unit(2, "mm"),

  # Control heatmap size
  width = unit(n_patients / 5, "cm"),
  height = unit(n_genes / 5, "cm")
)

draw(heatmap_with_cells)
Nice! I won’t cover all complex annotations in this tutorial, but check out ComplexHeatmap’s reference book to find out more! You can create boxplots, pointplots, density plots, all kinds of text and charts and even add survival data!
Ordering in the heatmap, text and line annotations
Sometimes, you don’t want to do unsupervised clustering, but order the heatmap columns (or rows) using a custom order. For example, let’s order the heatmap by BMI.
# Sort the annotation table from highest to lowest BMI, then put the matrix
# columns into the same patient order so the two stay aligned.
patient_annotations <- arrange(patient_annotations, desc(BMI))
patient_order <- patient_annotations[["Patient_ID"]]
expression_data <- expression_data[, patient_order]
Another thing that may be useful are text annotations. I’ll add an example where I add “S + patient name: BMI = x” as an annotation for each of our patients (it’s a bit of a silly example but let’s disable the column names and add this text annotation).
Additionally, I’ll add a pointplot for age and a lineplot for BMI, just to show you some examples:
# Patients were already put in descending BMI order in the previous step.
# Every annotation below pairs values from patient_annotations with the
# matrix columns by position, so check that the two are still aligned.
stopifnot(identical(colnames(expression_data), patient_annotations$Patient_ID))

# Annotation object
text_ann <- HeatmapAnnotation(
  # Text label per patient, e.g. "S01: BMI = 25"
  Patient_Info = anno_text(
    paste0("S", gsub('Patient_', '', colnames(expression_data)), ': BMI = ', patient_annotations$BMI),
    gp = gpar(fontsize = 8, col = "darkblue"),
    rot = 90,
    height = unit(1.5, "cm")
  ),
  # Points annotation for continuous data
  Age_Points = anno_points(
    patient_annotations$Age,
    pch = 16,
    size = unit(2, "mm"),
    gp = gpar(col = "darkred"),
    height = unit(2, "cm"),
    axis_param = list(at = c(20, 40, 60, 80), labels = c("20", "40", "60", "80"))
  ),
  # Line plot annotation showing BMI trend
  BMI_Line = anno_lines(
    patient_annotations$BMI,
    gp = gpar(col = "blue", lwd = 2),
    height = unit(2, "cm")
  ),
  annotation_name_gp = gpar(fontsize = 10, fontface = "bold")
)
# Rows are still clustered, but column clustering is switched off so the
# columns stay in the order of the matrix (descending BMI).
heatmap_with_text_ann <- Heatmap(
  expression_data,
  name = "Expression",
  col = col_fun2,

  # Titles
  column_title = "Gene Expression Ordered by BMI",
  column_title_gp = gpar(fontsize = 14, fontface = "bold"),

  # Annotations
  bottom_annotation = text_ann,
  left_annotation = row_annotation,

  # Clustering
  cluster_columns = FALSE, # Preserve BMI ordering
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "complete",

  # Axis labels are replaced by the text annotation
  show_column_names = FALSE,
  show_row_names = FALSE,

  # Row groups and overall size
  row_split = gene_categories$Pathway,
  row_gap = unit(0.7, "mm"),
  width = unit(n_patients / 4, "cm"),
  height = unit(n_genes / 5, "cm")
)

draw(heatmap_with_text_ann)
Nice! Once you get used to the syntax it’s really easy to add and remove annotations. If it’s less confusing, just add a separate annotation for each variable and add them to the heatmap using left_annotation = c(annotation_1, annotation_2, annotation_3)
.
You can also view the annotation only by calling draw(annotation_object)
.
Legend Customization
Professional publications require well-designed legends and ComplexHeatmap of course lets us customise our legends too!
We can change the location of the legend…
# Heatmap with custom legend positioning
draw(heatmap_with_cells, heatmap_legend_side = "left")
… or remove it completely.
# Show only annotation legends
draw(heatmap_with_cells, show_heatmap_legend = FALSE)
Note that this only affects the heatmap legend. The legend of our annotations is controlled inside the annotation object.
Dendrogram Customization
We mentioned dendrograms when we were playing around with clustering parameters, but of course you can customise them further.
For example, let’s colour dendrograms based on a particular number of groups (k).
# Pre-compute the dendrograms so their branches can be coloured.
# dist() and hclust() are called with their default methods spelled out
# (euclidean distance, complete linkage); color_branches() then colours the
# branches for k groups.
row_dend <- color_branches(
  as.dendrogram(
    hclust(dist(expression_data, method = "euclidean"), method = "complete")
  ),
  k = 4
)
col_dend <- color_branches(
  as.dendrogram(
    hclust(dist(t(expression_data), method = "euclidean"), method = "complete")
  ),
  k = 3
)

# Hand the coloured dendrograms to Heatmap() in place of its own clustering
dendrogram_heatmap <- Heatmap(
  expression_data,
  name = "Expression",
  column_title = "Custom Colored Dendrograms",
  col = col_fun2,
  cluster_rows = row_dend,
  cluster_columns = col_dend,
  row_dend_width = unit(3, "cm"),
  column_dend_height = unit(3, "cm"),
  show_column_names = FALSE,
  show_row_names = FALSE
)

draw(dendrogram_heatmap)
Saving Your Heatmaps
Amazing! To finish off, let’s save our heatmap! You can save it in different formats (png, pdf, jpeg…) and different sizes. We didn’t cover it here, but it’s also very easy to control the sizing of your heatmap, and adjust it to the size of the image you need.
# Output directory (placeholder - replace with a real folder)
path_to_folder <- "/path/to/output/folder"

# PDF: open the device, draw into it, then close it to write the file.
# Width and height are in inches.
pdf(
  file = file.path(path_to_folder, "patient_heatmap.pdf"),
  width = 12,
  height = 15
)
draw(heatmap_with_cells)
dev.off()

# PNG for presentations: same size in inches, rendered at 300 DPI
png(
  filename = file.path(path_to_folder, "patient_heatmap.png"),
  width = 12,
  height = 15,
  units = "in",
  res = 300
)
draw(heatmap_with_cells)
dev.off()
Nice! And that’s the end of this tutorial.
In this post, we covered how to create publication-ready heatmaps in R using ComplexHeatmap. Hope you found it useful!
As you saw, ComplexHeatmap is really flexible and allows us to create professional-looking plots quite easily. The key to mastering this package is understanding how to layer different types of annotations and customize every aspect of your visualization. Once you get used to it, have fun customising it to create amazing visualisations!
Additional resources
You might be interested in…
- Don’t forget to explore the Official ComplexHeatmap book for advanced features. It has many examples of things you can do and ways to visualise your data!
- For advanced users, I recently discovered this R package that allows you to make interactive heatmaps: InteractiveComplexHeatmap
- You can find the bioconductor package here: Bioconductor Package
- And the GitHub Repository here: ComplexHeatmap GitHub
Squidtastic! You made it till the end! Hope you found this post useful.
If you have any questions, or if there are any more topics you would like to see here, leave me a comment down below.
Otherwise, have a very nice day and… see you in the next one!
Before you go, you might want to check:
// add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.odd').parent('tbody').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); });