---
title: "Tree Visualization"
author: "Guangchuang Yu and Tommy Tsan-Yuk Lam\\

        School of Basic Medical Sciences, Southern Medical University"
date: "`r Sys.Date()`"
bibliography: ggtree.bib
biblio-style: apalike
output:
  prettydoc::html_pretty:
    toc: true
    theme: cayman
    highlight: github
  pdf_document:
    toc: true
vignette: >
  %\VignetteIndexEntry{02 Tree Visualization}
  %\VignetteEngine{knitr::rmarkdown}
  %\usepackage[utf8]{inputenc}
---

```{r style, echo=FALSE, results="asis", message=FALSE}
# Document-wide chunk defaults: leave code formatting untouched (tidy) and
# turn the `message` chunk option off.
knitr::opts_chunk$set(
    tidy = FALSE,
    message = FALSE
)
```


```{r echo=FALSE, results="hide", message=FALSE}
library("ape")
library("grid")
library("ggplot2")
library("cowplot")
library("treeio")
library("ggtree")


# Format a markdown link to a package's CRAN page.
#
# pkg: package name(s); used both as the link text and in the URL.
# Returns a character vector of the form "[pkg](<CRAN base>=pkg)".
CRANpkg <- function(pkg) {
    sprintf("[%s](%s=%s)", pkg, "https://CRAN.R-project.org/package", pkg)
}

# Format a markdown link to a package's Bioconductor landing page.
#
# pkg: package name(s); used both as the link text and as the last URL
#      path component.
# Returns a character vector of the form
# "[pkg](https://bioconductor.org/packages/pkg)".
# The link uses https so it matches the scheme used by CRANpkg().
Biocpkg <- function(pkg) {
    sprintf("[%s](https://bioconductor.org/packages/%s)", pkg, pkg)
}
```


To view a phylogenetic tree, we first need to parse the tree file into *R*. The
`r Biocpkg('ggtree')` [@yu_ggtree:_2017] package supports many file formats via the `r Biocpkg('treeio')` package, including output files of commonly used software
packages in evolutionary biology. For more details, please refer to
the [treeio vignette](https://bioconductor.org/packages/devel/bioc/vignettes/treeio/inst/doc/Importer.html).

```{r}
library("treeio")
library("ggtree")

# Locate the sample.nwk file bundled with treeio, then parse it into a tree.
nwk <- system.file(
    "extdata", "sample.nwk",
    package = "treeio"
)
tree <- read.tree(file = nwk)
```

# Viewing a phylogenetic tree with *ggtree*

The `r Biocpkg('ggtree')` package extends the `r CRANpkg('ggplot2')` [@wickham_ggplot2_2009] package to support viewing phylogenetic trees.
It implements a `geom_tree` layer for displaying a phylogenetic tree, as shown below:


```{r fig.width=3, fig.height=3, fig.align="center"}
# Build the plot from its parts: x/y mapping, the tree layer, the tree theme.
ggplot(tree, aes(x, y)) +
    geom_tree() +
    theme_tree()
```

The function `ggtree` is implemented as a shortcut for visualizing a tree, and it works exactly the same as shown above.

`r Biocpkg('ggtree')` takes full advantage of `r CRANpkg('ggplot2')`. For example, we can change the color, size and type of the lines as we do with `r CRANpkg('ggplot2')`.

```{r fig.width=3, fig.height=3, fig.align="center"}
# Line color, size and type are passed straight to ggtree().
ggtree(
    tree,
    color = "firebrick",
    size = 1,
    linetype = "dotted"
)
```

By default, the tree is viewed in ladderized form; users can set the parameter *ladderize = FALSE* to disable it.

```{r fig.width=3, fig.height=3, fig.align="center"}
# Same tree with ladderizing switched off.
ggtree(tree, ladderize = FALSE)
```

By default, *branch.length* is used to scale the edges. Users can set the parameter *branch.length = "none"* to view only the tree topology (cladogram), or specify another numerical variable to scale the tree (*e.g.* _d~N~/d~S~_; see also the [Tree Annotation](treeAnnotation.html) vignette).

```{r fig.width=3, fig.height=3, fig.align="center"}
# Topology only: branch lengths are not used for scaling.
ggtree(tree, branch.length = "none")
```

# Layout

Currently, `r Biocpkg('ggtree')` supports several layouts, including:

+ rectangular (by default)
+ slanted
+ circular
+ fan

for *phylogram* (by default) and *cladogram* (if the user explicitly sets
*branch.length='none'*). Unrooted (equal angle and daylight methods), time-scaled
and 2-dimensional layouts are also supported.


## Phylogram and Cladogram


```{r eval=FALSE}
library(ggtree)
# Fixed seed so the random tree is reproducible. The former expression
# `2017-02-16` is plain subtraction (2017 - 2 - 16), i.e. 1999; the value is
# now written out explicitly.
set.seed(1999)
tr <- rtree(50)

# Phylograms (branch lengths used)
ggtree(tr)
ggtree(tr, layout = "slanted")
ggtree(tr, layout = "circular")
ggtree(tr, layout = "fan", open.angle = 120)

# Unrooted layouts
ggtree(tr, layout = "equal_angle")
ggtree(tr, layout = "daylight")

# Cladograms (branch lengths ignored)
ggtree(tr, branch.length = "none")
ggtree(tr, branch.length = "none", layout = "circular")
ggtree(tr, layout = "daylight", branch.length = "none")
```

```{r echo=FALSE, fig.width=8, fig.height=8, message=FALSE}
library(ggtree)
# Fixed seed so the random tree is reproducible. The former expression
# `2017-02-16` is plain subtraction (2017 - 2 - 16), i.e. 1999; the value is
# now written out explicitly, so the generated tree is unchanged.
set.seed(1999)
tr <- rtree(50)
library(cowplot)

# Centre every panel title.
theme_layout <- theme(plot.title = element_text(hjust = 0.5))

# One panel per layout, arranged in a 3-column grid.
plot_grid(
    ggtree(tr) +
        ggtitle("rectangular (phylogram)") + theme_layout,
    ggtree(tr, layout = "slanted") +
        ggtitle("slanted (phylogram)") + theme_layout,
    ggtree(tr, layout = "circular") +
        ggtitle("circular (phylogram)") + theme_layout,
    ggtree(tr, layout = "fan", open.angle = 120) +
        ggtitle("fan (phylogram)") + theme_layout,
    ggtree(tr, layout = "equal_angle") +
        ggtitle("equal angle (unrooted)") + theme_layout,
    ggtree(tr, layout = "daylight") +
        ggtitle("daylight (unrooted)") + theme_layout,
    ggtree(tr, branch.length = "none") +
        ggtitle("rectangular (cladogram)") + theme_layout,
    ggtree(tr, branch.length = "none", layout = "circular") +
        ggtitle("circular (cladogram)") + theme_layout,
    ggtree(tr, layout = "daylight", branch.length = "none") +
        ggtitle("daylight (cladogram)") + theme_layout,
    ncol = 3)
```


There are also other possible layouts that can be drawn by modifying
scales/coordinates, for example, [reversing the label of the time
scale](https://github.com/GuangchuangYu/ggtree/issues/87), [reproportioning a
circular/fan tree](https://groups.google.com/d/msgid/bioc-ggtree/6db25ec7-ffb1-4776-bfe4-4a1ba239c8e8%40googlegroups.com), *etc.*

```{r eval=FALSE}
# Each line corresponds to one panel (A-I) of the figure below.
ggtree(tr) + scale_x_reverse()
ggtree(tr) + coord_flip()
ggtree(tr) + scale_x_reverse() + coord_flip()
print(ggtree(tr), newpage = TRUE,
      vp = grid::viewport(angle = -30, width = .9, height = .9))
ggtree(tr, layout = "slanted") + coord_flip()
ggtree(tr, layout = "slanted", branch.length = "none") +
    coord_flip() + scale_y_reverse() + scale_x_reverse()
ggtree(tr, layout = "circular") + xlim(-10, NA)
ggtree(tr) + scale_x_reverse() + coord_polar(theta = "y")
# limits match the code that actually draws the last panel (15, not 10)
ggtree(tr) + scale_x_reverse(limits = c(15, 0)) + coord_polar(theta = "y")
```

```{r fig.keep='none', echo=FALSE, warning=FALSE}
# Print the tree into a viewport rotated by -30 degrees and capture the
# result with grid.grabExpr() instead of drawing it directly.
tree_angle <- grid::grid.grabExpr(
    print(
        ggtree(tr),
        newpage = TRUE,
        vp = grid::viewport(angle = -30, width = .9, height = .9)
    )
)
```

```{r fig.width=8, fig.height = 8, echo=FALSE, warning=FALSE}
# Nine variants obtained by changing scales/coordinates, labelled A-I.
plot_grid(
    ggtree(tr) +
        scale_x_reverse(),
    ggtree(tr) +
        coord_flip(),
    ggtree(tr) +
        scale_x_reverse() +
        coord_flip(),
    tree_angle,
    ggtree(tr, layout = "slanted") +
        coord_flip(),
    ggtree(tr, layout = "slanted", branch.length = "none") +
        coord_flip() +
        scale_y_reverse() +
        scale_x_reverse(),
    ggtree(tr, layout = "circular") +
        xlim(-10, NA),
    ggtree(tr) +
        scale_x_reverse() +
        coord_polar(theta = "y"),
    ggtree(tr) +
        scale_x_reverse(limits = c(15, 0)) +
        coord_polar(theta = "y"),
    ncol = 3,
    labels = LETTERS[1:9]
)
```



## Time-scaled tree

A phylogenetic tree can be scaled by time (time-scaled tree) by specifying the parameter, *mrsd* (most recent sampling date).

```{r fig.width=8, fig.height=4, fig.align="center"}
# Read the twoD.tree example file shipped with treeio, then draw it with the
# most recent sampling date (mrsd) set and an x-axis shown.
tree2d <- read.beast(
    system.file("extdata", "twoD.tree", package = "treeio")
)
ggtree(tree2d, mrsd = "2014-05-01") +
    theme_tree2()
```

## Two dimensional tree

`r Biocpkg('ggtree')` implements a two-dimensional tree layout. It accepts the parameter *yscale* to scale the y-axis based on the selected tree attribute. The attribute should be a numerical variable. If it is a *character*/*category* variable, the user should provide a named vector that maps the variable to numeric values by passing it to the parameter *yscale_mapping*.


```{r fig.width=9, fig.height=4, fig.align="center"}
# y positions come from the "NGS" attribute, mapped to numbers through
# yscale_mapping; the y-axis is then relabelled N2..N7.
ggtree(
    tree2d,
    mrsd = "2014-05-01",
    yscale = "NGS",
    yscale_mapping = c(N2 = 2, N3 = 3, N4 = 4, N5 = 5, N6 = 6, N7 = 7)
) +
    theme_classic() +
    theme(
        axis.line.x = element_line(),
        axis.line.y = element_line()
    ) +
    theme(
        panel.grid.major.x = element_line(
            color = "grey20", linetype = "dotted", size = .3
        ),
        panel.grid.major.y = element_blank()
    ) +
    scale_y_continuous(labels = paste0("N", 2:7))
```

In this example, the figure demonstrates that the quantity of __*y*__ increases along the trunk. Users can highlight the trunk with a different line size or color using the functions described in the [Tree Manipulation](treeManipulation.html) vignette.


# Displaying tree scale (evolution distance)

To show tree scale, user can use `geom_treescale()` layer.

```{r fig.width=4, fig.height=4, fig.align="center"}
# Add a tree scale layer with its default settings.
ggtree(tree) +
    geom_treescale()
```

`geom_treescale()` supports the following parameters:

+ *x* and *y* for tree scale position
+ *width* for the length of the tree scale
+ *fontsize* for the size of the text
+ *linesize* for the size of the line
+ *offset* for relative position of the line and the text
+ *color* for color of the tree scale


```{r eval=FALSE}
# Explicit position, width and color of the scale
ggtree(tree) + geom_treescale(x = 0, y = 12, width = 6, color = "red")
# Larger text and line, with the text offset from the line
ggtree(tree) + geom_treescale(fontsize = 8, linesize = 2, offset = -1)
```

```{r fig.width=8, fig.height=4, fig.align="center", echo=FALSE}
# Two tree-scale variants side by side, labelled A and B.
plot_grid(
    ggtree(tree) +
        geom_treescale(x = 0, y = 12, width = 6, color = "red"),
    ggtree(tree) +
        geom_treescale(fontsize = 8, linesize = 2, offset = -1),
    ncol = 2,
    labels = LETTERS[1:2]
)
```


We can also use `theme_tree2()` to display the tree scale by adding *x axis*.


```{r fig.width=3, fig.height=3, fig.align="center"}
# theme_tree2() is applied here in place of a tree scale layer.
ggtree(tree) +
    theme_tree2()
```


The tree scale is not restricted to evolutionary distance; `r Biocpkg('ggtree')` can re-scale the tree with other numerical variables. More details can be found in the [Tree Annotation](treeAnnotation.html) vignette.


# Displaying nodes/tips

Showing all the internal nodes and tips in the tree can be done by adding a layer of points using `geom_nodepoint`,  `geom_tippoint` or `geom_point`.


```{r fig.width=3, fig.height=3, fig.align="center"}
# One point per node; shape and color are both mapped to `isTip`.
ggtree(tree) +
    geom_point(
        aes(shape = isTip, color = isTip),
        size = 3
    )
```

```{r fig.width=3, fig.height=3, fig.align="center"}
# Keep the tree with its node points in `p`, then add tip points on top of
# it for display.
p <- ggtree(tree) +
    geom_nodepoint(color = "#b5e521", alpha = 1/4, size = 10)
p +
    geom_tippoint(color = "#FDAC4F", shape = 8, size = 3)
```

## Displaying labels

Users can use `geom_text` or `geom_label` to display the node (if available) and tip labels simultaneously or `geom_tiplab` to only display tip labels:


```{r fig.width=3, fig.height=3, warning=FALSE, fig.align="center"}
# Add tip labels to the stored plot.
p +
    geom_tiplab(size = 3, color = "purple")
```

`geom_tiplab` not only supports using *text* or *label* geom to display labels,
it also supports *image* geom to label tip with image files. A corresponding
geom, `geom_nodelab` is also provided for displaying node labels. For details of
label nodes with images, please refer to
the vignette,
[Annotating phylogenetic tree with images](https://guangchuangyu.github.io/software/ggtree/vignettes/ggtree-ggimage.html).



For *circular* and *unrooted* layout, `r Biocpkg('ggtree')` supports rotating node labels according to the angles of the branches.


```{r fig.width=6, fig.height=6, warning=FALSE, fig.align="center"}
# Circular layout; each tip label's angle is mapped to the `angle` variable.
ggtree(tree, layout = "circular") +
    geom_tiplab(aes(angle = angle), color = "blue")
```


To make the labels more readable to the human eye, `r Biocpkg('ggtree')` provides `geom_tiplab2` for the `circular` layout (see posts [1](https://groups.google.com/forum/?utm_medium=email&utm_source=footer#!topic/bioc-ggtree/o35PV3iHO-0) and [2](https://groups.google.com/forum/#!topic/bioc-ggtree/p42R5l8J-14)).


```{r fig.width=6, fig.height=6, warning=FALSE, fig.align="center"}
# Circular layout labelled with geom_tiplab2().
ggtree(tree, layout = "circular") +
    geom_tiplab2(color = "blue")
```

By default, the label positions are based on the node positions; we can change them to be based on the middle of the branch/edge.


```{r fig.width=4, fig.height=3, warning=FALSE, fig.align="center"}
# Map the label x position to `branch` and shift the text with vjust.
p +
    geom_tiplab(
        aes(x = branch),
        size = 3,
        color = "purple",
        vjust = -0.3
    )
```


Positioning labels at the middle of the branch is very useful when annotating the transition from a parent node to a child node.


# Update tree view with a new tree

In the previous example, we have a _`p`_ object that stores a tree view of 13 tips with internal nodes highlighted by large colored dots. If users want to apply this pattern (we can imagine a more complex one) to a new tree, they don't need to rebuild the plot step by step. `ggtree` provides an operator, _`%<%`_, for applying the visualization pattern to a new tree.

For example, the pattern in the _`p`_ object will be applied to a new tree with 50 tips as shown below:
```{r fig.width=3, fig.height=3, fig.align="center"}
# Apply the plot stored in `p` to a freshly generated 50-tip tree.
p %<% rtree(n = 50)
```

# Theme

`theme_tree()` defines a totally blank canvas, while _`theme_tree2()`_ adds
phylogenetic distance (via the x-axis). Both themes accept a
_`bgcolor`_ parameter that defines the background color. Users can pass
any [theme components](http://ggplot2.tidyverse.org/reference/theme.html) to the
`theme_tree()` function to modify them.


```{r eval=FALSE}
# The first argument of theme_tree() sets the background color.
ggtree(rtree(30), color = "red") + theme_tree("steelblue")
# "purple" matches the code that draws the figure below
ggtree(rtree(20), color = "purple") + theme_tree("black")
```

```{r fig.width=8, fig.height=3, fig.align="center", echo=FALSE}
# Two random trees on differently colored backgrounds, side by side.
cowplot::plot_grid(
    ggtree(rtree(30), color = "red") +
        theme_tree("steelblue"),
    ggtree(rtree(20), color = "purple") +
        theme_tree("black"),
    ncol = 2
)
```


# Visualize a list of trees

`ggtree` supports `multiPhylo` object and a list of trees can be viewed simultaneously.

```{r fig.width=12, fig.height=4}
# Three random trees (10, 20 and 40 tips) combined into a multiPhylo object.
trees <- lapply(c(10, 20, 40), rtree)
class(trees) <- "multiPhylo"
# One facet per tree (`.id`). The argument is spelled out as `scales`
# rather than relying on partial matching of `scale`.
ggtree(trees) +
    facet_wrap(~.id, scales = "free") +
    geom_tiplab()
```

One hundred bootstrap trees can also be viewed simultaneously.
```{r fig.width=20, fig.height=20}
# Read the RAxML bootstrap trees shipped with treeio and draw one facet per
# tree, ten facets per row.
btrees <- read.tree(
    system.file("extdata/RAxML", "RAxML_bootstrap.H3", package = "treeio")
)
ggtree(btrees) +
    facet_wrap(~.id, ncol = 10)
```

Another way to view the bootstrap trees is to merge them together to form a density tree. We can add a layer of the best tree on the top of the density tree.

```{r}
# Base plot: all bootstrap trees drawn together, light blue and mostly
# transparent.
p <- ggtree(
    btrees,
    layout = "rectangular",
    color = "lightblue",
    alpha = .3
)

# Read the bipartitions tree, convert it to plotting data without branch
# lengths, and draw it as an extra tree layer on top of `p`.
best_tree <- read.tree(
    system.file(
        "extdata/RAxML", "RAxML_bipartitionsBranchLabels.H3",
        package = "treeio"
    )
)
df <- fortify(best_tree, branch.length = "none")
p +
    geom_tree(data = df, color = "firebrick")
```

# Rescale tree

Most of the phylogenetic trees are scaled by evolutionary distance (substitution/site). In `ggtree`, users can re-scale a phylogenetic tree by any numerical variable inferred by evolutionary analysis (e.g. *dN/dS*).


```{r fig.width=10, fig.height=5}
library("treeio")
# Load the BEAST example tree shipped with ggtree and print its summary.
beast_file <- system.file(
    "examples/MCC_FluA_H3.tree",
    package = "ggtree"
)
beast_tree <- read.beast(beast_file)
beast_tree

# Left panel: time-scaled via mrsd. Right panel: branches scaled by `rate`.
p1 <- ggtree(beast_tree, mrsd = "2013-01-01") +
    theme_tree2() +
    ggtitle("Divergence time")
p2 <- ggtree(beast_tree, branch.length = "rate") +
    theme_tree2() +
    ggtitle("Substitution rate")

library(cowplot)
plot_grid(p1, p2, ncol = 2)
```

```{r fig.width=10, fig.height=5}
# Load the codeml mlc example shipped with treeio.
mlcfile <- system.file(
    "extdata/PAML_Codeml", "mlc",
    package = "treeio"
)
mlc_tree <- read.codeml_mlc(mlcfile)

# Left panel: default branch lengths. Right panel: scaled by `dN_vs_dS`.
p1 <- ggtree(mlc_tree) +
    theme_tree2() +
    ggtitle("nucleotide substitutions per codon")
p2 <- ggtree(mlc_tree, branch.length = "dN_vs_dS") +
    theme_tree2() +
    ggtitle("dN/dS tree")
plot_grid(p1, p2, ncol = 2)
```

In addition to specifying `branch.length` in tree visualization, users can change the branch lengths stored in the tree object by using the `rescale_tree` function.

```{r}
# Rescale the tree object itself by `rate`, then plot it with no
# branch.length argument.
beast_tree2 <- rescale_tree(
    beast_tree,
    branch.length = "rate"
)
ggtree(beast_tree2) +
    theme_tree2()
```

# Zoom on a portion of tree

`ggtree` provides a _`gzoom`_ function that is similar to the _`zoom`_ function provided in `ape`. This function simultaneously plots a whole phylogenetic tree and a portion of it. It is intended for exploring very large trees.

```{r fig.width=9, fig.height=5, fig.align="center"}
library("ape")
# Load the chiroptera dataset and zoom in on the tips whose labels
# match "Plecotus".
data(chiroptera)
library("ggtree")
gzoom(
    chiroptera,
    grep("Plecotus", chiroptera$tip.label)
)
```

Zooming in on a selected clade of a tree that was already annotated with `ggtree` is also supported.

```{r fig.width=9, fig.height=5, message=FALSE, warning=FALSE}
# Group tip labels by what remains after removing every "_\\w+" match
# (presumably the genus part of the label; verify against the data).
groupInfo <- split(
    chiroptera$tip.label,
    gsub("_\\w+", "", chiroptera$tip.label)
)
chiroptera <- groupOTU(chiroptera, groupInfo)

# Color the tree by group, label the tips, then zoom in on the "Plecotus"
# tips of the already-annotated plot.
p <- ggtree(chiroptera, aes(color = group)) +
    geom_tiplab() +
    xlim(NA, 23)
gzoom(
    p,
    grep("Plecotus", chiroptera$tip.label),
    xmax_adjust = 2
)
```


# Color tree

In `ggtree`, coloring phylogenetic tree is easy, by using `aes(color=VAR)` to map the color of tree based on a specific variable (numeric and category are both supported).

```{r fig.width=5, fig.height=5}
# Map branch color to `rate` on a dark green to red gradient and place the
# legend on the right.
ggtree(
    beast_tree,
    aes(color = rate)
) +
    scale_color_continuous(
        low = "darkgreen",
        high = "red"
    ) +
    theme(legend.position = "right")
```

Users can use any feature (if available), including clade posterior and *dN/dS* _etc._, to scale the color of the tree.


# References