diff --git a/counting_topologies.md b/counting_topologies.md
index be5bf255..b6a0c134 100644
--- a/counting_topologies.md
+++ b/counting_topologies.md
@@ -49,9 +49,9 @@ def create_notebook_data():
 **Yan Wong**
 
 This tutorial is intended to be a gentle introduction to the combinatorial
-treatment of tree topologies in `tskit`. For a more formal introduction,
+treatment of tree topologies in {program}`tskit`. For a more formal introduction,
 see the {ref}`sec_combinatorics` section of the
-official `tskit` {ref}`documentation<tskit:sec_introduction>`.
+official {program}`tskit` {ref}`documentation<tskit:sec_introduction>`.
 
 The *topology* of a single tree is the term used to describe the branching pattern,
 regardless of the lengths of the branches. For example, both trees below have the
@@ -68,7 +68,7 @@ display(deep_tree.draw_svg(node_labels=node_labels, y_axis=True))
 ```
 
 :::{note}
-The treatment of topologies in `tskit` is restricted to trees with a single defined root,
+The treatment of topologies in {program}`tskit` is restricted to trees with a single defined root,
 without nodes with a single child (i.e. trees must consist of nodes that are either leaves,
 or internal nodes with two or more children).  For convenience in the examples
 below, trees are drawn with the tips flagged as samples, although whether a node is a sample or
@@ -84,34 +84,68 @@ different topologies:
 ```{code-cell}
 :tags: [hide-input]
 from string import ascii_lowercase
-from IPython.display import SVG
-
-def str_none(s, prefix=None):
-    if s is not None:
-        if prefix is None:
-            return str(s)
-        else:
-            return prefix + " = " + str(s)
-    return None
-
-def draw_svg_trees(trees, node_labels={}, x_lab_attr=None, width=100, height=150, space=10):
-    w = width + space
-    h = height + space
-    trees = list(trees)
-    s = f'<svg height="{h}" width="{w * len(trees)}" xmlns="http://www.w3.org/2000/svg">'
-    s += f'<style>.x-axis {{transform: translateY({space}px)}}</style>'
-    for i, tree in enumerate(trees):
-        s += tree.draw_svg(
-            size=(width, height),
-            canvas_size=(w, h),
-            root_svg_attributes={"x": i * w},
-            node_labels=node_labels,
-            x_label=str_none(getattr(tree.rank(), x_lab_attr or "", None), x_lab_attr)
+
+# This helper is copied from the side-by-side SVG example in the visualization tutorial.
+def draw_svg_side_by_side(
+    drawables,
+    *,
+    size=(200, 200),
+    sizes=None,
+    padding=40,
+    canvas_size=None,
+    per_svg_kwargs=None,
+    **kwargs,
+):
+    if len(drawables) == 0:
+        raise ValueError("Need at least one drawable")
+    if per_svg_kwargs is None:
+        per_svg_kwargs = [{} for _ in drawables]
+    if len(per_svg_kwargs) != len(drawables):
+        raise ValueError("per_svg_kwargs must have the same length as drawables")
+    if sizes is None:
+        sizes = [size for _ in drawables]
+    if len(sizes) != len(drawables):
+        raise ValueError("sizes must have the same length as drawables")
+    if canvas_size is None:
+        canvas_size = (
+            sum(s[0] for s in sizes) + (len(drawables) - 1) * padding,
+            max(s[1] for s in sizes),
         )
-    s += '</svg>'
-    return SVG(s)
 
-draw_svg_trees(tskit.all_tree_labellings(tree), node_labels={u: ascii_lowercase[u] for u in tree.samples()})
+    preamble = []
+    x_offset = sizes[0][0] + padding
+    for j, drawable in enumerate(drawables[1:], start=1):
+        svg_kwargs = dict(kwargs)
+        svg_kwargs.update(per_svg_kwargs[j])
+        svg_kwargs["size"] = sizes[j]
+        svg_kwargs["root_svg_attributes"] = {
+            **svg_kwargs.get("root_svg_attributes", {}),
+            "x": x_offset,
+        }
+        preamble.append(drawable.draw_svg(**svg_kwargs))
+        x_offset += sizes[j][0] + padding
+
+    first_kwargs = dict(kwargs)
+    first_kwargs.update(per_svg_kwargs[0])
+    first_kwargs["size"] = sizes[0]
+    first_kwargs["canvas_size"] = canvas_size
+    first_kwargs["preamble"] = first_kwargs.get("preamble", "") + "".join(preamble)
+    return drawables[0].draw_svg(**first_kwargs)
+
+
+trees = list(tskit.all_tree_labellings(tree))
+draw_svg_side_by_side(
+    trees,
+    size=(100, 150),
+    padding=10,
+    per_svg_kwargs=[
+        {
+            "node_labels": {u: ascii_lowercase[u] for u in tree.samples()},
+            "title": f"label {tree.rank().label}",
+        }
+        for tree in trees
+    ],
+)
 ```
 
 These are, in fact, the only possible three labellings for a three-tip tree of that shape.
@@ -124,8 +158,9 @@ possible labelling):
 tskit.Tree.generate_star(3).draw_svg(node_labels={})
 ```
 
-A 3-tip tree therefore has only four possible topologies.
-These can be generated with the {func}`~tskit.all_trees` function.
+A 3-tip tree therefore has two shapes, one of which has 3 labellings and one of which
+has only one labelling. That gives four possible topologies, which
+can be generated using the {func}`~tskit.all_trees` function.
 
 ```{code-cell}
 generated_trees = tskit.all_trees(3)
@@ -136,10 +171,17 @@ Here they are, plotted out with their shapes enumerated from zero:
 
 ```{code-cell}
 :tags: [hide-input]
-draw_svg_trees(
-    tskit.all_trees(3),
-    node_labels={u: ascii_lowercase[u] for u in tree.samples()},
-    x_lab_attr="shape"
+trees = list(tskit.all_trees(3))
+draw_svg_side_by_side(
+    trees,
+    size=(100, 150),
+    per_svg_kwargs=[
+        {
+            "node_labels": {u: ascii_lowercase[u] for u in tree.samples()},
+            "title": f"shape {tree.rank().shape} (lab {tree.rank().label})",
+        }
+        for tree in trees
+    ],
 )
 ```
 
@@ -160,7 +202,14 @@ Again, we can give each shape a number or *index*, starting from zero:
 
 ```{code-cell}
 :tags: [hide-input]
-draw_svg_trees(tskit.all_tree_shapes(4), x_lab_attr="shape")
+trees = list(tskit.all_tree_shapes(4))
+draw_svg_side_by_side(
+    trees,
+    size=(100, 150),
+    padding=10,
+    node_labels={},
+    per_svg_kwargs=[{"title": f"shape {tree.rank().shape}"} for tree in trees],
+)
 ```
 
 Each of these shapes will have a separate number of possible labellings, and trees with
@@ -203,8 +252,8 @@ new_tree = tskit.Tree.unrank(num_tips, (1270, 21580))
 new_tree.draw_svg(node_labels=ascii_node_labels)
 ```
 
-Note that this method generates a single tree in a new tree sequence
-whose a default sequence length is 1.0.
+Note that this method generates a single tree in a new tree sequence,
+using a default sequence length of 1.0.
 
 ## Methods for large trees
 
@@ -236,7 +285,7 @@ rather than comparing ranks directly.
 simulated_tree = simulated_ts.first(sample_lists=True)  # kc_distance requires sample lists
 if simulated_ts.first(sample_lists=True).kc_distance(simulated_tree) == 0:
     print("Trees are identical")
-    # To compare to the new_tree we need to fix 
+    # NB: to compare to the new_tree we need to ensure that their sample IDs are equivalent.
     # print("The simulated and topology-constructed trees have the same topology")
 ```
 
@@ -246,7 +295,7 @@ the number of *embedded topologies* in a large tree.
 
 ### Embedded topologies
 
-An embedded topology is a a topology involving a subset of the tips of a tree.
+An embedded topology is a topology involving a subset of the tips of a tree.
 If the tips are classified into (say) three groups, red, green, and blue,
 we can efficiently count all the embedded three-tip trees which have
 one tip from each group using the {meth}`Tree.count_topologies` method.
@@ -260,7 +309,7 @@ styles = [
     f".node.sample.p{p.id} > .sym " + "{" + f"fill: {colour}" + "}"
     for colour, p in zip(['red', 'green', 'blue'], big_tree.tree_sequence.populations())
 ]
-big_tree.draw_svg(style="".join(styles), node_labels={}, time_scale="rank", x_label="big_tree")
+big_tree.draw_svg(style="".join(styles), node_labels={}, time_scale="rank")
 ```
 
 In this tree, it is clear that the green and blue tips never cluster together.
@@ -280,21 +329,30 @@ colours = ['red', 'green', 'blue']
 styles = [f".n{u}>.sym {{fill: {c} }}" for u, c in enumerate(colours)]
 
 embedded_counts = topology_counter[0, 1, 2]
-for embedded_tree in tskit.all_trees(3):
-    rank = embedded_tree.rank()
-    number_of_instances = embedded_counts[rank]
-    label = f"{number_of_instances} instances embedded in big_tree"
-    display(embedded_tree.draw_svg(style="".join(styles), node_labels={}, x_label=label))
+embedded_trees = list(tskit.all_trees(3))
+draw_svg_side_by_side(
+    embedded_trees,
+    size=(160, 150),
+    padding=10,
+    per_svg_kwargs=[
+        {
+            "style": "".join(styles),
+            "node_labels": {},
+            "title": f"{embedded_counts[embedded_tree.rank()]} embedded instances",
+        }
+        for embedded_tree in embedded_trees
+    ],
+)
 ```
 
 ## Methods over tree sequences
 
 It can be useful to count embedded topologies over an entire tree sequence.
 For instance, we might want to know the number of embedded topologies
-that support Neanderthals as a sister group to europeans versus africans.
-`Tskit` provides the efficient {meth}`TreeSequence.count_topologies` method to
-do this [incrementally](sec_incremental), without having to re-count the topologies
-independently in each tree.
+that support Neanderthals as a sister group to Europeans versus Africans.
+{program}`Tskit` provides the efficient {meth}`TreeSequence.count_topologies` method
+to do this [incrementally](sec_incremental), without having to re-count the
+topologies independently in each tree.
 
 ```{code-cell}
 :tags: [hide-input]
@@ -340,10 +398,20 @@ for p in range(ntips):
     styles += f".n{p}>.sym {{fill: {colours[name]} }}"
 
 total = sum(topology_span.values())
-for rank, weight in topology_span.items():
-    label = f"{weight/total *100:.1f}% of genome"
-    embedded_tree = tskit.Tree.unrank(ntips, rank)
-    display(embedded_tree.draw_svg(size=(160, 150), style="".join(styles), node_labels=node_labels, x_label=label))
+embedded_trees = [tskit.Tree.unrank(ntips, rank) for rank in topology_span]
+draw_svg_side_by_side(
+    embedded_trees,
+    size=(160, 150),
+    padding=10,
+    per_svg_kwargs=[
+        {
+            "style": "".join(styles),
+            "node_labels": node_labels,
+            "title": f"{weight/total * 100:.1f}% of genome",
+        }
+        for weight in topology_span.values()
+    ],
+)
 ```
 
 Perhaps unsurprisingly, the most common topology is the one that groups the non-African
@@ -351,4 +419,4 @@ populations together (although there are many trees of the other two topologies,
 mostly reflecting genetic divergence prior to the emergence of humans out of Africa).
 
 For an example with real data, see {ref}`sec_popgen_topological`
-in the {ref}`sec_intro_popgen` tutorial.
\ No newline at end of file
+in the {ref}`sec_intro_popgen` tutorial.
diff --git a/phylogen.md b/phylogen.md
index 87cd61ca..6b2492ba 100644
--- a/phylogen.md
+++ b/phylogen.md
@@ -16,9 +16,10 @@ kernelspec:
 
 (sec_phylogen)=
 
-# `Tskit` for phylogenetics
+# {program}`Tskit` for phylogenetics
 
-`Tskit`, the tree sequence toolkit, can be used as an efficient library for very large evolutionary trees. `Tskit` makes it easy to deal with trees with millions of
+{program}`Tskit`, the tree sequence toolkit, can be used as an efficient library for
+very large evolutionary trees. {program}`Tskit` makes it easy to deal with trees with millions of
 tips, as in the example below:
 
 ```{code-cell}
@@ -77,7 +78,7 @@ import tsconvert  # used for reading tree sequences from different formats
 ts = tsconvert.from_newick("(A:6,((B:1,C:1):2,(D:2,E:2):1):3);", span=1000)
 ```
 
-The "succinct tree sequence" format used by `tskit` can also store mutations
+The "succinct tree sequence" format used by {program}`tskit` can also store mutations
 (and optionally a reference genome) along with the tree(s). This results in a
 single unified representation of large genomic datasets, storing trees,
 sequence data and metadata in a single efficient structure. Examples are given
@@ -92,11 +93,11 @@ sorting. An overview, and links to further details are given at the
 
 ## Hints for phylogeneticists
 
-Unlike other phylogenetic libraries, `tskit` is designed to efficiently store not just 
+Unlike other phylogenetic libraries, {program}`tskit` is designed to efficiently store not just 
 single trees, but sequences of correlated trees along a genome. This means that the
 library has some features not found in more standard phylogenetic libraries.
 Here we focus on the {ref}`sec_python_api`,
-introducing seven `tskit` concepts that may be useful to those with a background in
+introducing seven {program}`tskit` concepts that may be useful to those with a background in
 phylogenetics (each is linked to a separate section below):
 
 1. An evolutionary tree is always contained within a "tree sequence".
@@ -149,7 +150,7 @@ tree.tree_sequence  # When output in a notebook, prints a summary of the tree se
 (sec_phylogen_ids)=
 ### Integer node and edge IDs
 
-The plot above labels nodes by their name, but internally the `tskit` library relies
+The plot above labels nodes by their name, but internally the {program}`tskit` library relies
 heavily on integer IDs. Here's the same tree with node IDs plotted instead:
 
 ```{code-cell}
@@ -164,9 +165,9 @@ integer ID starting from 0 to `ts.num_nodes - 1` (IDs can be allocated in any or
 often the tips are labelled starting from 0 but this is not necessarily so, and
 is not the case in the example above).
 
-For efficiency reasons, tree traversal routines, as well as many other `tskit` methods,
-tend to return integer IDs. You can use this ID to get specific information about the
-node and its position in the tree, for example
+For efficiency reasons, tree traversal routines, as well as many other {program}`tskit`
+methods, tend to return integer IDs. You can use this ID to get specific information
+about the node and its position in the tree, for example
 
 ```{code-cell}
 node_id = 4
@@ -187,8 +188,8 @@ Other methods also exist to
 Rather than refer to "branches" of a tree, tskit tends to refer to
 {ref}`sec_terminology_edges` (the term "edge" emphasises that these can span
 {ref}`sec_phylogen_multiple_trees`, although for tree sequences containing a single
-tree, the terms are interchangeable). Like other entities in `tskit`, edges are referred
-to by an integer ID. For instance, here is the edge above the internal node 4
+tree, the terms are interchangeable). Like other entities in {program}`tskit`, edges
+are referred to by an integer ID. For instance, here is the edge above the internal node 4
 
 ```{code-cell}
 node_id = 4
@@ -233,7 +234,7 @@ Often we are only have detailed information about specific nodes that we have sa
 such as genomes A, B, C, D, and E in the example above. These are designated as
 *sample nodes*, and are plotted as square nodes. The concept of
 {ref}`sample nodes<sec_data_model_definitions_sample>` is integral
-to the `tskit` format. They can be identified by using the
+to the {program}`tskit` format. They can be identified by using the
 {meth}`Node.is_sample` and {meth}`Tree.is_sample` methods, or can be listed using
 {meth}`TreeSequence.samples` or {meth}`Tree.samples()` (internally, the `node.flags`
 field is used to {ref}`flag up<sec_node_table_definition>` which nodes are samples):
@@ -283,19 +284,19 @@ tree.tree_sequence.nodes_time
 (sec_phylogen_node_time)=
 ### Nodes must have times
 
-Perhaps the most noticable different between a `tskit` tree and the encoding of trees
-in other phylogenetic libraries is that `tskit` does not explicitly store branch lengths.
+Perhaps the most noticable different between a {program}`tskit` tree and the encoding of trees
+in other phylogenetic libraries is that {program}`tskit` does not explicitly store branch lengths.
 Instead, each node has a *time* associated with it. Branch lengths can therefore be
 found by calculating the difference between the time of a node and the time of its
 parent node.
 
-Since nodes *must* have a time, `tskit` trees aways have these (implicit) branch
+Since nodes *must* have a time, {program}`tskit` trees aways have these (implicit) branch
 lengths. To represent a tree ("cladogram") in which the branch lengths are not
 meaningful, the {attr}`TreeSequence.time_units` of a tree sequence can be
 specified as `"uncalibrated"` (see below)
 
-Another implication of storing node times rather than branch lengths is that `tskit`
-trees are always directional (i.e. they are "rooted"). The reason that `tskit` stores
+Another implication of storing node times rather than branch lengths is that {program}`tskit`
+trees are always directional (i.e. they are "rooted"). The reason that {program}`tskit` stores
 times of nodes (rather than e.g. genetic distances between them) is to ensure temporal 
 consistency. In particular it makes it impossible for a node to be an ancestor of a
 node in one tree, and a descendant of the same node in another tree in the tree sequence.
@@ -310,7 +311,7 @@ print("Time units are", tree.tree_sequence.time_units)
 tree.draw_svg(y_axis=True)
 ```
 
-Although branch lengths are not stored explicitly, for convenience `tskit` provides a
+Although branch lengths are not stored explicitly, for convenience {program}`tskit` provides a
 {meth}`Tree.branch_length` method:
 
 ```{code-cell}
@@ -342,7 +343,7 @@ print(
 
 It is worth noting that this distance is the basis for the "genetic divergence"
 between two samples in a tree. For this reason, an equivalent way to carry out the
-calculation is to use {meth}`TreeSequence.divergence`, part of the the standard `tskit`
+calculation is to use {meth}`TreeSequence.divergence`, part of the the standard {program}`tskit`
 {ref}`sec_stats` framework, setting `mode="branch"` and
 `windows="trees"`. This is a more flexible approach, as it allows the distance between
 multiple sets of samples in {ref}`sec_phylogen_multiple_trees` to be calculated
@@ -364,7 +365,7 @@ print(
 (sec_phylogen_multiroot)=
 ### Roots and multiroot trees
 
-In `tskit`, {ref}`sec_data_model_tree_roots` of trees are defined with respect to the
+In {program}`tskit`, {ref}`sec_data_model_tree_roots` of trees are defined with respect to the
 sample nodes. In particular, if we move back in time along the tree branches from a
 sample, the oldest node that we encounter is defined as a root. The ID of a root can be
 obtained using {attr}`Tree.root`:
@@ -374,7 +375,7 @@ print("The root node of the following tree has ID", tree.root)
 tree.draw_svg()
 ```
 
-But in `tskit`, we can also create a single "tree" consisting of multiple unlinked
+But in {program}`tskit`, we can also create a single "tree" consisting of multiple unlinked
 clades. In our example, we can create one of these phylogenetically unusual objects
 if we remove the edge above node 4, by
 {ref}`editing the underlying tables<sec_tables_editing>`:
@@ -389,8 +390,8 @@ new_tree = new_ts.first()
 new_tree.draw_svg()
 ```
 
-Although there are two separate topologies in this plot, in `tskit` terminology, it is
-considered a single tree, but with two roots:
+Although there are two separate topologies in this plot, in {program}`tskit` terminology,
+it is considered a single tree, but with two roots:
 
 ```{code-cell}
 print("The first tree has", len(new_tree.roots), "roots:", new_tree.roots)
@@ -408,7 +409,7 @@ empty_tree.draw_svg()
 ```
 
 The samples here are {ref}`sec_data_model_tree_isolated_nodes`. This may seem like a
-strange corner case, but in `tskit`, isolated sample nodes are used to represent
+strange corner case, but in {program}`tskit`, isolated sample nodes are used to represent
 {ref}`sec_data_model_missing_data`. This therefore represents a tree in which
 relationships between the samples are not known. This could apply, for instance,
 in regions of the genome where no genetic data exists, or where genetic ancestry
@@ -429,7 +430,7 @@ Demo some phylogenetic methods. e.g.
 (sec_phylogen_unified_structure)=
 ## Storing and accessing genetic data
 
-`Tskit` has been designed to capture both evolutionary tree topologies and the genetic
+{program}`Tskit` has been designed to capture both evolutionary tree topologies and the genetic
 sequences that evolve along the branches of these trees. This is achieved by defining
 {ref}`sec_terminology_mutations_and_sites` which are associated with specific positions
 along the genome.
@@ -468,13 +469,13 @@ for node_id, alignment in zip(
 (sec_phylogen_multiple_trees)=
 ## Multiple trees
 
-Where `tskit` really shines is when the ancestry of your dataset cannot be adequately
+Where {program}`tskit` really shines is when the ancestry of your dataset cannot be adequately
 represented by a single tree. This is a pervasive issue in genomes (even from different
 species) that have undergone recombination in the past. The resulting series of
 {ref}`local trees<sec_what_is_local_trees>` along a genome are highly correlated
 (see {ref}`sec_concepts`).
 
-Instead of storing each tree along a genome separately, `tskit` records the genomic
+Instead of storing each tree along a genome separately, {program}`tskit` records the genomic
 coordinates of each edge, which leads to enormous efficiencies in storage and
 analysis. As a basic demonstration, we can repeat the edge removal example
 {ref}`above <sec_phylogen_multiroot>`, but only remove the ancestral link above node 4
@@ -498,7 +499,7 @@ generate 2 trees in the tree sequence, which differ only in the presence of abse
 a single branch. We do not have to separately store the entire tree on the right: all
 the edges that are shared between trees are stored only once.
 
-The rest of the `tskit` tutorials will lead you through the concepts involved with
+The rest of the {program}`tskit` tutorials will lead you through the concepts involved with
 storing and analysing sequences of many correlated trees. For a simple introduction, you
 might want to start with {ref}`sec_what_is`.
 
diff --git a/popgen.md b/popgen.md
index 134c5f78..986efcd3 100644
--- a/popgen.md
+++ b/popgen.md
@@ -76,7 +76,7 @@ def create_notebook_data():
 
 (sec_intro_popgen)=
 
-# `Tskit` for population genetics
+# {program}`Tskit` for population genetics
 
 {ref}`Tskit<tskit:sec_introduction>`, the tree sequence toolkit, brings the power of
 evolutionary trees to the field of population genetics. The
@@ -85,9 +85,9 @@ is designed to store DNA sequences jointly with their ancestral history (the
 "genetic genealogy" or {ref}`ARG<sec_args>`). Storing population genetic data in this
 form enables highly efficient computation and analysis.
 
-The core `tskit` library provides methods for storing genetic data, a flexible
+The core {program}`tskit` library provides methods for storing genetic data, a flexible
 analysis framework, and APIs to build your own efficient population genetic algorithms.
-Because of its speed and scalability, `tskit` is well-suited to interactive analysis of
+Because of its speed and scalability, {program}`tskit` is well-suited to interactive analysis of
 large genomic datasets. 
 
 ## Population genetic simulation
@@ -181,7 +181,7 @@ plt.title("Allele frequency spectrum")
 plt.show()
 ```
 
-Similarly `tskit` allows fast and easy
+Similarly {program}`tskit` allows fast and easy
 {ref}`calculation of statistics<sec_tutorial_stats>` along the genome. Here is
 a plot of windowed $F_{st}$ between Africans and admixed Americans over this
 region of chromosome:
@@ -202,7 +202,7 @@ plt.xlabel("Genome position")
 plt.show()
 ```
 
-Extracting the genetic tree at a specific genomic location is easy using `tskit`, which
+Extracting the genetic tree at a specific genomic location is easy using {program}`tskit`, which
 also provides methods to {ref}`plot<sec_tskit_viz>` these trees. Here we
 grab the tree at position 10kb, and colour the different populations by
 grab the tree at position 10kb, and colour the samples according to their population,
@@ -355,7 +355,7 @@ plt.show()
 
 Other population genetic libraries such as
 [scikit-allel](https://scikit-allel.readthedocs.io/en/stable/) (which is
-{ref}`interoperable<sec_tskit_getting_started_exporting_data_allel>` with `tskit`)
+{ref}`interoperable<sec_tskit_getting_started_exporting_data_allel>` with {program}`tskit`)
 could also have been used to produce the plot above. In this case, the advantage of
 using tree sequences is simply that they allow these sorts of analysis to
 {ref}`scale<plot_incremental_calculation>` to datasets of millions of whole genomes.
@@ -438,30 +438,75 @@ embedded_topologies = topology_counter[range(simplified_ts.num_populations)]
 
 ```{code-cell}
 :"tags": ["hide-input"]
-# All the following code is simply to plot the embedded_topologies nicely
+# This helper is copied from the side-by-side SVG example in the visualization tutorial.
+def draw_svg_side_by_side(
+    drawables,
+    *,
+    size=(200, 200),
+    sizes=None,
+    padding=40,
+    canvas_size=None,
+    per_svg_kwargs=None,
+    **kwargs,
+):
+    if len(drawables) == 0:
+        raise ValueError("Need at least one drawable")
+    if per_svg_kwargs is None:
+        per_svg_kwargs = [{} for _ in drawables]
+    if len(per_svg_kwargs) != len(drawables):
+        raise ValueError("per_svg_kwargs must have the same length as drawables")
+    if sizes is None:
+        sizes = [size for _ in drawables]
+    if len(sizes) != len(drawables):
+        raise ValueError("sizes must have the same length as drawables")
+    if canvas_size is None:
+        canvas_size = (
+            sum(s[0] for s in sizes) + (len(drawables) - 1) * padding,
+            max(s[1] for s in sizes),
+        )
+
+    preamble = []
+    x_offset = sizes[0][0] + padding
+    for j, drawable in enumerate(drawables[1:], start=1):
+        svg_kwargs = dict(kwargs)
+        svg_kwargs.update(per_svg_kwargs[j])
+        svg_kwargs["size"] = sizes[j]
+        svg_kwargs["root_svg_attributes"] = {
+            **svg_kwargs.get("root_svg_attributes", {}),
+            "x": x_offset,
+        }
+        preamble.append(drawable.draw_svg(**svg_kwargs))
+        x_offset += sizes[j][0] + padding
+
+    first_kwargs = dict(kwargs)
+    first_kwargs.update(per_svg_kwargs[0])
+    first_kwargs["size"] = sizes[0]
+    first_kwargs["canvas_size"] = canvas_size
+    first_kwargs["preamble"] = first_kwargs.get("preamble", "") + "".join(preamble)
+    return drawables[0].draw_svg(**first_kwargs)
+
+
 all_trees = list(tskit.all_trees(simplified_ts.num_populations))
-last = len(all_trees) - 1
-svgs = ""
 style = "".join(styles) + ".sample text.lab {baseline-shift: super; font-size: 0.7em;}"
 style = style.replace(".leaf.p", ".leaf.n")  # Hack to map node IDs to population colours
-params = {
-    "size": (160, 150),
-    "node_labels": {pop.id: pop.metadata["name"] for pop in simplified_ts.populations()}
-}
-for i, t in enumerate(all_trees):
-    rank = t.rank()
-    count = embedded_topologies[rank]
-    params["title"] = f"{count} trees"
-    if i != last:
-        svgs += t.draw_svg(root_svg_attributes={'x': (last - i) * 150}, **params)
-    else:
-        # Plot the last svg and stack the previous ones to the right
-        display(t.draw_svg(preamble=svgs, canvas_size=(1000, 150), style=style, **params))
+draw_svg_side_by_side(
+    all_trees,
+    size=(160, 150),
+    padding=10,
+    style=style,
+    per_svg_kwargs=[
+        {
+            "node_labels": {pop.id: pop.metadata["name"] for pop in simplified_ts.populations()},
+            "title": f"{embedded_topologies[t.rank()]} trees",
+        }
+        for t in all_trees
+    ],
+)
 ```
 
 
 See {ref}`sec_counting_topologies` for an introduction to topological methods in
-`tskit`.
+{program}`tskit`.
 
 ## Further information
 
diff --git a/viz.md b/viz.md
index a3dafd8d..4d1d8226 100644
--- a/viz.md
+++ b/viz.md
@@ -398,13 +398,84 @@ ts_mutated.draw_svg(
 )
 ```
 
+#### Plotting side-by-side
+
 As any conceivable SVG commands can be added (including nesting one SVG inside another),
-this is extremely flexible, but can be fiddly. You can, for example, plot one tree
-next to another by adding one into the preamble of the other. This is demonstrated in
-the example below, which allows space for the extra embeded tree by using the
-`canvas_size` parameter to increase the size of the canvas. This adds more space to the
-right and bottom of the plot, without rescaling the image itself. 
+using `preamble` is extremely flexible, but can be fiddly. You can, for example, plot
+one tree next to a completely different one by adding one into the preamble of the other.
+This is demonstrated in the helper function below. It adds space into the first
+plot without rescaling the image itself by using the `canvas_size` parameter. Further
+plots are then added into the preamble, moved to the left using
+`root_svg_attributes={"x", x_offset}`.
+
+```{code-cell} ipython3
+def draw_svg_side_by_side(
+    drawables,
+    *,
+    size=(200, 200),
+    sizes=None,
+    padding=40,
+    canvas_size=None,
+    per_svg_kwargs=None,
+    **kwargs,
+):
+    """
+    Plot multiple Tree or TreeSequence objects side-by-side by embedding the SVG for
+    each later object in the preamble of the first.
+
+    :param list drawables: A list of Tree or TreeSequence objects.
+    :param tuple size: The size of each individual SVG plot, if `sizes` is not given.
+    :param list sizes: An optional list of sizes, one per plot.
+    :param int padding: The horizontal gap between adjacent plots.
+    :param tuple canvas_size: The overall canvas size. If None, infer it from `size`.
+    :param list per_svg_kwargs: An optional list of dicts of keyword arguments to pass
+        to each individual `draw_svg` call. These are merged on top of `**kwargs`.
+    :param kwargs: Common keyword arguments passed to each `draw_svg` call.
+    """
+    if len(drawables) == 0:
+        raise ValueError("Need at least one drawable")
+    if per_svg_kwargs is None:
+        per_svg_kwargs = [{} for _ in drawables]
+    if len(per_svg_kwargs) != len(drawables):
+        raise ValueError("per_svg_kwargs must have the same length as drawables")
+    if sizes is None:
+        sizes = [size for _ in drawables]
+    if len(sizes) != len(drawables):
+        raise ValueError("sizes must have the same length as drawables")
+    if canvas_size is None:
+        canvas_size = (
+            sum(s[0] for s in sizes) + (len(drawables) - 1) * padding,
+            max(s[1] for s in sizes),
+        )
 
+    preamble = []
+    x_offset = sizes[0][0] + padding
+    for j, drawable in enumerate(drawables[1:], start=1):
+        svg_kwargs = dict(kwargs)
+        svg_kwargs.update(per_svg_kwargs[j])
+        svg_kwargs["size"] = sizes[j]
+        svg_kwargs["root_svg_attributes"] = {
+            **svg_kwargs.get("root_svg_attributes", {}),
+            "x": x_offset,
+        }
+        preamble.append(drawable.draw_svg(**svg_kwargs))
+        x_offset += sizes[j][0] + padding
+
+    first_kwargs = dict(kwargs)
+    first_kwargs.update(per_svg_kwargs[0])
+    first_kwargs["size"] = sizes[0]
+    first_kwargs["canvas_size"] = canvas_size
+    first_kwargs["preamble"] = first_kwargs.get("preamble", "") + "".join(preamble)
+    return drawables[0].draw_svg(**first_kwargs)
+```
+
+```{code-cell} ipython3
+draw_svg_side_by_side(
+    [tree3, tree3],  # these could be different trees from different tree sequences
+    size=(300, 300),
+    per_svg_kwargs=[{"title": "3rd tree"}, {"title": "3rd tree, no sites", "omit_sites": True}],
+)
+```
 
 (sec_tskit_viz_reordering nodes)=
 
@@ -420,7 +491,8 @@ as required (e.g. using the `subset` method). If you are labelling nodes using
 metadata, this will all be fine, but if you are using the default ID labelling scheme,
 you'll should provide labels that map back to the original IDs, to avoid confusion.
 
-Here's an example, with a reordering function:
+Here's an example, using a generic reordering function to reverse the visual
+order of nodes 0...4 ()
 
 ```{code-cell} ipython3
 import numpy as np
@@ -440,19 +512,12 @@ def reorder_leaves(tree, leaf_order_ids):
 orig_tree = ts_mutated.first()
 new_tree, node_map = reorder_leaves(orig_tree, [4, 3, 2, 1, 0, 5, 7, 6])
 
-svg = new_tree.draw_svg(
-    title="Leaf nodes reordered",
-    size=(200, 200),
-    node_labels={u: v for u, v in enumerate(node_map)},  # map IDs back to the orig
-    root_svg_attributes={"x": 300},  # Shift the svg rightwards by 200 units
-)
-
-# Plot the new_tree next to the orig_tree using the preamble argument
-orig_tree.draw_svg(
-    title="Original tree",
-    size=(200, 200),
-    canvas_size=(500, 200),  # make space for the adjacent tree
-    preamble=svg,
+draw_svg_side_by_side(
+    [orig_tree, new_tree],
+    per_svg_kwargs=[
+        {"title": "Original tree"},
+        {"title": "Leaf nodes reordered", "node_labels": {u: v for u, v in enumerate(node_map)}},
+    ],
 )
 ```