GenericMappingTools · seisman · May 29, 2026 · May 29, 2026
diff --git a/README.md b/README.md
@@ -12,7 +12,28 @@ conda env create -f environment.yml
 conda activate pygmt-benchmarking
 ```
 
+## Benchmarks
+
+The benchmarks were run on a MacBook Pro (Apple M5, 32GB RAM) running macOS
+Tahoe 26.1.
+
+### Benchmark 1: Diamonds Dataset
+
+The diamonds benchmark compares PyGMT and matplotlib when plotting the seaborn
+diamonds dataset. It reports plotting time and `savefig` time separately.
+
+Run the benchmark:
+
+```bash
+python benchmarks/bench_matplotlib_diamonds.py
+```
+
+| Step | matplotlib (s) | PyGMT (s) | matplotlib / PyGMT |
+| --- | --- | --- | --- |
+| Plotting | 0.021 | 0.028 | 0.75x |
+| Savefig | 0.13 | 1.135 | 0.11x |
+
 ## License
 
 This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for
-details.
+details.
diff --git a/benchmarks/bench_matplotlib_diamonds.py b/benchmarks/bench_matplotlib_diamonds.py
@@ -0,0 +1,174 @@
+"""
+Benchmark PyGMT and matplotlib when plotting the diamonds dataset.
+"""
+
+import statistics
+import time
+from pathlib import Path
+
+import pandas as pd
+import pygmt
+import matplotlib.pyplot as plt  # noqa: E402
+
+
+# Diamond cut categories, in plotting order, and the color used for each one
+# (matched by index).
+CUT_ORDER = ("Fair", "Good", "Very Good", "Premium", "Ideal")
+COLORS = ("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd")
+
+# Benchmark configuration: which backends to time, how many timed runs each one
+# gets, where the PNG files go, and where the dataset is read from.
+BACKENDS = ("matplotlib", "pygmt")
+REPEATS = 10
+OUTPUT_DIR = Path("plots") / "diamonds"
+DIAMONDS_DATA_URL = "https://github.com/mwaskom/seaborn-data/raw/master/diamonds.csv"
+
+# A single diameter-like marker size shared by both backends. PyGMT's circle style
+# takes a marker diameter, whereas matplotlib's scatter ``s`` is a marker area in
+# points squared, so ``plot_matplotlib`` converts this value before using it.
+MARKER_SIZE_POINTS = 2
+
+
+def load_diamonds_data() -> pd.DataFrame:
+    """Read the diamonds CSV at ``DIAMONDS_DATA_URL`` into a DataFrame.
+
+    The file is read directly from the URL on every call, so network access is
+    required.
+    """
+    diamonds = pd.read_csv(DIAMONDS_DATA_URL)
+    return diamonds
+
+
+def plot_matplotlib(data: pd.DataFrame):
+    """Draw the carat-versus-price scatter plot with matplotlib.
+
+    One scatter series is drawn per entry of ``CUT_ORDER``, colored with the entry
+    of ``COLORS`` at the same index. ``data`` must provide ``cut``, ``carat`` and
+    ``price`` columns. The new figure is returned without being saved or closed.
+    """
+    figure, axes = plt.subplots(figsize=(6, 4), dpi=300)
+
+    # Styling shared by every series. Matplotlib's ``s`` is an area in points
+    # squared, so the diameter-like marker size is squared here.
+    shared_style = {
+        "s": MARKER_SIZE_POINTS**2,
+        "marker": "o",
+        "alpha": 0.5,
+        "linewidths": 0,
+    }
+    for index, cut in enumerate(CUT_ORDER):
+        subset = data.loc[data["cut"] == cut]
+        axes.scatter(
+            subset["carat"],
+            subset["price"],
+            label=cut,
+            color=COLORS[index],
+            **shared_style,
+        )
+
+    axes.set_title("Diamond price by carat")
+    axes.set_xlabel("Carat")
+    axes.set_ylabel("Price (USD)")
+    axes.legend(title="Cut", frameon=False, markerscale=4)
+    return figure
+
+
+def save_matplotlib(fig, output: Path) -> None:
+    """Write a matplotlib figure to ``output`` and then close it.
+
+    The figure is closed through pyplot after saving so that it is released
+    rather than kept open between benchmark runs.
+    """
+    fig.savefig(output)
+    # Release the figure now that it has been written out.
+    plt.close(fig)
+
+
+def plot_pygmt(data: pd.DataFrame) -> pygmt.Figure:
+    """Draw the carat-versus-price scatter plot with PyGMT.
+
+    One symbol series is drawn per entry of ``CUT_ORDER``, filled with the entry of
+    ``COLORS`` at the same index. ``data`` must provide ``cut``, ``carat`` and
+    ``price`` columns. The figure is returned without being saved.
+    """
+    figure = pygmt.Figure()
+
+    # Frame with fixed axis limits, a title and labelled axes.
+    carat_axis = pygmt.params.Axis(annot=True, tick=True, label="Carat")
+    price_axis = pygmt.params.Axis(annot=True, tick=True, label="Price (USD)")
+    frame = pygmt.params.Frame(
+        axes="WSne",
+        title="Diamond price by carat",
+        xaxis=carat_axis,
+        yaxis=price_axis,
+    )
+    figure.basemap(region=[0, 5.5, 0, 20000], projection="X6i/4i", frame=frame)
+
+    for index, cut in enumerate(CUT_ORDER):
+        subset = data.loc[data["cut"] == cut]
+        figure.plot(
+            x=subset["carat"],
+            y=subset["price"],
+            # Circle symbols sized by the shared marker diameter, in points.
+            style=f"c{MARKER_SIZE_POINTS}p",
+            fill=f"{COLORS[index]}@50",
+            label=cut,
+        )
+
+    figure.legend(position=pygmt.params.Position("TR", offset=0.1))
+    return figure
+
+
+def save_pygmt(fig: pygmt.Figure, output: Path) -> None:
+    """Write a PyGMT figure to ``output``.
+
+    Thin wrapper around ``fig.savefig`` so both backends expose the same
+    ``save(fig, output)`` call shape to the benchmark loop.
+    """
+    fig.savefig(output)
+
+
+def benchmark(
+    name: str,
+    plot_func,
+    save_func,
+    data: pd.DataFrame,
+    output_dir: Path,
+    repeats: int,
+) -> tuple[list[float], list[float]]:
+    """Time repeated plot-creation and figure-export runs for one backend.
+
+    Each timed run calls ``plot_func(data)`` and then ``save_func(fig, output)``.
+    The two calls are timed independently, so the save timings do not include the
+    time spent creating the plot.
+
+    One untimed warmup run is performed first so that one-time backend setup, such
+    as font discovery, does not skew the recorded timings.
+
+    Parameters
+    ----------
+    name
+        Backend name, used as the prefix of the output file names
+        (``{name}_warmup.png``, ``{name}_1.png``, ...).
+    plot_func
+        Callable taking ``data`` and returning a figure object.
+    save_func
+        Callable taking the figure returned by ``plot_func`` and an output path.
+    data
+        Dataset passed unchanged to ``plot_func`` on every run.
+    output_dir
+        Directory for the output files; created (with parents) if missing.
+    repeats
+        Number of timed runs performed after the warmup.
+
+    Returns
+    -------
+    tuple[list[float], list[float]]
+        The plot timings and the save timings in seconds, one entry per timed run.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Warm up the backend once before recording timings.
+    fig = plot_func(data)
+    save_func(fig, output_dir / f"{name}_warmup.png")
+
+    plot_timings: list[float] = []
+    save_timings: list[float] = []
+    for run_id in range(repeats):
+        output = output_dir / f"{name}_{run_id + 1}.png"
+
+        start = time.perf_counter()
+        fig = plot_func(data)
+        plot_timings.append(time.perf_counter() - start)
+
+        # Restart the clock so the save timing covers only the export step.
+        start = time.perf_counter()
+        save_func(fig, output)
+        save_timings.append(time.perf_counter() - start)
+
+    return plot_timings, save_timings
+
+
+def format_summary(name: str, timings: list[float]) -> str:
+    """Summarize a list of timings as one line of text.
+
+    The line starts with ``name`` left-aligned and padded to at least 10
+    characters, followed by the mean, median, minimum and maximum of ``timings``,
+    each printed with four decimals and an ``s`` suffix.
+    """
+    stats = {
+        "mean": statistics.fmean(timings),
+        "median": statistics.median(timings),
+        "min": min(timings),
+        "max": max(timings),
+    }
+    fields = [f"{label}={value:.4f}s" for label, value in stats.items()]
+    return " ".join([f"{name:10s}", *fields])
+
+
+def main() -> None:
+    """Load the dataset, benchmark every backend and print timing summaries.
+
+    For each name in ``BACKENDS`` the matching plot and save functions are timed
+    with ``benchmark`` and two summary lines (plot and savefig) are printed.
+    """
+    data = load_diamonds_data()
+
+    # Each backend name maps to its (plot function, save function) pair.
+    backend_funcs = {
+        "matplotlib": (plot_matplotlib, save_matplotlib),
+        "pygmt": (plot_pygmt, save_pygmt),
+    }
+
+    print(f"Running {REPEATS} timed run(s) per backend")
+    print(f"Writing PNG files to {OUTPUT_DIR}")
+
+    for backend in BACKENDS:
+        # Flush so the progress line shows up before the slow timed runs start.
+        print(f"Benchmarking {backend}...", flush=True)
+        plot_func, save_func = backend_funcs[backend]
+        plot_timings, save_timings = benchmark(
+            name=backend,
+            plot_func=plot_func,
+            save_func=save_func,
+            data=data,
+            output_dir=OUTPUT_DIR,
+            repeats=REPEATS,
+        )
+        for step, timings in (("plot", plot_timings), ("savefig", save_timings)):
+            print(format_summary(f"{backend} {step}", timings))
+
+
+# Run the benchmark only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()