181 changes: 140 additions & 41 deletions plotting/plot_latest_benchmark.py
@@ -3,21 +3,22 @@
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ucc_bench.results import SuiteResultsDatabase, to_df_timing, to_df_simulation

from shared import calculate_abs_relative_error, get_compiler_colormap

BAR_WIDTH = 0.2
BAR_WIDTH = 0.35


def generate_plot(
def generate_compilation_subplots(
df: pd.DataFrame,
plot_configs: list[dict],
latest_date: str,
out_path: Path,
use_pdf: bool = False,
):
"""Generic plotting function to create bar charts for benchmark data."""
"""Generate subplots for compilation benchmarks with separate subplot per benchmark."""
# Configure matplotlib for LaTeX output if PDF export is requested
if use_pdf:
plt.rcParams.update(
@@ -27,62 +28,160 @@ def generate_plot(
}
)

circuit_names = sorted(df["benchmark_id"].unique())
x_positions = range(len(circuit_names))
circuit_name_to_index = {name: i for i, name in enumerate(circuit_names)}
color_map = get_compiler_colormap()

num_plots = len(plot_configs)
fig, axes = plt.subplots(1, num_plots, figsize=(7 * num_plots, 7), squeeze=False)
axes = axes.flatten()

benchmarks = sorted(df["benchmark_id"].unique())
compilers = df["compiler"].unique()
for i, compiler_name in enumerate(compilers):
grp = df[df["compiler"] == compiler_name]
grp_indices = grp["benchmark_id"].map(circuit_name_to_index)
bar_positions = [idx + i * BAR_WIDTH for idx in grp_indices]

for ax, config in zip(axes, plot_configs):
ax.bar(
bar_positions,
grp[config["y_col"]],
width=BAR_WIDTH,
label=compiler_name,
color=color_map.get(compiler_name),
)

for ax, config in zip(axes, plot_configs):
ax.set_title(f"{config['title']} (Date: {latest_date})")
ax.set_xlabel("Circuit Name")
ax.set_ylabel(config["ylabel"])
ax.set_xticks(x_positions)
ax.set_xticklabels(circuit_names, rotation=75, ha="right")
ax.set_yscale("log")
ax.legend(title="Compiler")

plt.tight_layout()
print(f"Saving plot to {out_path}")
fig.savefig(out_path, dpi=300, bbox_inches="tight")
plt.close(fig)
n_benchmarks = len(benchmarks)
ncols = 3
nrows = 2

# Create separate figures for each metric
for config in plot_configs:
fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows), squeeze=False)
axes = axes.flatten()
color_map = get_compiler_colormap()

for i, ax in enumerate(axes):
if i < n_benchmarks:
benchmark = benchmarks[i]
sub = df[df["benchmark_id"] == benchmark]

# Extract values for each compiler
values = []
compiler_names = []
for compiler in compilers:
Collaborator

What is the motivation for this loop, versus just working with sub directly? That is, wouldn't sub[config["y_col"]] give you the values array?

Depending on the answer I may have additional feedback on the code below.

Author

I was thinking I would have to iterate the compilers to get the values for each specific compiler, but in hindsight that was overcomplicating. Using sub directly works too.
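For reference, the simplification discussed here could look roughly like this (a sketch, not the code in this PR, assuming sub holds exactly one row per compiler for the benchmark):

sub = df[df["benchmark_id"] == benchmark]
compiler_names = sub["compiler"].tolist()   # one row per compiler assumed
values = sub[config["y_col"]].tolist()      # same order as compiler_names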

row = sub[sub["compiler"] == compiler]
if not row.empty:
values.append(row[config["y_col"]].values[0])
compiler_names.append(compiler)

# Create bars
x_positions = np.arange(len(compiler_names))
bars = ax.bar(
x_positions,
values,
color=[color_map.get(compiler, "#4C72B0") for compiler in compiler_names],
width=0.5,
)

ax.set_xticks(x_positions)
ax.set_xticklabels(compiler_names, rotation=30, ha="right")
ax.set_title(f"Benchmark: {benchmark}")
ax.set_ylabel(config["ylabel"])
# Use log scale only if specified in config (default to True for backwards compatibility)
if config.get("use_log_scale", True):
ax.set_yscale("log")
else:
ax.set_visible(False)
Collaborator

If there are more than 6 benchmark results, does this silently just not show the additional plots? I'd consider either supporting an arbitrary number of benchmark results, or at least asserting/erroring when it's not the hard-coded 6 results.

Author

That's a good point. An error could perhaps be raised when the number of benchmarks exceeds the 6 available subplots.

Contributor

@AdiBak -- I missed this earlier, but I would prefer if we made the number of subplots explicitly dependent on the number of benchmarks, rather than hard-coding it at 2 x 3.

Author (AdiBak, Dec 3, 2025)

Sure! I made it dynamically determine how many rows and cols should exist, so essentially it'd follow this:

  • 1-3 benchmarks: 1 row, 1-3 columns
  • 4-6 benchmarks: 2 rows, 3 columns (same as before)
  • 7-9 benchmarks: 3 rows, 3 columns
  • 10-12 benchmarks: 3 rows, 4 columns
  • More than 12: calculate the optimal grid

Let me know what you think. Thanks!
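A sketch of how that dynamic grid could be computed (illustrative only; grid_shape is a hypothetical helper whose thresholds follow the list above, not necessarily the exact code in this PR):

import math

def grid_shape(n_benchmarks: int) -> tuple[int, int]:
    """Return (nrows, ncols) large enough to hold one subplot per benchmark."""
    if n_benchmarks <= 3:
        return 1, max(n_benchmarks, 1)
    if n_benchmarks <= 9:
        ncols = 3
    elif n_benchmarks <= 12:
        ncols = 4
    else:
        ncols = math.ceil(math.sqrt(n_benchmarks))
    return math.ceil(n_benchmarks / ncols), ncols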


plt.suptitle(f"{config['title']} (Date: {latest_date})", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Save with metric-specific filename
metric_name = config["y_col"].replace("_", "-")
metric_out_path = out_path.parent / f"{out_path.stem}_{metric_name}{out_path.suffix}"
print(f"Saving plot to {metric_out_path}")
fig.savefig(metric_out_path, dpi=300, bbox_inches="tight")
plt.close(fig)


def plot_compilation(
df: pd.DataFrame, latest_date: str, out_path: Path, use_pdf: bool = False
):
"""Generates and saves plots for compilation benchmark data."""
df_comp = df.copy()
df_comp["compiled_ratio"] = df_comp["compiled_multiq_gates"] / df_comp["raw_multiq_gates"]

plot_configs = [
{
"y_col": "compile_time",
"title": "Compiler Performance",
"ylabel": "Compile Time (s)",
"use_log_scale": True,
},
{
"y_col": "compiled_multiq_gates",
"title": "Gate Counts",
"ylabel": "Compiled Gate Count",
"use_log_scale": True,
Collaborator

It might be better to have ylabel be "Compiled Multi-Qubit Gate Count" to be clear it doesn't include single qubit gates.

Collaborator

Also noting this is a gap in the existing code already!
},
{
"y_col": "compiled_ratio",
"title": "Compiled Gate Ratio",
"ylabel": "Compiled Gates / Raw Gates",
"use_log_scale": False,
},
]
generate_plot(df, plot_configs, latest_date, out_path, use_pdf)
generate_compilation_subplots(df_comp, plot_configs, latest_date, out_path, use_pdf)


def generate_simulation_subplots(
df: pd.DataFrame,
plot_configs: list[dict],
latest_date: str,
out_path: Path,
use_pdf: bool = False,
):
"""Generate subplots for simulation benchmarks with separate subplot per benchmark."""
# Configure matplotlib for LaTeX output if PDF export is requested
if use_pdf:
plt.rcParams.update(
{
"text.usetex": True, # for matching math & fonts (optional)
"font.family": "serif",
}
)

benchmarks = sorted(df["benchmark_id"].unique())
compilers = df["compiler"].unique()
n_benchmarks = len(benchmarks)
ncols = 3
nrows = 2

# Create separate figures for each metric (like compilation plots)
for config in plot_configs:
fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows), squeeze=False)
axes = axes.flatten()
color_map = get_compiler_colormap()

for i, ax in enumerate(axes):
if i < n_benchmarks:
benchmark = benchmarks[i]
sub = df[df["benchmark_id"] == benchmark]

# Extract values for each compiler
values = []
compiler_names = []
for compiler in compilers:
row = sub[sub["compiler"] == compiler]
if not row.empty:
values.append(row[config["y_col"]].values[0])
compiler_names.append(compiler)

# Create bars
x_positions = np.arange(len(compiler_names))
bars = ax.bar(
x_positions,
values,
color=[color_map.get(compiler, "#4C72B0") for compiler in compiler_names],
width=0.5,
)

ax.set_xticks(x_positions)
ax.set_xticklabels(compiler_names, rotation=30, ha="right")
ax.set_title(f"Benchmark: {benchmark}")
ax.set_ylabel(config["ylabel"])
else:
ax.set_visible(False)

plt.suptitle(f"{config['title']} (Date: {latest_date})", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Save with metric-specific filename
metric_name = config["y_col"].replace("_", "-")
metric_out_path = out_path.parent / f"{out_path.stem}_{metric_name}{out_path.suffix}"
print(f"Saving plot to {metric_out_path}")
fig.savefig(metric_out_path, dpi=300, bbox_inches="tight")
plt.close(fig)
Copilot AI Oct 16, 2025

The generate_simulation_subplots function contains nearly identical code to generate_compilation_subplots. Consider extracting the common subplot generation logic into a shared helper function to reduce code duplication.
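A minimal sketch of that consolidation, reusing the imports and helpers already in this file (generate_benchmark_subplots is a hypothetical name, and the use_log_scale default below is an assumption; the PR's compilation plots default it to True):

def generate_benchmark_subplots(
    df: pd.DataFrame,
    plot_configs: list[dict],
    latest_date: str,
    out_path: Path,
    use_pdf: bool = False,
):
    """Shared helper sketch: one figure per metric, one subplot per benchmark."""
    if use_pdf:
        plt.rcParams.update({"text.usetex": True, "font.family": "serif"})

    benchmarks = sorted(df["benchmark_id"].unique())
    color_map = get_compiler_colormap()
    ncols, nrows = 3, 2  # could instead be derived from len(benchmarks), per the review thread above

    for config in plot_configs:
        fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows), squeeze=False)
        axes = axes.flatten()
        for ax, benchmark in zip(axes, benchmarks):
            sub = df[df["benchmark_id"] == benchmark]
            x_positions = np.arange(len(sub))
            ax.bar(
                x_positions,
                sub[config["y_col"]],
                width=0.5,
                color=[color_map.get(c, "#4C72B0") for c in sub["compiler"]],
            )
            ax.set_xticks(x_positions)
            ax.set_xticklabels(sub["compiler"], rotation=30, ha="right")
            ax.set_title(f"Benchmark: {benchmark}")
            ax.set_ylabel(config["ylabel"])
            if config.get("use_log_scale", False):  # assumed default
                ax.set_yscale("log")
        # Hide any unused axes in the grid
        for ax in axes[len(benchmarks):]:
            ax.set_visible(False)

        plt.suptitle(f"{config['title']} (Date: {latest_date})", fontsize=16)
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        metric_name = config["y_col"].replace("_", "-")
        metric_out_path = out_path.parent / f"{out_path.stem}_{metric_name}{out_path.suffix}"
        fig.savefig(metric_out_path, dpi=300, bbox_inches="tight")
        plt.close(fig)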


def plot_simulation(
Expand All @@ -109,7 +208,7 @@ def plot_simulation(
"ylabel": "Absolute Relative Error",
},
]
generate_plot(df_sim, plot_configs, latest_date, out_path, use_pdf)
generate_simulation_subplots(df_sim, plot_configs, latest_date, out_path, use_pdf)


def main():
[Generated benchmark plot image (binary file); not rendered in the diff view]
Collaborator

This is more a style nit, but the logscale for the prep_select and qv benchmarks has way more hashes labeled. Is there a nice way to make it less "busy"?
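One way to thin out those tick labels (a sketch, not part of this PR) is to cap the number of major ticks and suppress minor tick labels with matplotlib's LogLocator and NullFormatter, applied per axis after ax.set_yscale("log"); numticks=5 here is an arbitrary choice:

from matplotlib import ticker

ax.yaxis.set_major_locator(ticker.LogLocator(base=10, numticks=5))
ax.yaxis.set_minor_locator(ticker.LogLocator(base=10, subs="auto", numticks=5))
ax.yaxis.set_minor_formatter(ticker.NullFormatter())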
