Algorithms

Top-k eigenpairs

lanczos(operator, *, k=10, max_iter=None, tol=0.0001, reorthogonalize=None, which='LM', seed=None, backend=None)

Compute top-k eigenpairs by symmetric Lanczos + tridiagonal eigendecomposition.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `operator` | `CurvatureOperator` | symmetric curvature operator providing matvec. | *required* |
| `k` | `int` | number of Ritz pairs to return. | `10` |
| `max_iter` | `int \| None` | number of Lanczos steps. Defaults to `min(2 * k, n - 1)`. | `None` |
| `tol` | `float` | convergence tolerance for Ritz residuals (`\|β_m s_m,i\| < tol * \|λ_i\|`). | `0.0001` |
| `reorthogonalize` | `bool \| None` | if True, full Gram-Schmidt against all prior basis vectors after each step. None (default) selects True for `max_iter <= 50`. | `None` |
| `which` | `Which` | `'LM'` largest magnitude, `'LA'` largest algebraic, `'SA'` smallest algebraic. | `'LM'` |
| `seed` | `int \| None` | seed for the initial random vector. | `None` |
| `backend` | `LinAlgBackend[Tensor] \| None` | vector-arithmetic backend. | `None` |
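
A minimal usage sketch. `DenseOperator` is a hypothetical stand-in written against the attributes this page's source actually uses (`size`, `dtype`, `device`, `matvec`); the real `CurvatureOperator` protocol may require more.

import torch

from hessian_eigenthings.algorithms.lanczos import lanczos

class DenseOperator:
    """Hypothetical dense wrapper exposing the attributes used on this page."""

    def __init__(self, a: torch.Tensor):
        self.a = a
        self.size = a.shape[0]
        self.dtype = a.dtype
        self.device = a.device

    def matvec(self, v: torch.Tensor) -> torch.Tensor:
        return self.a @ v

a = torch.randn(100, 100, dtype=torch.float64)
op = DenseOperator((a + a.T) / 2)  # Lanczos assumes a symmetric operator

result = lanczos(op, k=5, which="LA", seed=0)
print(result.eigenvalues)  # (5,) largest-algebraic Ritz values
print(result.converged)    # (5,) bool, per-pair residual test
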
Source code in hessian_eigenthings/algorithms/lanczos.py
def lanczos(
    operator: CurvatureOperator,
    *,
    k: int = 10,
    max_iter: int | None = None,
    tol: float = 1e-4,
    reorthogonalize: bool | None = None,
    which: Which = "LM",
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> EigenResult:
    """Compute top-k eigenpairs by symmetric Lanczos + tridiagonal eigendecomposition.

    Args:
        operator: symmetric curvature operator providing matvec.
        k: number of Ritz pairs to return.
        max_iter: number of Lanczos steps. Defaults to `min(2 * k, n - 1)`.
        tol: convergence tolerance for Ritz residuals (`|β_m s_m,i| < tol * |λ_i|`).
        reorthogonalize: if True, full Gram-Schmidt against all prior basis vectors
            after each step. None (default) selects True for `max_iter <= 50`.
        which: 'LM' largest magnitude, 'LA' largest algebraic, 'SA' smallest algebraic.
        seed: seed for the initial random vector.
        backend: vector-arithmetic backend.
    """
    n = operator.size
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")
    if k > n:
        raise ValueError(f"k={k} exceeds operator size {n}")

    if max_iter is None:
        max_iter = min(2 * k, n - 1)
    if max_iter < k:
        raise ValueError(f"max_iter={max_iter} must be at least k={k}")
    if max_iter > n:
        raise ValueError(f"max_iter={max_iter} exceeds operator size {n}")

    if reorthogonalize is None:
        reorthogonalize = max_iter <= 50

    backend = backend or SingleDeviceBackend()

    gen = torch.Generator(device="cpu")
    if seed is not None:
        gen.manual_seed(seed)
    v0 = torch.randn(n, dtype=operator.dtype, generator=gen).to(operator.device)

    td = lanczos_tridiagonal(
        operator, v0, max_iter, reorthogonalize=reorthogonalize, backend=backend
    )

    tridiag = _build_tridiag_matrix(td)
    theta, s = torch.linalg.eigh(tridiag)

    if which == "LM":
        order = torch.argsort(theta.abs(), descending=True)
    elif which == "LA":
        order = torch.argsort(theta, descending=True)
    elif which == "SA":
        order = torch.argsort(theta, descending=False)
    else:  # pragma: no cover
        raise ValueError(f"unknown which={which!r}")

    sel = order[:k]
    eigenvalues = theta[sel]

    # Accumulate Ritz vectors directly into the final (k, n) layout via rank-1
    # outer-product updates. Avoids both the (n, m) basis matrix and the
    # transient (n, k) -> (k, n) transpose-copy, which together would peak at
    # >2x the final eigenvector size at LLM scale.
    s_sel = s[:, sel]
    n = td.basis[0].shape[0]
    eigenvectors = torch.zeros(sel.shape[0], n, dtype=operator.dtype, device=operator.device)
    for j, basis_vec in enumerate(td.basis):
        eigenvectors.addr_(s_sel[j], basis_vec)

    last_components = s[-1, sel]
    residuals = last_components.abs() * td.last_beta

    converged = residuals < tol * eigenvalues.abs().clamp(min=_EPS)

    return EigenResult(
        eigenvalues=eigenvalues,
        eigenvectors=eigenvectors,
        residuals=residuals,
        iterations=td.iterations,
        converged=converged,
    )

lanczos_tridiagonal(operator, v0, max_iter, *, reorthogonalize=True, backend=None)

Run max_iter Lanczos steps from v0 and return the tridiagonal + basis.

Public so SLQ and other quadrature consumers can reuse the same Lanczos kernel.

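A quadrature-style reuse sketch, continuing with the hypothetical `DenseOperator` from the `lanczos` example above: run the kernel, assemble the dense `(m, m)` tridiagonal from `alphas`/`betas` (exactly as `spectral_density` does below), and take its eigendecomposition.

import torch

from hessian_eigenthings.algorithms.lanczos import lanczos_tridiagonal

v0 = torch.randn(op.size, dtype=op.dtype)  # any vector with nonzero norm
td = lanczos_tridiagonal(op, v0, max_iter=30)

# Rebuild T from the recurrence coefficients: alphas on the diagonal,
# betas on the first off-diagonals.
m = td.alphas.shape[0]
t = torch.zeros(m, m, dtype=op.dtype)
t.diagonal().copy_(td.alphas)
if m > 1:
    t.diagonal(1).copy_(td.betas[: m - 1])
    t.diagonal(-1).copy_(td.betas[: m - 1])

theta, y = torch.linalg.eigh(t)  # Ritz values; y[0, :]**2 are the SLQ weights
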
Source code in hessian_eigenthings/algorithms/lanczos.py
def lanczos_tridiagonal(
    operator: CurvatureOperator,
    v0: torch.Tensor,
    max_iter: int,
    *,
    reorthogonalize: bool = True,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> LanczosTridiag:
    """Run `max_iter` Lanczos steps from `v0` and return the tridiagonal + basis.

    Public so SLQ and other quadrature consumers can reuse the same Lanczos kernel.
    """
    backend = backend or SingleDeviceBackend()

    nrm0 = backend.norm(v0)
    if nrm0.item() < _EPS:
        raise ValueError("v0 has near-zero norm")
    v = backend.scale(1.0 / nrm0, v0)

    basis: list[torch.Tensor] = [v]
    alphas_list: list[float] = []
    betas_list: list[float] = []
    prev_v = backend.zeros_like(v)
    beta = 0.0
    last_beta = 0.0
    iterations = 0

    for j in range(max_iter):
        iterations = j + 1
        av = operator.matvec(basis[j])
        if j > 0:
            av = backend.axpy(-beta, prev_v, av)

        alpha = backend.dot(basis[j], av).item()
        alphas_list.append(alpha)
        av = backend.axpy(-alpha, basis[j], av)

        if reorthogonalize:
            for vi in basis:
                av = backend.axpy(-backend.dot(vi, av).item(), vi, av)

        beta_next = backend.norm(av).item()
        last_beta = beta_next
        if beta_next < _EPS:
            break
        if j < max_iter - 1:
            betas_list.append(beta_next)
            prev_v = basis[j]
            basis.append(backend.scale(1.0 / beta_next, av))
            beta = beta_next

    alphas = torch.tensor(alphas_list, dtype=operator.dtype, device=operator.device)
    betas = torch.tensor(betas_list, dtype=operator.dtype, device=operator.device)
    return LanczosTridiag(
        alphas=alphas, betas=betas, basis=basis, last_beta=last_beta, iterations=iterations
    )

LanczosTridiag dataclass

Output of one Lanczos run: tridiagonal coefficients + the basis used to build them.

Source code in hessian_eigenthings/algorithms/lanczos.py
@dataclass(frozen=True)
class LanczosTridiag:
    """Output of one Lanczos run: tridiagonal coefficients + the basis used to build them."""

    alphas: torch.Tensor  # (m,) diagonal
    betas: torch.Tensor  # (m-1,) off-diagonal
    basis: list[torch.Tensor]  # length m, each (n,)
    last_beta: float  # ||r_m|| residual norm at termination
    iterations: int  # m, the actual number of Lanczos steps completed

deflated_power_iteration(operator, *, k=10, max_iter=100, tol=0.0001, momentum=0.0, seed=None, backend=None)

Top-k eigenpairs by repeatedly computing the dominant eigenpair and deflating it out.

After finding (λ_i, v_i), the operator is replaced with A - sum_i λ_i v_i v_i^T so the next iteration can find the next-largest eigenpair.

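The `_deflate` helper is not shown on this page. Conceptually it wraps the current operator in a rank-1 correction; a hypothetical sketch (the library's actual wrapper may route the arithmetic through the backend):

import torch

class DeflatedOperator:
    """Hypothetical wrapper: matvec computes (A - λ u uᵀ) v = A v - λ (u·v) u."""

    def __init__(self, inner, lam: float, u: torch.Tensor):
        self.inner = inner
        self.lam = lam
        self.u = u
        self.size, self.dtype, self.device = inner.size, inner.dtype, inner.device

    def matvec(self, v: torch.Tensor) -> torch.Tensor:
        # Subtract the found eigencomponent so the next-largest pair dominates.
        return self.inner.matvec(v) - self.lam * torch.dot(self.u, v) * self.u
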
Source code in hessian_eigenthings/algorithms/power_iteration.py
def deflated_power_iteration(
    operator: CurvatureOperator,
    *,
    k: int = 10,
    max_iter: int = 100,
    tol: float = 1e-4,
    momentum: float = 0.0,
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> EigenResult:
    """Top-k eigenpairs by repeatedly computing the dominant eigenpair and deflating it out.

    After finding `(λ_i, v_i)`, the operator is replaced with `A - sum_i λ_i v_i v_i^T`
    so the next iteration can find the next-largest eigenpair.
    """
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")
    if k > operator.size:
        raise ValueError(f"k={k} exceeds operator size {operator.size}")

    backend = backend or SingleDeviceBackend()

    eigenvalues: list[float] = []
    eigenvectors: list[torch.Tensor] = []
    residuals: list[float] = []
    converged_flags: list[bool] = []
    total_iters = 0

    current_op: CurvatureOperator = operator

    for i in range(k):
        sub_seed = None if seed is None else seed + i
        lam, vec, res, iters, conv = power_iteration_one(
            current_op,
            max_iter=max_iter,
            tol=tol,
            momentum=momentum,
            seed=sub_seed,
            backend=backend,
        )
        eigenvalues.append(lam)
        eigenvectors.append(vec)
        residuals.append(res)
        converged_flags.append(conv)
        total_iters += iters

        if i < k - 1:
            current_op = _deflate(current_op, lam, vec, backend)

    vals = torch.tensor(eigenvalues, dtype=operator.dtype, device=operator.device)
    res_t = torch.tensor(residuals, dtype=operator.dtype, device=operator.device)
    conv_t = torch.tensor(converged_flags, dtype=torch.bool, device=operator.device)

    # Build the (k, n) eigenvector matrix by copying each vector into its
    # final row, instead of torch.stack which materializes the list as one
    # contiguous block. At LLM scale a single torch.stack of k=10 vectors of
    # n=162M params is 6.5 GB allocated in one shot; row-by-row copy is the
    # same total memory but no transient peak from the stacking allocator.
    n = eigenvectors[0].shape[0]
    vecs = torch.empty(k, n, dtype=operator.dtype, device=operator.device)
    for i, v in enumerate(eigenvectors):
        vecs[i].copy_(v)

    order = torch.argsort(vals.abs(), descending=True)
    return EigenResult(
        eigenvalues=vals[order],
        eigenvectors=vecs[order.cpu()] if vecs.device != order.device else vecs[order],
        residuals=res_t[order],
        iterations=total_iters,
        converged=conv_t[order],
    )

power_iteration_one(operator, *, max_iter=100, tol=0.0001, momentum=0.0, init=None, seed=None, backend=None)

Compute the dominant eigenpair via power iteration with optional Polyak momentum.

Returns (eigenvalue, eigenvector, residual_norm, iterations, converged).

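Written out, the update implemented in the source below is: with momentum coefficient μ and normalized iterate v_t,

$$
w_t = A v_t - \mu\, v_{t-1}, \qquad
\lambda_t = v_t^\top w_t, \qquad
v_{t+1} = \frac{w_t}{\lVert w_t \rVert},
$$

and the loop stops once both the relative change $|\lambda_t - \lambda_{t-1}| / |\lambda_t|$ and the relative residual $\lVert w_t - \lambda_t v_t \rVert / |\lambda_t|$ fall below `tol`.
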
Source code in hessian_eigenthings/algorithms/power_iteration.py
def power_iteration_one(
    operator: CurvatureOperator,
    *,
    max_iter: int = 100,
    tol: float = 1e-4,
    momentum: float = 0.0,
    init: torch.Tensor | None = None,
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> tuple[float, torch.Tensor, float, int, bool]:
    """Compute the dominant eigenpair via power iteration with optional Polyak momentum.

    Returns (eigenvalue, eigenvector, residual_norm, iterations, converged).
    """
    backend = backend or SingleDeviceBackend()

    if init is None:
        gen = torch.Generator(device="cpu")
        if seed is not None:
            gen.manual_seed(seed)
        v = torch.randn(operator.size, dtype=operator.dtype, generator=gen).to(operator.device)
    else:
        v = init.clone()

    nrm0 = backend.norm(v)
    if nrm0.item() < _EPS:
        raise ValueError("init vector has near-zero norm")
    v = backend.scale(1.0 / nrm0, v)

    prev_v = backend.zeros_like(v)
    lambda_prev = 0.0
    lambda_est = 0.0
    residual = float("inf")
    converged = False
    last_iter = 0

    for it in range(max_iter):
        last_iter = it + 1
        av = operator.matvec(v)
        if momentum != 0.0:
            av = backend.axpy(-momentum, prev_v, av)

        lambda_est = backend.dot(v, av).item()
        residual = backend.norm(backend.axpy(-lambda_est, v, av)).item()

        nrm = backend.norm(av).item()
        if nrm < _EPS:
            return 0.0, v, 0.0, last_iter, True

        denom = max(abs(lambda_est), _EPS)
        rel_change = abs(lambda_est - lambda_prev) / denom
        rel_residual = residual / denom

        if it > 0 and rel_change < tol and rel_residual < tol:
            converged = True
            break

        prev_v = v
        v = backend.scale(1.0 / nrm, av)
        lambda_prev = lambda_est

    return lambda_est, v, residual, last_iter, converged

Trace estimation

trace(operator, *, num_matvecs=100, method='hutch++', distribution='rademacher', seed=None, backend=None)

Stochastic estimate of tr(A) using num_matvecs matrix-vector products.

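A usage sketch comparing the two estimators, reusing the hypothetical `DenseOperator` from the `lanczos` example above (`op.a` is that sketch's wrapped matrix, not a library attribute):

import torch

from hessian_eigenthings.algorithms.trace import trace

exact = torch.diagonal(op.a).sum().item()  # ground truth for the dense sketch

hpp = trace(op, num_matvecs=99, method="hutch++", seed=0)
hut = trace(op, num_matvecs=99, method="hutchinson", seed=0)
print(exact, hpp.estimate, hut.estimate)
print(hpp.stderr, hut.stderr)  # stderr covers only the stochastic part of each estimator
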
Source code in hessian_eigenthings/algorithms/trace.py
def trace(
    operator: CurvatureOperator,
    *,
    num_matvecs: int = 100,
    method: Method = "hutch++",
    distribution: Distribution = "rademacher",
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> TraceResult:
    """Stochastic estimate of `tr(A)` using `num_matvecs` matrix-vector products."""
    if num_matvecs < 1:
        raise ValueError(f"num_matvecs must be >= 1, got {num_matvecs}")
    backend = backend or SingleDeviceBackend()

    if method == "hutchinson":
        return hutchinson(
            operator, num_samples=num_matvecs, distribution=distribution, seed=seed, backend=backend
        )
    if method == "hutch++":
        return hutch_plus_plus(operator, num_matvecs=num_matvecs, seed=seed, backend=backend)
    raise ValueError(f"unknown method={method!r}")  # pragma: no cover

hutchinson(operator, *, num_samples=100, distribution='rademacher', seed=None, backend=None)

Hutchinson's (1/m) Σ vᵢᵀ A vᵢ estimator. Rademacher gives lower variance for trace.

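The variance claim can be made precise (a standard result; see Avron & Toledo, 2011): for symmetric A,

$$
\operatorname{Var}\!\left[v^\top A v\right] =
\begin{cases}
2 \sum_{i \neq j} A_{ij}^2 & v \sim \text{Rademacher}, \\
2 \lVert A \rVert_F^2 = 2 \sum_{i,j} A_{ij}^2 & v \sim \mathcal{N}(0, I),
\end{cases}
$$

so Rademacher probes remove the diagonal's contribution entirely, which is why they are the default here.
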
Source code in hessian_eigenthings/algorithms/trace.py
def hutchinson(
    operator: CurvatureOperator,
    *,
    num_samples: int = 100,
    distribution: Distribution = "rademacher",
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> TraceResult:
    """Hutchinson's `(1/m) Σ vᵢᵀ A vᵢ` estimator. Rademacher gives lower variance for trace."""
    backend = backend or SingleDeviceBackend()
    gen = _generator(seed)

    # Use a 1-D probe for shape; backend allocators copy shape/dtype/device from it.
    probe = torch.empty(operator.size, dtype=operator.dtype, device=operator.device)
    samples = torch.empty(num_samples, dtype=operator.dtype, device=operator.device)
    for i in range(num_samples):
        v = _draw(distribution, probe, gen, backend)
        av = operator.matvec(v)
        samples[i] = backend.dot(v, av)

    estimate = samples.mean().item()
    if num_samples > 1:
        stderr = (samples.std(unbiased=True) / (num_samples**0.5)).item()
    else:
        stderr = float("nan")
    return TraceResult(estimate=estimate, stderr=stderr, samples=samples)

hutch_plus_plus(operator, *, num_matvecs=99, seed=None, backend=None)

Hutch++ (Meyer et al. 2021): low-rank sketch + residual Hutchinson.

Splits the matvec budget into thirds: m/3 for AS (the sketch), m/3 for AQ (exact trace on the projected component), and m/3 for A G_perp (Hutchinson on the orthogonal complement). Achieves O(1/ε) total matvecs vs Hutchinson's O(1/ε²) for (1±ε)·tr(A) accuracy on PSD matrices.

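The split rests on an exact decomposition: for any Q with orthonormal columns and P = QQᵀ,

$$
\operatorname{tr}(A)
= \operatorname{tr}\!\left(Q^\top A Q\right)
+ \operatorname{tr}\!\big((I - P)\, A\, (I - P)\big),
$$

because the cross terms $\operatorname{tr}(P A (I - P))$ vanish (P is idempotent and trace is cyclic). Phase 2 computes the first term exactly; phase 3 estimates the second with Hutchinson. When Q captures A's dominant range, the residual matrix has small trace, which is what drives the improved rate.
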
Source code in hessian_eigenthings/algorithms/trace.py
def hutch_plus_plus(
    operator: CurvatureOperator,
    *,
    num_matvecs: int = 99,
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> TraceResult:
    """Hutch++ (Meyer et al. 2021): low-rank sketch + residual Hutchinson.

    Splits the matvec budget into thirds: `m/3` for `AS` (the sketch), `m/3` for `AQ`
    (exact trace on the projected component), and `m/3` for `A G_perp` (Hutchinson on
    the orthogonal complement). Achieves O(1/ε) total matvecs vs Hutchinson's O(1/ε²)
    for `(1±ε)·tr(A)` accuracy on PSD matrices.
    """
    backend = backend or SingleDeviceBackend()
    n = operator.size
    m_per = max(1, num_matvecs // 3)

    gen = _generator(seed)
    probe = torch.empty(n, dtype=operator.dtype, device=operator.device)

    # Phase 1: build AS column-by-column. We never materialize S (the random
    # sketch) as a single (n, m_per) tensor; we generate one column, apply A,
    # store the result column. Cuts peak memory by ~50% at LLM scale.
    a_sketch = torch.empty(n, m_per, dtype=operator.dtype, device=operator.device)
    for i in range(m_per):
        s_i = backend.rademacher_like(probe, generator=gen)
        a_sketch[:, i] = operator.matvec(s_i)

    q = _thin_qr(a_sketch)
    del a_sketch  # free the (n, m_per) sketch output before the next phase
    # QR returns min(n, m_per) columns; for tiny operators (n < m_per) this is
    # less than m_per, and the residual phase below should still use m_per.
    q_cols = q.shape[1]

    # Phase 2: trace_low = trace(Q^T A Q). Accumulate q_i^T (A q_i) per column;
    # never materialize AQ as a single (n, q_cols) tensor.
    trace_low = 0.0
    for i in range(q_cols):
        aq_i = operator.matvec(q[:, i])
        trace_low += backend.dot(q[:, i], aq_i).item()

    # Phase 3: residual Hutchinson on (I - QQ^T) A (I - QQ^T). Stream column-
    # by-column: generate g_i, project out Q's span, apply A, accumulate sample.
    per_sample = torch.empty(m_per, dtype=operator.dtype, device=operator.device)
    for i in range(m_per):
        g_i = backend.rademacher_like(probe, generator=gen)
        g_perp = g_i - q @ (q.t() @ g_i)
        a_g_perp = operator.matvec(g_perp)
        per_sample[i] = (g_perp * a_g_perp).sum()

    trace_residual = per_sample.mean().item()
    estimate = trace_low + trace_residual
    stderr = (per_sample.std(unbiased=True) / (m_per**0.5)).item() if m_per > 1 else float("nan")
    return TraceResult(estimate=estimate, stderr=stderr, samples=per_sample)

Spectral density

spectral_density(operator, *, num_runs=10, lanczos_steps=80, num_grid_points=10000, sigma=None, grid_padding=0.1, seed=None, backend=None)

Estimate the eigenvalue density of a symmetric operator via Stochastic Lanczos Quadrature.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `operator` | `CurvatureOperator` | symmetric curvature operator. | *required* |
| `num_runs` | `int` | number of independent Lanczos runs (n_v in the paper). 10 is a reasonable default; larger reduces Monte-Carlo noise. | `10` |
| `lanczos_steps` | `int` | Lanczos steps per run (m in the paper). 80-100 is typical; larger improves spectral resolution but costs more matvecs per run. | `80` |
| `num_grid_points` | `int` | the density is evaluated on a regular grid of this size. | `10000` |
| `sigma` | `float \| None` | Gaussian smoothing bandwidth. If None, defaults to `spectrum_range / lanczos_steps`, a standard rule of thumb relating kernel width to quadrature resolution. | `None` |
| `grid_padding` | `float` | fraction of the spectrum range padded on each side of the grid. | `0.1` |
| `seed` | `int \| None` | base seed; per-run init vectors use `seed + run_idx`. | `None` |
| `backend` | `LinAlgBackend[Tensor] \| None` | vector-arithmetic backend. | `None` |
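
A usage sketch, again reusing the hypothetical `DenseOperator` from the `lanczos` example above; the trapezoid check follows from the density being normalized so that ∫ density(t) dt ≈ 1.

import torch

from hessian_eigenthings.algorithms.spectral_density import spectral_density

res = spectral_density(op, num_runs=10, lanczos_steps=40, seed=0)

mass = torch.trapezoid(res.density, res.grid).item()
print(f"integrated mass ~ {mass:.3f}")  # should be close to 1.0

# Raw quadrature data is (num_runs, m); NaN-padded nodes mark runs that
# terminated early (Lanczos breakdown).
print(res.raw_eigenvalues.shape, res.sigma)
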
Source code in hessian_eigenthings/algorithms/spectral_density.py
def spectral_density(
    operator: CurvatureOperator,
    *,
    num_runs: int = 10,
    lanczos_steps: int = 80,
    num_grid_points: int = 10000,
    sigma: float | None = None,
    grid_padding: float = 0.1,
    seed: int | None = None,
    backend: LinAlgBackend[torch.Tensor] | None = None,
) -> SpectralDensityResult:
    """Estimate the eigenvalue density of a symmetric operator via Stochastic Lanczos Quadrature.

    Args:
        operator: symmetric curvature operator.
        num_runs: number of independent Lanczos runs (n_v in the paper). 10 is a
            reasonable default; larger reduces Monte-Carlo noise.
        lanczos_steps: Lanczos steps per run (m in the paper). 80-100 is typical;
            larger improves spectral resolution but costs more matvecs per run.
        num_grid_points: the density is evaluated on a regular grid of this size.
        sigma: Gaussian smoothing bandwidth. If None, defaults to
            `(spectrum_range / lanczos_steps)`, a standard rule-of-thumb relating
            kernel width to quadrature resolution.
        grid_padding: fraction of spectrum range padded on each side of the grid.
        seed: base seed; per-run init vectors use seed + run_idx.
        backend: vector-arithmetic backend.
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be >= 1, got {num_runs}")
    if lanczos_steps < 1:
        raise ValueError(f"lanczos_steps must be >= 1, got {lanczos_steps}")
    if lanczos_steps > operator.size:
        raise ValueError(f"lanczos_steps={lanczos_steps} exceeds operator size {operator.size}")

    backend = backend or SingleDeviceBackend()
    n = operator.size

    raw_nodes_runs: list[torch.Tensor] = []
    raw_weights_runs: list[torch.Tensor] = []

    for run in range(num_runs):
        gen = torch.Generator(device="cpu")
        gen.manual_seed((seed if seed is not None else 0) + run)
        probe = torch.empty(n, dtype=operator.dtype, device=operator.device)
        v0 = backend.rademacher_like(probe, generator=gen)

        td = lanczos_tridiagonal(operator, v0, lanczos_steps, reorthogonalize=True, backend=backend)
        m = td.alphas.shape[0]

        tridiag = torch.zeros(m, m, dtype=operator.dtype, device=operator.device)
        tridiag.diagonal().copy_(td.alphas)
        if m > 1 and td.betas.numel() > 0:
            off = td.betas[: m - 1]
            tridiag.diagonal(1).copy_(off)
            tridiag.diagonal(-1).copy_(off)

        theta, y = torch.linalg.eigh(tridiag)
        weights = y[0, :].pow(2)

        raw_nodes_runs.append(theta)
        raw_weights_runs.append(weights)

    max_m = max(t.numel() for t in raw_nodes_runs)
    raw_eigenvalues = _pad_to(raw_nodes_runs, max_m, fill=float("nan"))
    raw_weights = _pad_to(raw_weights_runs, max_m, fill=0.0)

    nodes_flat = torch.cat(raw_nodes_runs)
    lo, hi = float(nodes_flat.min()), float(nodes_flat.max())
    spread = max(hi - lo, 1e-9)
    if sigma is None:
        sigma = spread / lanczos_steps

    pad = grid_padding * spread
    grid = torch.linspace(
        lo - pad, hi + pad, num_grid_points, dtype=operator.dtype, device=operator.device
    )

    density = torch.zeros_like(grid)
    norm = 1.0 / (sigma * math.sqrt(2.0 * math.pi))
    inv_two_sigma_sq = 1.0 / (2.0 * sigma * sigma)
    for nodes, weights in zip(raw_nodes_runs, raw_weights_runs, strict=True):
        diffs = grid.unsqueeze(1) - nodes.unsqueeze(0)
        gauss = torch.exp(-(diffs.pow(2)) * inv_two_sigma_sq) * norm
        density += (gauss * weights.unsqueeze(0)).sum(dim=1)
    density /= num_runs

    return SpectralDensityResult(
        grid=grid,
        density=density,
        raw_eigenvalues=raw_eigenvalues,
        raw_weights=raw_weights,
        sigma=sigma,
    )

Result types

EigenResult dataclass

Top-k eigenpairs of a symmetric operator, sorted in the order requested by the algorithm.

Source code in hessian_eigenthings/algorithms/result.py
@dataclass(frozen=True)
class EigenResult:
    """Top-k eigenpairs of a symmetric operator, sorted in the order requested by the algorithm."""

    eigenvalues: torch.Tensor  # (k,)
    eigenvectors: torch.Tensor  # (k, n)
    residuals: torch.Tensor  # (k,) ||A v - λ v||
    iterations: int
    converged: torch.Tensor  # (k,) bool
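
A sanity check of the fields, assuming `op` and `result` from the hypothetical `lanczos` example above. Note that for Lanczos the stored residual is the cheap estimate `|β_m s_m,i|`, which tracks the directly computed norm up to floating-point and orthogonality loss.

import torch

for lam, v, r in zip(result.eigenvalues, result.eigenvectors, result.residuals):
    direct = torch.linalg.norm(op.matvec(v) - lam * v)
    print(float(r), float(direct))  # estimate vs. direct ||A v - λ v||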

TraceResult dataclass

Stochastic estimate of tr(A) together with its sample-standard-error.

Source code in hessian_eigenthings/algorithms/trace.py
@dataclass(frozen=True)
class TraceResult:
    """Stochastic estimate of `tr(A)` together with its sample-standard-error."""

    estimate: float
    stderr: float
    samples: torch.Tensor  # (m,) per-sample contributions used for the estimate

SpectralDensityResult dataclass

Smoothed eigenvalue density φ(t) on a regular grid, plus the raw quadrature data.

Source code in hessian_eigenthings/algorithms/spectral_density.py
@dataclass(frozen=True)
class SpectralDensityResult:
    """Smoothed eigenvalue density φ(t) on a regular grid, plus the raw quadrature data."""

    grid: torch.Tensor  # (G,) abscissae
    density: torch.Tensor  # (G,) probability density, ∫ density(t) dt ≈ 1
    raw_eigenvalues: torch.Tensor  # (n_runs, m) tridiagonal eigenvalues per run
    raw_weights: torch.Tensor  # (n_runs, m) squared first components per run
    sigma: float  # Gaussian smoothing bandwidth used