Fitting Methods

Fitting Methods#

Here we will explore the various fitting methods in AstroPhot. You have already encountered some of the methods, but here we will take a more systematic approach and discuss their strengths/weaknesses. Each method will be applied to the same problem with the same initial conditions so you can see how they operate.

%load_ext autoreload
%autoreload 2

import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from scipy.stats import gaussian_kde as kde
from scipy.stats import norm

%matplotlib inline
import astrophot as ap

# Setup a fitting problem. You can ignore this cell to start, it just makes some test data to fit


def true_params():

    # just some random parameters to use for fitting. Feel free to play around with these to see what happens!
    sky_param = np.array([1.5])
    sersic_params = np.array(
        [
            [
                58.44035491,
                55.58516735,
                0.54945988,
                37.19794926 * np.pi / 180,
                2.14513004,
                22.05219055,
                2.45583024,
            ],
            [
                44.00353786,
                31.54430634,
                0.40203928,
                172.03862521 * np.pi / 180,
                2.88613347,
                12.095631,
                2.76711163,
            ],
        ]
    )

    return sersic_params, sky_param


def init_params():

    sky_param = np.array([1.4])
    sersic_params = np.array(
        [
            [57.0, 56.0, 0.6, 40.0 * np.pi / 180, 1.5, 25.0, 2.0],
            [45.0, 30.0, 0.5, 170.0 * np.pi / 180, 2.0, 10.0, 3.0],
        ]
    )

    return sersic_params, sky_param


def initialize_model(target, use_true_params=True):

    # Pick parameters to start the model with
    if use_true_params:
        sersic_params, sky_param = true_params()
    else:
        sersic_params, sky_param = init_params()

    # List of models, starting with the sky
    model_list = [
        ap.models.AstroPhot_Model(
            name="sky",
            model_type="flat sky model",
            target=target,
            parameters={"F": sky_param[0]},
        )
    ]
    # Add models to the list
    for i, params in enumerate(sersic_params):
        model_list.append(
            [
                ap.models.AstroPhot_Model(
                    name=f"sersic {i}",
                    model_type="sersic galaxy model",
                    target=target,
                    parameters={
                        "center": [params[0], params[1]],
                        "q": params[2],
                        "PA": params[3],
                        "n": params[4],
                        "Re": params[5],
                        "Ie": params[6],
                    },
                    # psf_mode = "full", # uncomment to try everything with PSF blurring (takes longer)
                )
            ]
        )

    MODEL = ap.models.Group_Model(
        name="group",
        models=model_list,
        target=target,
    )
    # Make sure every model is ready to go
    MODEL.initialize()

    return MODEL


def generate_target():

    N = 99
    pixelscale = 1.0
    rng = np.random.default_rng(42)

    # PSF has sigma of 2x pixelscale
    PSF = ap.utils.initialize.gaussian_psf(2, 21, pixelscale)
    PSF /= np.sum(PSF)

    target = ap.image.Target_Image(
        data=np.zeros((N, N)),
        pixelscale=pixelscale,
        psf=PSF,
    )

    MODEL = initialize_model(target, True)

    # Sample the model with the true values to make a mock image
    img = MODEL().data.detach().cpu().numpy()
    # Add poisson noise
    target.data = torch.Tensor(img + rng.normal(scale=np.sqrt(img) / 2))
    target.variance = torch.Tensor(img / 4)

    fig, ax = plt.subplots(figsize=(8, 8))
    ap.plots.target_image(fig, ax, target)
    ax.axis("off")
    plt.show()

    return target


def corner_plot(
    chain,
    labels=None,
    bins=None,
    true_values=None,
    plot_density=True,
    plot_contours=True,
    figsize=(10, 10),
):
    ndim = chain.shape[1]

    fig, axes = plt.subplots(ndim, ndim, figsize=figsize)
    plt.subplots_adjust(wspace=0.0, hspace=0.0)
    if bins is None:
        bins = int(np.sqrt(chain.shape[0]))

    for i in range(ndim):
        for j in range(ndim):
            ax = axes[i, j]

            i_range = (np.min(chain[:, i]), np.max(chain[:, i]))
            j_range = (np.min(chain[:, j]), np.max(chain[:, j]))
            if i == j:
                # Plot the histogram of parameter i
                # ax.hist(chain[:, i], bins=bins, histtype="step", range = i_range, density=True, color="k", lw=1)

                if plot_density:
                    # Plot the kernel density estimate
                    kde_x = np.linspace(i_range[0], i_range[1], 100)
                    kde_y = kde(chain[:, i])(kde_x)
                    ax.plot(kde_x, kde_y, color="green", lw=1)

                if true_values is not None:
                    ax.axvline(true_values[i], color="red", linestyle="-", lw=1)
                ax.set_xlim(i_range)

            elif i > j:
                # Plot the 2D histogram of parameters i and j
                # ax.hist2d(chain[:, j], chain[:, i], bins=bins, cmap="Greys")

                if plot_contours:
                    # Plot the kernel density estimate contours
                    kde_ij = kde([chain[:, j], chain[:, i]])
                    x, y = np.mgrid[j_range[0] : j_range[1] : 100j, i_range[0] : i_range[1] : 100j]
                    positions = np.vstack([x.ravel(), y.ravel()])
                    kde_pos = np.reshape(kde_ij(positions).T, x.shape)
                    ax.contour(x, y, kde_pos, colors="green", linewidths=1, levels=3)

                if true_values is not None:
                    ax.axvline(true_values[j], color="red", linestyle="-", lw=1)
                    ax.axhline(true_values[i], color="red", linestyle="-", lw=1)
                ax.set_xlim(j_range)
                ax.set_ylim(i_range)

            else:
                ax.axis("off")

            if j == 0 and labels is not None:
                ax.set_ylabel(labels[i])
            ax.yaxis.set_major_locator(plt.NullLocator())

            if i == ndim - 1 and labels is not None:
                ax.set_xlabel(labels[j])
            ax.xaxis.set_major_locator(plt.NullLocator())

    plt.show()


def corner_plot_covariance(
    cov_matrix, mean, labels=None, figsize=(10, 10), true_values=None, ellipse_colors="g"
):
    num_params = cov_matrix.shape[0]
    fig, axes = plt.subplots(num_params, num_params, figsize=figsize)
    plt.subplots_adjust(wspace=0.0, hspace=0.0)

    for i in range(num_params):
        for j in range(num_params):
            ax = axes[i, j]

            if i == j:
                x = np.linspace(
                    mean[i] - 3 * np.sqrt(cov_matrix[i, i]),
                    mean[i] + 3 * np.sqrt(cov_matrix[i, i]),
                    100,
                )
                y = norm.pdf(x, mean[i], np.sqrt(cov_matrix[i, i]))
                ax.plot(x, y, color="g")
                ax.set_xlim(
                    mean[i] - 3 * np.sqrt(cov_matrix[i, i]), mean[i] + 3 * np.sqrt(cov_matrix[i, i])
                )
                if true_values is not None:
                    ax.axvline(true_values[i], color="red", linestyle="-", lw=1)
            elif j < i:
                cov = cov_matrix[np.ix_([j, i], [j, i])]
                lambda_, v = np.linalg.eig(cov)
                lambda_ = np.sqrt(lambda_)
                angle = np.rad2deg(np.arctan2(v[1, 0], v[0, 0]))
                for k in [1, 2]:
                    ellipse = Ellipse(
                        xy=(mean[j], mean[i]),
                        width=lambda_[0] * k * 2,
                        height=lambda_[1] * k * 2,
                        angle=angle,
                        edgecolor=ellipse_colors,
                        facecolor="none",
                    )
                    ax.add_artist(ellipse)

                # Set axis limits
                margin = 3
                ax.set_xlim(
                    mean[j] - margin * np.sqrt(cov_matrix[j, j]),
                    mean[j] + margin * np.sqrt(cov_matrix[j, j]),
                )
                ax.set_ylim(
                    mean[i] - margin * np.sqrt(cov_matrix[i, i]),
                    mean[i] + margin * np.sqrt(cov_matrix[i, i]),
                )

                if true_values is not None:
                    ax.axvline(true_values[j], color="red", linestyle="-", lw=1)
                    ax.axhline(true_values[i], color="red", linestyle="-", lw=1)

            if j > i:
                ax.axis("off")

            if i < num_params - 1:
                ax.set_xticklabels([])
            else:
                if labels is not None:
                    ax.set_xlabel(labels[j])
            ax.yaxis.set_major_locator(plt.NullLocator())

            if j > 0:
                ax.set_yticklabels([])
            else:
                if labels is not None:
                    ax.set_ylabel(labels[i])
            ax.xaxis.set_major_locator(plt.NullLocator())

    plt.show()


target = generate_target()

../_images/b8ced6f0200b059f9fa0b8cc55f40201afa22427530457ce76c2f860053c03c8.png

Levenberg-Marquardt#

This fitter is identitied as ap.fit.LM and it employs a variant of the second order Newton’s method to converge very quickly to the local minimum. This is the generally accepted best algorithm for most use cases in \(\chi^2\) minimization. If you don’t know what to pick, start with this minimizer. The LM optimizer bridges the gap between first-order gradient descent and second order Newton’s method. When far from the minimum, Newton’s method is unstable and can give wildly wrong results, so LM takes gradient descent steps. However, near the minimum it switches to the Newton’s method which has “quadratic convergence” this means that it takes only a few iterations to converge to several decimal places. This can be represented as:

\((H + LI)h = g\)

Where H is the Hessian matrix of second derivatives, L is the damping parameter, I is the identity matrix, h is the step we will take in parameter space, and g is the gradient. We solve this linear system for h to get the next update step. The “L” scale parameter goes from L >> 1 which represents gradient descent to L << 1 which is Newton’s Method. When L >> 1 the hessian is effectively zero and we get \(h = g/L\) which is just gradient descent with \(1/L\) as the learning rate. In AstroPhot the damping parameter is treated somewhat differently, but the concept is the same.

LM can handle a lot of scenarios and converge to the minimum. Keep in mind, however, that it is seeking a local minimum, so it is best to start off the algorithm as close as possible to the best fit parameters. AstroPhot can automatically initialize, as discussed in other notebooks, but even that needs help sometimes (often in the form of a segmentation map).

The main drawback of LM is its memory consumption which goes as \(\mathcal{O}(PN)\) where P is the number of pixels and N is the number of parameters.

MODEL = initialize_model(target, False)
fig, axarr = plt.subplots(1, 4, figsize=(24, 5))
plt.subplots_adjust(wspace=0.1)
ap.plots.model_image(fig, axarr[0], MODEL)
axarr[0].set_title("Model before optimization")
ap.plots.residual_image(fig, axarr[1], MODEL, normalize_residuals=True)
axarr[1].set_title("Residuals before optimization")

res_lm = ap.fit.LM(MODEL, verbose=1).fit()
print(res_lm.message)

ap.plots.model_image(fig, axarr[2], MODEL)
axarr[2].set_title("Model after optimization")
ap.plots.residual_image(fig, axarr[3], MODEL, normalize_residuals=True)
axarr[3].set_title("Residuals after optimization")
plt.show()

Chi^2/DoF: 267.6885152817945, L: 1.0

Chi^2/DoF: 177.93926526081268, L: 1.2222222222222223

Chi^2/DoF: 54.75892021167093, L: 0.1358024691358025

Chi^2/DoF: 11.656984074187365, L: 0.015089163237311388

Chi^2/DoF: 4.283854234229527, L: 0.0016765736930345987

Chi^2/DoF: 1.9959915073515178, L: 0.00018628596589273318

Chi^2/DoF: 1.03116821072239, L: 2.069844065474813e-05

Chi^2/DoF: 1.0136690535436947, L: 2.299826739416459e-06

Chi^2/DoF: 1.0136677315707687, L: 3.154769189871686e-09

Final Chi^2/DoF: 1.0136677315680622, L: 3.154769189871686e-09. Converged: success

success

../_images/2e9001f0d3d958b5820065b70fa2fa4efe397b214208eafc233587b90b65984a.png

Now that LM has found the \(\chi^2\) minimum, we can do a really neat trick. Since LM needs the hessian matrix, we have access to the hessian matrix at the minimum. This is in fact equal to the negative Fisher information matrix. If we take the matrix inverse of this matrix then we get the covariance matrix for a multivariate gaussian approximation of the \(\chi^2\) surface near the minimum. With the covariance matrix we can create a corner plot just like we would with an MCMC. We will see later that the MCMC methods (at least the ones which converge) produce very similar results!

param_names = list(MODEL.parameters.vector_names())
i = 0
while i < len(param_names):
    param_names[i] = param_names[i].replace(" ", "")
    if "center" in param_names[i]:
        center_name = param_names.pop(i)
        param_names.insert(i, center_name.replace("center", "y"))
        param_names.insert(i, center_name.replace("center", "x"))
    i += 1
set, sky = true_params()
corner_plot_covariance(
    res_lm.covariance_matrix.detach().cpu().numpy(),
    MODEL.parameters.vector_values().detach().cpu().numpy(),
    labels=param_names,
    figsize=(20, 20),
    true_values=np.concatenate((sky, set.ravel())),
)

../_images/c054bcae8c39e9b314bbe393f82355ec172e8a0022fd62d2db21a66da2d8c083.png

Iterative Fit (models)#

An iterative fitter is identified as ap.fit.Iter, this method is generally employed for large models where it is not feasible to hold all the relevant data in memory at once. The iterative fitter will cycle through the models in a Group_Model object and fit them one at a time to the image, using the residuals from the previous cycle. This can be a very robust way to deal with some fits, especially if the overlap between models is not too strong. It is however more dependent on good initialization than other methods like the Levenberg-Marquardt. Also, it is possible for the Iter method to get stuck in a local minimum under certain circumstances.

Note that while the Iterative fitter needs a Group_Model object to iterate over, it is not necessarily true that the sub models are Component_Model objects, they could be Group_Model objects as well. In this way it is possible to cycle through and fit “clusters” of objects that are nearby, so long as it doesn’t consume too much memory.

By only fitting one model at a time it is possible to get caught in a local minimum, or to get out of a local minimum that a different fitter was stuck in. For this reason it can be good to mix-and-match the iterative optimizers so they can help each other get unstuck if a fit is very challenging.

MODEL = initialize_model(target, False)
fig, axarr = plt.subplots(1, 4, figsize=(24, 5))
plt.subplots_adjust(wspace=0.1)
ap.plots.model_image(fig, axarr[0], MODEL)
axarr[0].set_title("Model before optimization")
ap.plots.residual_image(fig, axarr[1], MODEL, normalize_residuals=True)
axarr[1].set_title("Residuals before optimization")

res_iter = ap.fit.Iter(MODEL, verbose=1).fit()

ap.plots.model_image(fig, axarr[2], MODEL)
axarr[2].set_title("Model after optimization")
ap.plots.residual_image(fig, axarr[3], MODEL, normalize_residuals=True)
axarr[3].set_title("Residuals after optimization")
plt.show()

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 6.3052776493532

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 3.2538450815525284

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 2.2193089015786422

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.6528602658695133

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.3515187564821853

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.19298321952236

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.1107243598165408

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0658922275351852

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.042000766415672

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0291459785157278

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0221846334191642

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0183611197655607

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.016266042898724

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0151097695657618

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0144697388750754

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0141149135617438

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0139174381548832

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.013807493303548

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0137461985346397

--------iter-------

sky

sersic 0

sersic 1

Update Chi^2 with new parameters

Loss: 1.0137119952129183

../_images/2e3798d48afdc2bfe3c2db5dc4dcfae06178ea9c50c0c07257fce11c182aa6c5.png

Iterative Fit (parameters)#

This is an iterative fitter identified as ap.fit.Iter_LM and is generally employed for large models where it is not feasible to hold all the relevant data in memory at once. This iterative fitter will cycle through chunks of parameters and fit them one at a time to the image. This can be a very robust way to deal with some fits, especially if the overlap between models is not too strong. This is very similar to the other iterative fitter, however it is necessary for certain fitting circumstances when the problem can’t be broken down into individual component models. This occurs, for example, when the models have many shared (constrained) parameters and there is no obvious way to break down sub-groups of models (an example of this is discussed in the AstroPhot paper).

Note that this is iterating over the parameters, not the models. This allows it to handle parameter covariances even for very large models (if they happen to land in the same chunk). However, for this to work it must evaluate the whole model at each iteration making it somewhat slower than the regular Iter fitter, though it can make up for it by fitting larger chunks at a time which makes the whole optimization faster.

By only fitting a subset of parameters at a time it is possible to get caught in a local minimum, or to get out of a local minimum that a different fitter was stuck in. For this reason it can be good to mix-and-match the iterative optimizers so they can help each other get unstuck. Since this iterative fitter chooses parameters randomly, it can sometimes get itself unstuck if it gets a lucky combination of parameters. Generally giving it more parameters to work with at a time is better.

MODEL = initialize_model(target, False)
fig, axarr = plt.subplots(1, 4, figsize=(24, 5))
plt.subplots_adjust(wspace=0.1)
ap.plots.model_image(fig, axarr[0], MODEL)
axarr[0].set_title("Model before optimization")
ap.plots.residual_image(fig, axarr[1], MODEL, normalize_residuals=True)
axarr[1].set_title("Residuals before optimization")

res_iterlm = ap.fit.Iter_LM(MODEL, chunks=11, verbose=1).fit()

ap.plots.model_image(fig, axarr[2], MODEL)
axarr[2].set_title("Model after optimization")
ap.plots.residual_image(fig, axarr[3], MODEL, normalize_residuals=True)
axarr[3].set_title("Residuals after optimization")
plt.show()

--------iter-------

chunk loss: 3.647221993407412

chunk loss: 2.002123380019146

Loss: 2.002123380019146

--------iter-------

chunk loss: 1.0192564008371952

chunk loss: 1.0158373601361503

Loss: 1.0158373601361503

--------iter-------

chunk loss: 1.0157043155833774

chunk loss: 1.0156447061755647

Loss: 1.0156447061755647

--------iter-------

chunk loss: 1.0139531368778143

chunk loss: 1.0138669972250474

Loss: 1.0138669972250474

--------iter-------

chunk loss: 1.0136741262924274

chunk loss: 1.0136731645201287

Loss: 1.0136731645201287

--------iter-------

chunk loss: 1.013673024223271

chunk loss: 1.0136729179198445

Loss: 1.0136729179198445

--------iter-------

chunk loss: 1.0136678502401588

chunk loss: 1.0136677439621327

Loss: 1.0136677439621327

../_images/1cec19c5cdcd902e11fb326bcb951c09c0836bf28962be5e7d2c5bfe6ef2a824.png

Gradient Descent#

A gradient descent fitter is identified as ap.fit.Grad and uses standard first order derivative methods as provided by PyTorch. These gradient descent methods include Adam, SGD, and LBFGS to name a few. The first order gradient is faster to evaluate and uses less memory, however it is considerably slower to converge than Levenberg-Marquardt. The gradient descent method with a small learning rate will reliably converge towards a local minimum, it will just do so slowly.

In the example below we let it run for 1000 steps and even still it has not converged. In general you should not use gradient descent to optimize a model. However, in a challenging fitting scenario the small step size of gradient descent can actually be an advantage as it will not take any unedpectedly large steps which could mix up some models, or hop over the \(\chi^2\) minimum into impossible parameter space. Just make sure to finish with LM after using Grad so that it fully converges to a reliable minimum.

MODEL = initialize_model(target, False)
fig, axarr = plt.subplots(1, 4, figsize=(24, 5))
plt.subplots_adjust(wspace=0.1)
ap.plots.model_image(fig, axarr[0], MODEL)
axarr[0].set_title("Model before optimization")
ap.plots.residual_image(fig, axarr[1], MODEL, normalize_residuals=True)
axarr[1].set_title("Residuals before optimization")

res_grad = ap.fit.Grad(MODEL, verbose=1, max_iter=1000, optim_kwargs={"lr": 5e-3}).fit()

ap.plots.model_image(fig, axarr[2], MODEL)
axarr[2].set_title("Model after optimization")
ap.plots.residual_image(fig, axarr[3], MODEL, normalize_residuals=True)
axarr[3].set_title("Residuals after optimization")
plt.show()

iter: 100, loss: 42.7005510352121

iter: 200, loss: 19.47954274646822

iter: 300, loss: 8.514593055425426

iter: 400, loss: 3.807539943070843

iter: 500, loss: 2.3893408521514417

iter: 600, loss: 1.9788784378971025

iter: 700, loss: 1.8400631131806646

iter: 800, loss: 1.7618624995937953

iter: 900, loss: 1.69507798625246

iter: 1000, loss: 1.6340798788727897

../_images/6c8d584af906163a6015a79cdc21643fb0b2b95f6684447fd2ff49770acc53df.png

No U-Turn Sampler (NUTS)#

Unlike the above methods, ap.fit.NUTS does not stricktly seek a minimum \(\chi^2\), instead it is an MCMC method which seeks to explore the likelihood space and provide a full posterior in the form of random samples. The NUTS method in AstroPhot is actually just a wrapper for the Pyro implementation (link here). Most of the functionality can be accessed this way, though for very advanced applications it may be necessary to manually interface with Pyro (this is not very challenging as AstroPhot is fully differentiable).

The first iteration of NUTS is always very slow since it compiles the forward method on the fly, after that each sample is drawn much faster. The warmup iterations take longer as the method is exploring the space and determining the ideal step size and mass matrix for fast integration with minimal numerical error (we only do 20 warmup steps here, if something goes wrong just try rerunning). Once the algorithm begins sampling it is able to move quickly (for an MCMC) through the parameter space. For many models, the NUTS sampler is able to collect nearly completely uncorrelated samples, meaning that even 100 is enough to get a good estimate of the posterior.

NUTS is far faster than other MCMC implementations such as a standard Metropolis Hastings MCMC. However, it is still a lot slower than the other optimizers (LM) since it is doing more than seeking a single high likelihood point, it is fully exploring the likelihood space. In simple cases, the automatic covariance matrix from LM is likely good enough, but if one really needs access to the full posterior of a complex model then NUTS is the best way to get it.

For an excellent introduction to the Hamiltonian Monte-Carlo and a high level explanation of NUTS see this review: Betancourt 2018

MODEL = initialize_model(target, False)

# Use LM to start the sampler at a high likelihood location, no burn-in needed!
# In general, NUTS is quite fast to do burn-in so this is often not needed
res1 = ap.fit.LM(MODEL).fit()

# Run the NUTS sampler
res_nuts = ap.fit.NUTS(
    MODEL,
    warmup=20,
    max_iter=100,
    inv_mass=res1.covariance_matrix,
).fit()

Warmup:   0%|          | 0/120 [00:00, ?it/s]

Warmup:   1%|          | 1/120 [00:01,  1.93s/it, step size=1.91e+00, acc. prob=0.001]

Warmup:   2%|▎         | 3/120 [00:02,  1.21it/s, step size=1.80e-01, acc. prob=0.323]

Warmup:   3%|▎         | 4/120 [00:04,  1.12s/it, step size=2.25e-01, acc. prob=0.486]

Warmup:   4%|▍         | 5/120 [00:07,  1.59s/it, step size=3.02e-01, acc. prob=0.579]

Warmup:   5%|▌         | 6/120 [00:07,  1.34s/it, step size=5.08e-01, acc. prob=0.649]

Warmup:   6%|▌         | 7/120 [00:08,  1.18s/it, step size=3.84e-01, acc. prob=0.660]

Warmup:   7%|▋         | 8/120 [00:09,  1.07s/it, step size=4.32e-01, acc. prob=0.683]

Warmup:   8%|▊         | 9/120 [00:10,  1.01it/s, step size=4.20e-01, acc. prob=0.695]

Warmup:   8%|▊         | 10/120 [00:11,  1.06it/s, step size=5.99e-01, acc. prob=0.717]

Warmup:   9%|▉         | 11/120 [00:11,  1.29it/s, step size=1.13e+00, acc. prob=0.743]

Warmup:  10%|█         | 12/120 [00:11,  1.70it/s, step size=9.15e-02, acc. prob=0.681]

Warmup:  11%|█         | 13/120 [00:18,  2.50s/it, step size=1.59e-01, acc. prob=0.703]

Warmup:  12%|█▏        | 14/120 [00:20,  2.25s/it, step size=2.77e-01, acc. prob=0.722]

Warmup:  12%|█▎        | 15/120 [00:21,  1.82s/it, step size=4.27e-01, acc. prob=0.736]

Warmup:  13%|█▎        | 16/120 [00:21,  1.39s/it, step size=5.12e-01, acc. prob=0.744]

Warmup:  14%|█▍        | 17/120 [00:22,  1.22s/it, step size=9.13e-01, acc. prob=0.758]

Warmup:  15%|█▌        | 18/120 [00:22,  1.11it/s, step size=8.23e-02, acc. prob=0.716]

Warmup:  16%|█▌        | 19/120 [00:24,  1.14s/it, step size=3.02e-01, acc. prob=0.730]

Warmup:  17%|█▋        | 20/120 [00:25,  1.05s/it, step size=3.02e-01, acc. prob=0.742]

Warmup:  18%|█▊        | 21/120 [00:25,  1.02it/s, step size=3.02e-01, acc. prob=0.939]

Sample:  18%|█▊        | 22/120 [00:26,  1.08it/s, step size=3.02e-01, acc. prob=0.963]

Sample:  19%|█▉        | 23/120 [00:27,  1.12it/s, step size=3.02e-01, acc. prob=0.943]

Sample:  20%|██        | 24/120 [00:30,  1.39s/it, step size=3.02e-01, acc. prob=0.930]

Sample:  21%|██        | 25/120 [00:30,  1.22s/it, step size=3.02e-01, acc. prob=0.944]

Sample:  22%|██▏       | 26/120 [00:31,  1.10s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  22%|██▎       | 27/120 [00:32,  1.02s/it, step size=3.02e-01, acc. prob=0.955]

Sample:  23%|██▎       | 28/120 [00:33,  1.05it/s, step size=3.02e-01, acc. prob=0.954]

Sample:  24%|██▍       | 29/120 [00:34,  1.10it/s, step size=3.02e-01, acc. prob=0.959]

Sample:  25%|██▌       | 30/120 [00:34,  1.12it/s, step size=3.02e-01, acc. prob=0.963]

Sample:  26%|██▌       | 31/120 [00:37,  1.39s/it, step size=3.02e-01, acc. prob=0.963]

Sample:  27%|██▋       | 32/120 [00:38,  1.22s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  28%|██▊       | 33/120 [00:39,  1.36s/it, step size=3.02e-01, acc. prob=0.952]

Sample:  28%|██▊       | 34/120 [00:40,  1.19s/it, step size=3.02e-01, acc. prob=0.953]

Sample:  29%|██▉       | 35/120 [00:41,  1.08s/it, step size=3.02e-01, acc. prob=0.955]

Sample:  30%|███       | 36/120 [00:42,  1.00s/it, step size=3.02e-01, acc. prob=0.956]

Sample:  31%|███       | 37/120 [00:44,  1.21s/it, step size=3.02e-01, acc. prob=0.957]

Sample:  32%|███▏      | 38/120 [00:45,  1.36s/it, step size=3.02e-01, acc. prob=0.952]

Sample:  32%|███▎      | 39/120 [00:46,  1.20s/it, step size=3.02e-01, acc. prob=0.954]

Sample:  33%|███▎      | 40/120 [00:50,  1.86s/it, step size=3.02e-01, acc. prob=0.953]

Sample:  34%|███▍      | 41/120 [00:50,  1.55s/it, step size=3.02e-01, acc. prob=0.954]

Sample:  35%|███▌      | 42/120 [00:51,  1.33s/it, step size=3.02e-01, acc. prob=0.954]

Sample:  36%|███▌      | 43/120 [00:54,  1.70s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  37%|███▋      | 44/120 [00:55,  1.44s/it, step size=3.02e-01, acc. prob=0.952]

Sample:  38%|███▊      | 45/120 [00:55,  1.12s/it, step size=3.02e-01, acc. prob=0.951]

Sample:  38%|███▊      | 46/120 [00:57,  1.29s/it, step size=3.02e-01, acc. prob=0.952]

Sample:  39%|███▉      | 47/120 [00:57,  1.15s/it, step size=3.02e-01, acc. prob=0.953]

Sample:  40%|████      | 48/120 [00:58,  1.05s/it, step size=3.02e-01, acc. prob=0.951]

Sample:  41%|████      | 49/120 [00:59,  1.02it/s, step size=3.02e-01, acc. prob=0.950]

Sample:  42%|████▏     | 50/120 [00:59,  1.25it/s, step size=3.02e-01, acc. prob=0.951]

Sample:  42%|████▎     | 51/120 [01:02,  1.33s/it, step size=3.02e-01, acc. prob=0.951]

Sample:  43%|████▎     | 52/120 [01:03,  1.17s/it, step size=3.02e-01, acc. prob=0.944]

Sample:  44%|████▍     | 53/120 [01:04,  1.07s/it, step size=3.02e-01, acc. prob=0.944]

Sample:  45%|████▌     | 54/120 [01:05,  1.25s/it, step size=3.02e-01, acc. prob=0.944]

Sample:  46%|████▌     | 55/120 [01:06,  1.12s/it, step size=3.02e-01, acc. prob=0.944]

Sample:  47%|████▋     | 56/120 [01:07,  1.03s/it, step size=3.02e-01, acc. prob=0.945]

Sample:  48%|████▊     | 57/120 [01:08,  1.03it/s, step size=3.02e-01, acc. prob=0.945]

Sample:  48%|████▊     | 58/120 [01:09,  1.09it/s, step size=3.02e-01, acc. prob=0.946]

Sample:  49%|████▉     | 59/120 [01:09,  1.13it/s, step size=3.02e-01, acc. prob=0.947]

Sample:  50%|█████     | 60/120 [01:10,  1.15it/s, step size=3.02e-01, acc. prob=0.946]

Sample:  51%|█████     | 61/120 [01:12,  1.12s/it, step size=3.02e-01, acc. prob=0.945]

Sample:  52%|█████▏    | 62/120 [01:13,  1.03s/it, step size=3.02e-01, acc. prob=0.945]

Sample:  52%|█████▎    | 63/120 [01:14,  1.03it/s, step size=3.02e-01, acc. prob=0.944]

Sample:  53%|█████▎    | 64/120 [01:15,  1.18s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  54%|█████▍    | 65/120 [01:17,  1.34s/it, step size=3.02e-01, acc. prob=0.945]

Sample:  55%|█████▌    | 66/120 [01:20,  1.96s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  56%|█████▌    | 67/120 [01:22,  1.89s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  57%|█████▋    | 68/120 [01:23,  1.57s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  57%|█████▊    | 69/120 [01:23,  1.21s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  58%|█████▊    | 70/120 [01:27,  1.87s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  59%|█████▉    | 71/120 [01:29,  2.08s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  60%|██████    | 72/120 [01:30,  1.70s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  61%|██████    | 73/120 [01:31,  1.44s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  62%|██████▏   | 74/120 [01:32,  1.25s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  62%|██████▎   | 75/120 [01:33,  1.12s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  63%|██████▎   | 76/120 [01:33,  1.03s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  64%|██████▍   | 77/120 [01:35,  1.23s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  65%|██████▌   | 78/120 [01:38,  1.62s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  66%|██████▌   | 79/120 [01:38,  1.39s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  67%|██████▋   | 80/120 [01:39,  1.21s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  68%|██████▊   | 81/120 [01:40,  1.10s/it, step size=3.02e-01, acc. prob=0.946]

Sample:  68%|██████▊   | 82/120 [01:41,  1.01s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  69%|██████▉   | 83/120 [01:42,  1.04it/s, step size=3.02e-01, acc. prob=0.947]

Sample:  70%|███████   | 84/120 [01:43,  1.18s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  71%|███████   | 85/120 [01:45,  1.33s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  72%|███████▏  | 86/120 [01:46,  1.31s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  72%|███████▎  | 87/120 [01:47,  1.16s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  73%|███████▎  | 88/120 [01:48,  1.05s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  74%|███████▍  | 89/120 [01:51,  1.50s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  75%|███████▌  | 90/120 [01:51,  1.30s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  76%|███████▌  | 91/120 [01:53,  1.42s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  77%|███████▋  | 92/120 [01:55,  1.50s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  78%|███████▊  | 93/120 [01:56,  1.30s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  78%|███████▊  | 94/120 [01:56,  1.15s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  79%|███████▉  | 95/120 [01:58,  1.31s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  80%|████████  | 96/120 [01:59,  1.16s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  81%|████████  | 97/120 [02:00,  1.06s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  82%|████████▏ | 98/120 [02:01,  1.25s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  82%|████████▎ | 99/120 [02:04,  1.64s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  83%|████████▎ | 100/120 [02:05,  1.39s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  84%|████████▍ | 101/120 [02:05,  1.09s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  85%|████████▌ | 102/120 [02:07,  1.27s/it, step size=3.02e-01, acc. prob=0.950]

Sample:  86%|████████▌ | 103/120 [02:09,  1.39s/it, step size=3.02e-01, acc. prob=0.947]

Sample:  87%|████████▋ | 104/120 [02:09,  1.22s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  88%|████████▊ | 105/120 [02:10,  1.11s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  88%|████████▊ | 106/120 [02:11,  1.02s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  89%|████████▉ | 107/120 [02:12,  1.04it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  90%|█████████ | 108/120 [02:12,  1.27it/s, step size=3.02e-01, acc. prob=0.948]

Sample:  91%|█████████ | 109/120 [02:13,  1.25it/s, step size=3.02e-01, acc. prob=0.948]

Sample:  92%|█████████▏| 110/120 [02:13,  1.49it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  92%|█████████▎| 111/120 [02:14,  1.40it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  93%|█████████▎| 112/120 [02:15,  1.33it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  94%|█████████▍| 113/120 [02:16,  1.30it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  95%|█████████▌| 114/120 [02:18,  1.04s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  96%|█████████▌| 115/120 [02:18,  1.03it/s, step size=3.02e-01, acc. prob=0.949]

Sample:  97%|█████████▋| 116/120 [02:19,  1.08it/s, step size=3.02e-01, acc. prob=0.948]

Sample:  98%|█████████▊| 117/120 [02:23,  1.68s/it, step size=3.02e-01, acc. prob=0.949]

Sample:  98%|█████████▊| 118/120 [02:24,  1.68s/it, step size=3.02e-01, acc. prob=0.948]

Sample:  99%|█████████▉| 119/120 [02:27,  1.95s/it, step size=3.02e-01, acc. prob=0.949]

Sample: 100%|██████████| 120/120 [02:28,  1.61s/it, step size=3.02e-01, acc. prob=0.948]

Sample: 100%|██████████| 120/120 [02:28,  1.24s/it, step size=3.02e-01, acc. prob=0.948]

Note that there is no “after optimization” image above, because optimization was not done, it was full likelihood exploration. We can now create a corner plot with 2D projections of the 22 dimensional space that NUTS was exploring. The resulting corner plot is about what you would expect to get with 100 samples drawn from the multivariate gaussian found by LM above. If you run it again with more samples then the results will get even smoother.

# corner plot of the posterior
# observe that it is very similar to the corner plot from the LM optimization since this case can be roughly
# approximated as a multivariate gaussian centered on the maximum likelihood point
param_names = list(MODEL.parameters.vector_names())
i = 0
while i < len(param_names):
    param_names[i] = param_names[i].replace(" ", "")
    if "center" in param_names[i]:
        center_name = param_names.pop(i)
        param_names.insert(i, center_name.replace("center", "y"))
        param_names.insert(i, center_name.replace("center", "x"))
    i += 1

set, sky = true_params()
corner_plot(
    res_nuts.chain.detach().cpu().numpy(),
    labels=param_names,
    figsize=(20, 20),
    true_values=np.concatenate((sky, set.ravel())),
)

../_images/3083564a29f5fb0c1f1ed1393e3180c5d19d4380042a21b60ad3780136fa7196.png

Hamiltonian Monte-Carlo (HMC)#

The ap.fit.HMC is a simpler variant of the NUTS sampler. HMC takes a fixed number of steps at a fixed step size following Hamiltonian dynamics. This is in contrast to NUTS which attempts to optimally choose these parameters. HMC may be suitable in some cases where NUTS is unable to find ideal parameters. Also in some cases where you already know the pretty good step parameters HMC may run faster. If you don’t want to fiddle around with parameters then stick with NUTS, HMC results will still have autocorrelation which will depend on the problem and choice of step parameters.

MODEL = initialize_model(target, False)

# Use LM to start the sampler at a high likelihood location, no burn-in needed!
res1 = ap.fit.LM(MODEL).fit()

# Run the HMC sampler
res_hmc = ap.fit.HMC(
    MODEL,
    warmup=1,
    max_iter=150,
    epsilon=1e-1,
    leapfrog_steps=10,
    inv_mass=res1.covariance_matrix,
).fit()

Warmup:   0%|          | 0/151 [00:00, ?it/s]

Warmup:   1%|          | 1/151 [00:00,  1.56it/s, step size=1.00e-01, acc. prob=0.983]

Warmup:   1%|▏         | 2/151 [00:01,  1.73it/s, step size=1.00e-01, acc. prob=0.986]

Sample:   2%|▏         | 3/151 [00:01,  1.78it/s, step size=1.00e-01, acc. prob=0.990]

Sample:   3%|▎         | 4/151 [00:02,  1.82it/s, step size=1.00e-01, acc. prob=0.993]

Sample:   3%|▎         | 5/151 [00:02,  1.82it/s, step size=1.00e-01, acc. prob=0.995]

Sample:   4%|▍         | 6/151 [00:03,  1.83it/s, step size=1.00e-01, acc. prob=0.977]

Sample:   5%|▍         | 7/151 [00:03,  1.84it/s, step size=1.00e-01, acc. prob=0.981]

Sample:   5%|▌         | 8/151 [00:04,  1.84it/s, step size=1.00e-01, acc. prob=0.984]

Sample:   6%|▌         | 9/151 [00:04,  1.84it/s, step size=1.00e-01, acc. prob=0.984]

Sample:   7%|▋         | 10/151 [00:05,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample:   7%|▋         | 11/151 [00:06,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:   8%|▊         | 12/151 [00:06,  1.86it/s, step size=1.00e-01, acc. prob=0.981]

Sample:   9%|▊         | 13/151 [00:07,  1.86it/s, step size=1.00e-01, acc. prob=0.983]

Sample:   9%|▉         | 14/151 [00:07,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  10%|▉         | 15/151 [00:08,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  11%|█         | 16/151 [00:08,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  11%|█▏        | 17/151 [00:09,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  12%|█▏        | 18/151 [00:09,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  13%|█▎        | 19/151 [00:10,  1.86it/s, step size=1.00e-01, acc. prob=0.982]

Sample:  13%|█▎        | 20/151 [00:10,  1.86it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  14%|█▍        | 21/151 [00:11,  1.87it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  15%|█▍        | 22/151 [00:11,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  15%|█▌        | 23/151 [00:12,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  16%|█▌        | 24/151 [00:13,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  17%|█▋        | 25/151 [00:13,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  17%|█▋        | 26/151 [00:14,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  18%|█▊        | 27/151 [00:14,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  19%|█▊        | 28/151 [00:15,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  19%|█▉        | 29/151 [00:15,  1.85it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  20%|█▉        | 30/151 [00:16,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  21%|██        | 31/151 [00:16,  1.87it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  21%|██        | 32/151 [00:17,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  22%|██▏       | 33/151 [00:17,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  23%|██▎       | 34/151 [00:18,  1.87it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  23%|██▎       | 35/151 [00:18,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  24%|██▍       | 36/151 [00:19,  1.87it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  25%|██▍       | 37/151 [00:20,  1.87it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  25%|██▌       | 38/151 [00:20,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  26%|██▌       | 39/151 [00:21,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  26%|██▋       | 40/151 [00:21,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  27%|██▋       | 41/151 [00:22,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  28%|██▊       | 42/151 [00:22,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  28%|██▊       | 43/151 [00:23,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  29%|██▉       | 44/151 [00:23,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  30%|██▉       | 45/151 [00:24,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  30%|███       | 46/151 [00:24,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  31%|███       | 47/151 [00:25,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  32%|███▏      | 48/151 [00:25,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  32%|███▏      | 49/151 [00:26,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  33%|███▎      | 50/151 [00:26,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  34%|███▍      | 51/151 [00:27,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  34%|███▍      | 52/151 [00:28,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  35%|███▌      | 53/151 [00:28,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  36%|███▌      | 54/151 [00:29,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  36%|███▋      | 55/151 [00:29,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  37%|███▋      | 56/151 [00:30,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  38%|███▊      | 57/151 [00:30,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  38%|███▊      | 58/151 [00:31,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  39%|███▉      | 59/151 [00:31,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  40%|███▉      | 60/151 [00:32,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  40%|████      | 61/151 [00:32,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  41%|████      | 62/151 [00:33,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  42%|████▏     | 63/151 [00:34,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  42%|████▏     | 64/151 [00:34,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  43%|████▎     | 65/151 [00:35,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  44%|████▎     | 66/151 [00:35,  1.84it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  44%|████▍     | 67/151 [00:36,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  45%|████▌     | 68/151 [00:36,  1.85it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  46%|████▌     | 69/151 [00:37,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  46%|████▋     | 70/151 [00:37,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  47%|████▋     | 71/151 [00:38,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  48%|████▊     | 72/151 [00:38,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  48%|████▊     | 73/151 [00:39,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  49%|████▉     | 74/151 [00:39,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  50%|████▉     | 75/151 [00:40,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  50%|█████     | 76/151 [00:41,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  51%|█████     | 77/151 [00:41,  1.87it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  52%|█████▏    | 78/151 [00:42,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  52%|█████▏    | 79/151 [00:42,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  53%|█████▎    | 80/151 [00:43,  1.86it/s, step size=1.00e-01, acc. prob=0.987]

Sample:  54%|█████▎    | 81/151 [00:43,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  54%|█████▍    | 82/151 [00:44,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  55%|█████▍    | 83/151 [00:44,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  56%|█████▌    | 84/151 [00:45,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  56%|█████▋    | 85/151 [00:45,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  57%|█████▋    | 86/151 [00:46,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  58%|█████▊    | 87/151 [00:46,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  58%|█████▊    | 88/151 [00:47,  1.86it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  59%|█████▉    | 89/151 [00:48,  1.87it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  60%|█████▉    | 90/151 [00:48,  1.87it/s, step size=1.00e-01, acc. prob=0.983]

Sample:  60%|██████    | 91/151 [00:49,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  61%|██████    | 92/151 [00:49,  1.87it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  62%|██████▏   | 93/151 [00:50,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  62%|██████▏   | 94/151 [00:50,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  63%|██████▎   | 95/151 [00:51,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  64%|██████▎   | 96/151 [00:51,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  64%|██████▍   | 97/151 [00:52,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  65%|██████▍   | 98/151 [00:52,  1.85it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  66%|██████▌   | 99/151 [00:53,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  66%|██████▌   | 100/151 [00:53,  1.86it/s, step size=1.00e-01, acc. prob=0.984]

Sample:  67%|██████▋   | 101/151 [00:54,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  68%|██████▊   | 102/151 [00:55,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  68%|██████▊   | 103/151 [00:55,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  69%|██████▉   | 104/151 [00:56,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  70%|██████▉   | 105/151 [00:56,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  70%|███████   | 106/151 [00:57,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  71%|███████   | 107/151 [00:57,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  72%|███████▏  | 108/151 [00:58,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  72%|███████▏  | 109/151 [00:58,  1.87it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  73%|███████▎  | 110/151 [00:59,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  74%|███████▎  | 111/151 [00:59,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  74%|███████▍  | 112/151 [01:00,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  75%|███████▍  | 113/151 [01:00,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  75%|███████▌  | 114/151 [01:01,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  76%|███████▌  | 115/151 [01:02,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  77%|███████▋  | 116/151 [01:02,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  77%|███████▋  | 117/151 [01:03,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  78%|███████▊  | 118/151 [01:03,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  79%|███████▉  | 119/151 [01:04,  1.87it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  79%|███████▉  | 120/151 [01:04,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  80%|████████  | 121/151 [01:05,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  81%|████████  | 122/151 [01:05,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  81%|████████▏ | 123/151 [01:06,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  82%|████████▏ | 124/151 [01:06,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  83%|████████▎ | 125/151 [01:07,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  83%|████████▎ | 126/151 [01:07,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  84%|████████▍ | 127/151 [01:08,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  85%|████████▍ | 128/151 [01:09,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  85%|████████▌ | 129/151 [01:09,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  86%|████████▌ | 130/151 [01:10,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  87%|████████▋ | 131/151 [01:10,  1.87it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  87%|████████▋ | 132/151 [01:11,  1.86it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  88%|████████▊ | 133/151 [01:11,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  89%|████████▊ | 134/151 [01:12,  1.86it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  89%|████████▉ | 135/151 [01:12,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  90%|█████████ | 136/151 [01:13,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  91%|█████████ | 137/151 [01:13,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  91%|█████████▏| 138/151 [01:14,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  92%|█████████▏| 139/151 [01:14,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  93%|█████████▎| 140/151 [01:15,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  93%|█████████▎| 141/151 [01:16,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  94%|█████████▍| 142/151 [01:16,  1.83it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  95%|█████████▍| 143/151 [01:17,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  95%|█████████▌| 144/151 [01:17,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  96%|█████████▌| 145/151 [01:18,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  97%|█████████▋| 146/151 [01:18,  1.84it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  97%|█████████▋| 147/151 [01:19,  1.85it/s, step size=1.00e-01, acc. prob=0.985]

Sample:  98%|█████████▊| 148/151 [01:19,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  99%|█████████▊| 149/151 [01:20,  1.83it/s, step size=1.00e-01, acc. prob=0.986]

Sample:  99%|█████████▉| 150/151 [01:20,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample: 100%|██████████| 151/151 [01:21,  1.84it/s, step size=1.00e-01, acc. prob=0.986]

Sample: 100%|██████████| 151/151 [01:21,  1.85it/s, step size=1.00e-01, acc. prob=0.986]

# corner plot of the posterior
param_names = list(MODEL.parameters.vector_names())
i = 0
while i < len(param_names):
    param_names[i] = param_names[i].replace(" ", "")
    if "center" in param_names[i]:
        center_name = param_names.pop(i)
        param_names.insert(i, center_name.replace("center", "y"))
        param_names.insert(i, center_name.replace("center", "x"))
    i += 1

set, sky = true_params()
corner_plot(
    res_hmc.chain.detach().cpu().numpy(),
    labels=param_names,
    figsize=(20, 20),
    true_values=np.concatenate((sky, set.ravel())),
)

../_images/1078a6e776526dfc5821d08391651f2eed282a712b61a59519243f5095c3169b.png

Metropolis Hastings#

This is the classic MCMC algorithm using the Metropolis Hastngs accept step identified with ap.fit.MHMCMC. One can set the gaussian random step scale and then explore the posterior. While this technically always works, in practice it can take exceedingly long to actually converge to the posterior. This is because the step size must be set very small to have a reasonable likelihood of accepting each step, so it never moves very far in parameter space. With each subsequent sample being very close to the previous sample it can take a long time for it to wander away from its starting point. In the example below it would take an extremely long time for the chain to converge. Instead of waiting that long, we demonstrate the functionality with 1000 steps, but suggest using NUTS for any real world problem. Still, if there is something NUTS can’t handle (a function that isn’t differentiable) then MHMCMC can save the day (even if it takes all day to do it).

MODEL = initialize_model(target, False)

# Use LM to start the sampler at a high likelihood location, no burn-in needed!
res1 = ap.fit.LM(MODEL).fit()

# Run the HMC sampler
res_mh = ap.fit.MHMCMC(MODEL, verbose=1, max_iter=1000, epsilon=1e-4, report_after=np.inf).fit()

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 3/1000 [00:00<00:34, 29.02it/s]

  1%|          | 6/1000 [00:00<00:34, 29.20it/s]

  1%|          | 9/1000 [00:00<00:33, 29.20it/s]

  1%|          | 12/1000 [00:00<00:33, 29.15it/s]

  2%|▏         | 15/1000 [00:00<00:33, 29.05it/s]

  2%|▏         | 18/1000 [00:00<00:33, 29.04it/s]

  2%|▏         | 21/1000 [00:00<00:33, 28.95it/s]

  2%|▏         | 24/1000 [00:00<00:33, 28.90it/s]

  3%|▎         | 27/1000 [00:00<00:33, 28.88it/s]

  3%|▎         | 30/1000 [00:01<00:33, 28.94it/s]

  3%|▎         | 33/1000 [00:01<00:33, 28.79it/s]

  4%|▎         | 36/1000 [00:01<00:33, 28.69it/s]

  4%|▍         | 39/1000 [00:01<00:33, 28.87it/s]

  4%|▍         | 42/1000 [00:01<00:33, 28.99it/s]

  4%|▍         | 45/1000 [00:01<00:32, 29.09it/s]

  5%|▍         | 48/1000 [00:01<00:32, 29.25it/s]

  5%|▌         | 51/1000 [00:01<00:32, 29.31it/s]

  5%|▌         | 54/1000 [00:01<00:32, 29.41it/s]

  6%|▌         | 57/1000 [00:01<00:32, 29.24it/s]

  6%|▌         | 60/1000 [00:02<00:32, 29.33it/s]

  6%|▋         | 63/1000 [00:02<00:32, 29.26it/s]

  7%|▋         | 66/1000 [00:02<00:31, 29.36it/s]

  7%|▋         | 69/1000 [00:02<00:31, 29.38it/s]

  7%|▋         | 72/1000 [00:02<00:31, 29.20it/s]

  8%|▊         | 75/1000 [00:02<00:31, 29.08it/s]

  8%|▊         | 78/1000 [00:02<00:31, 29.10it/s]

  8%|▊         | 81/1000 [00:02<00:31, 29.13it/s]

  8%|▊         | 84/1000 [00:02<00:31, 29.16it/s]

  9%|▊         | 87/1000 [00:02<00:31, 29.16it/s]

  9%|▉         | 90/1000 [00:03<00:31, 29.09it/s]

  9%|▉         | 93/1000 [00:03<00:31, 28.75it/s]

 10%|▉         | 96/1000 [00:03<00:31, 28.73it/s]

 10%|▉         | 99/1000 [00:03<00:31, 28.91it/s]

 10%|█         | 102/1000 [00:03<00:30, 29.06it/s]

 10%|█         | 105/1000 [00:03<00:30, 29.26it/s]

 11%|█         | 108/1000 [00:03<00:30, 29.34it/s]

 11%|█         | 111/1000 [00:03<00:30, 29.40it/s]

 11%|█▏        | 114/1000 [00:03<00:30, 29.39it/s]

 12%|█▏        | 117/1000 [00:04<00:30, 29.26it/s]

 12%|█▏        | 120/1000 [00:04<00:30, 29.26it/s]

 12%|█▏        | 123/1000 [00:04<00:30, 29.20it/s]

 13%|█▎        | 126/1000 [00:04<00:29, 29.13it/s]

 13%|█▎        | 129/1000 [00:04<00:30, 28.96it/s]

 13%|█▎        | 132/1000 [00:04<00:30, 28.80it/s]

 14%|█▎        | 135/1000 [00:04<00:30, 28.83it/s]

 14%|█▍        | 138/1000 [00:04<00:30, 28.70it/s]

 14%|█▍        | 141/1000 [00:04<00:29, 28.72it/s]

 14%|█▍        | 144/1000 [00:04<00:29, 28.65it/s]

 15%|█▍        | 147/1000 [00:05<00:29, 28.79it/s]

 15%|█▌        | 150/1000 [00:05<00:29, 28.76it/s]

 15%|█▌        | 153/1000 [00:05<00:29, 28.80it/s]

 16%|█▌        | 156/1000 [00:05<00:29, 28.93it/s]

 16%|█▌        | 159/1000 [00:05<00:28, 29.06it/s]

 16%|█▌        | 162/1000 [00:05<00:28, 29.15it/s]

 16%|█▋        | 165/1000 [00:05<00:28, 29.25it/s]

 17%|█▋        | 168/1000 [00:05<00:28, 29.21it/s]

 17%|█▋        | 171/1000 [00:05<00:28, 29.27it/s]

 17%|█▋        | 174/1000 [00:05<00:28, 29.05it/s]

 18%|█▊        | 177/1000 [00:06<00:28, 29.01it/s]

 18%|█▊        | 180/1000 [00:06<00:28, 28.97it/s]

 18%|█▊        | 183/1000 [00:06<00:28, 29.03it/s]

 19%|█▊        | 186/1000 [00:06<00:28, 28.91it/s]

 19%|█▉        | 189/1000 [00:06<00:28, 28.90it/s]

 19%|█▉        | 192/1000 [00:06<00:27, 29.01it/s]

 20%|█▉        | 195/1000 [00:06<00:27, 29.11it/s]

 20%|█▉        | 198/1000 [00:06<00:27, 29.23it/s]

 20%|██        | 201/1000 [00:06<00:27, 29.22it/s]

 20%|██        | 204/1000 [00:07<00:27, 29.12it/s]

 21%|██        | 207/1000 [00:07<00:27, 28.99it/s]

 21%|██        | 210/1000 [00:07<00:27, 28.82it/s]

 21%|██▏       | 213/1000 [00:07<00:27, 29.05it/s]

 22%|██▏       | 216/1000 [00:07<00:26, 29.18it/s]

 22%|██▏       | 219/1000 [00:07<00:26, 29.04it/s]

 22%|██▏       | 222/1000 [00:07<00:26, 29.07it/s]

 22%|██▎       | 225/1000 [00:07<00:26, 28.92it/s]

 23%|██▎       | 228/1000 [00:07<00:26, 28.91it/s]

 23%|██▎       | 231/1000 [00:07<00:26, 28.82it/s]

 23%|██▎       | 234/1000 [00:08<00:26, 28.94it/s]

 24%|██▎       | 237/1000 [00:08<00:26, 28.86it/s]

 24%|██▍       | 240/1000 [00:08<00:26, 28.86it/s]

 24%|██▍       | 243/1000 [00:08<00:26, 28.79it/s]

 25%|██▍       | 246/1000 [00:08<00:26, 28.76it/s]

 25%|██▍       | 249/1000 [00:08<00:26, 28.88it/s]

 25%|██▌       | 252/1000 [00:08<00:25, 28.95it/s]

 26%|██▌       | 255/1000 [00:08<00:25, 29.07it/s]

 26%|██▌       | 258/1000 [00:08<00:25, 28.82it/s]

 26%|██▌       | 261/1000 [00:08<00:25, 28.90it/s]

 26%|██▋       | 264/1000 [00:09<00:25, 28.93it/s]

 27%|██▋       | 267/1000 [00:09<00:25, 28.70it/s]

 27%|██▋       | 270/1000 [00:09<00:25, 28.87it/s]

 27%|██▋       | 273/1000 [00:09<00:25, 29.02it/s]

 28%|██▊       | 276/1000 [00:09<00:24, 29.08it/s]

 28%|██▊       | 279/1000 [00:09<00:24, 29.20it/s]

 28%|██▊       | 282/1000 [00:09<00:24, 29.26it/s]

 28%|██▊       | 285/1000 [00:09<00:24, 29.08it/s]

 29%|██▉       | 288/1000 [00:09<00:24, 28.95it/s]

 29%|██▉       | 291/1000 [00:10<00:24, 28.85it/s]

 29%|██▉       | 294/1000 [00:10<00:24, 28.81it/s]

 30%|██▉       | 297/1000 [00:10<00:24, 28.96it/s]

 30%|███       | 300/1000 [00:10<00:24, 29.02it/s]

 30%|███       | 303/1000 [00:10<00:23, 29.09it/s]

 31%|███       | 306/1000 [00:10<00:23, 29.04it/s]

 31%|███       | 309/1000 [00:10<00:23, 29.11it/s]

 31%|███       | 312/1000 [00:10<00:23, 29.11it/s]

 32%|███▏      | 315/1000 [00:10<00:23, 29.04it/s]

 32%|███▏      | 318/1000 [00:10<00:23, 29.13it/s]

 32%|███▏      | 321/1000 [00:11<00:23, 29.19it/s]

 32%|███▏      | 324/1000 [00:11<00:23, 28.86it/s]

 33%|███▎      | 327/1000 [00:11<00:23, 28.58it/s]

 33%|███▎      | 330/1000 [00:11<00:23, 28.63it/s]

 33%|███▎      | 333/1000 [00:11<00:23, 28.63it/s]

 34%|███▎      | 336/1000 [00:11<00:23, 28.73it/s]

 34%|███▍      | 339/1000 [00:11<00:22, 28.87it/s]

 34%|███▍      | 342/1000 [00:11<00:22, 28.96it/s]

 34%|███▍      | 345/1000 [00:11<00:22, 29.11it/s]

 35%|███▍      | 348/1000 [00:11<00:22, 29.06it/s]

 35%|███▌      | 351/1000 [00:12<00:22, 29.21it/s]

 35%|███▌      | 354/1000 [00:12<00:22, 29.26it/s]

 36%|███▌      | 357/1000 [00:12<00:21, 29.31it/s]

 36%|███▌      | 360/1000 [00:12<00:21, 29.26it/s]

 36%|███▋      | 363/1000 [00:12<00:21, 29.26it/s]

 37%|███▋      | 366/1000 [00:12<00:21, 29.34it/s]

 37%|███▋      | 369/1000 [00:12<00:21, 29.37it/s]

 37%|███▋      | 372/1000 [00:12<00:21, 29.32it/s]

 38%|███▊      | 375/1000 [00:12<00:21, 29.28it/s]

 38%|███▊      | 378/1000 [00:13<00:21, 29.25it/s]

 38%|███▊      | 381/1000 [00:13<00:21, 29.07it/s]

 38%|███▊      | 384/1000 [00:13<00:21, 28.68it/s]

 39%|███▊      | 387/1000 [00:13<00:21, 28.61it/s]

 39%|███▉      | 390/1000 [00:13<00:21, 28.80it/s]

 39%|███▉      | 393/1000 [00:13<00:21, 28.89it/s]

 40%|███▉      | 396/1000 [00:13<00:20, 29.04it/s]

 40%|███▉      | 399/1000 [00:13<00:20, 29.15it/s]

 40%|████      | 402/1000 [00:13<00:20, 29.20it/s]

 40%|████      | 405/1000 [00:13<00:20, 29.17it/s]

 41%|████      | 408/1000 [00:14<00:20, 29.23it/s]

 41%|████      | 411/1000 [00:14<00:20, 29.16it/s]

 41%|████▏     | 414/1000 [00:14<00:19, 29.34it/s]

 42%|████▏     | 417/1000 [00:14<00:19, 29.35it/s]

 42%|████▏     | 420/1000 [00:14<00:19, 29.07it/s]

 42%|████▏     | 423/1000 [00:14<00:19, 29.04it/s]

 43%|████▎     | 426/1000 [00:14<00:19, 29.06it/s]

 43%|████▎     | 429/1000 [00:14<00:19, 28.89it/s]

 43%|████▎     | 432/1000 [00:14<00:19, 28.92it/s]

 44%|████▎     | 435/1000 [00:14<00:19, 28.94it/s]

 44%|████▍     | 438/1000 [00:15<00:19, 28.77it/s]

 44%|████▍     | 441/1000 [00:15<00:19, 28.53it/s]

 44%|████▍     | 444/1000 [00:15<00:19, 28.54it/s]

 45%|████▍     | 447/1000 [00:15<00:19, 28.75it/s]

 45%|████▌     | 450/1000 [00:15<00:19, 28.84it/s]

 45%|████▌     | 453/1000 [00:15<00:19, 28.68it/s]

 46%|████▌     | 456/1000 [00:15<00:18, 28.68it/s]

 46%|████▌     | 459/1000 [00:15<00:18, 28.80it/s]

 46%|████▌     | 462/1000 [00:15<00:18, 28.89it/s]

 46%|████▋     | 465/1000 [00:16<00:18, 28.85it/s]

 47%|████▋     | 468/1000 [00:16<00:18, 28.95it/s]

 47%|████▋     | 471/1000 [00:16<00:18, 29.11it/s]

 47%|████▋     | 474/1000 [00:16<00:18, 28.97it/s]

 48%|████▊     | 477/1000 [00:16<00:18, 28.85it/s]

 48%|████▊     | 480/1000 [00:16<00:18, 28.80it/s]

 48%|████▊     | 483/1000 [00:16<00:17, 28.91it/s]

 49%|████▊     | 486/1000 [00:16<00:17, 29.03it/s]

 49%|████▉     | 489/1000 [00:16<00:17, 29.06it/s]

 49%|████▉     | 492/1000 [00:16<00:17, 28.84it/s]

 50%|████▉     | 495/1000 [00:17<00:17, 28.83it/s]

 50%|████▉     | 498/1000 [00:17<00:17, 28.75it/s]

 50%|█████     | 501/1000 [00:17<00:17, 28.81it/s]

 50%|█████     | 504/1000 [00:17<00:17, 29.02it/s]

 51%|█████     | 507/1000 [00:17<00:16, 29.02it/s]

 51%|█████     | 510/1000 [00:17<00:16, 29.21it/s]

 51%|█████▏    | 513/1000 [00:17<00:16, 29.17it/s]

 52%|█████▏    | 516/1000 [00:17<00:16, 29.23it/s]

 52%|█████▏    | 519/1000 [00:17<00:16, 29.24it/s]

 52%|█████▏    | 522/1000 [00:17<00:16, 29.10it/s]

 52%|█████▎    | 525/1000 [00:18<00:16, 29.10it/s]

 53%|█████▎    | 528/1000 [00:18<00:16, 28.95it/s]

 53%|█████▎    | 531/1000 [00:18<00:16, 28.89it/s]

 53%|█████▎    | 534/1000 [00:18<00:16, 28.85it/s]

 54%|█████▎    | 537/1000 [00:18<00:16, 28.77it/s]

 54%|█████▍    | 540/1000 [00:18<00:15, 28.92it/s]

 54%|█████▍    | 543/1000 [00:18<00:15, 29.03it/s]

 55%|█████▍    | 546/1000 [00:18<00:15, 29.04it/s]

 55%|█████▍    | 549/1000 [00:18<00:15, 29.02it/s]

 55%|█████▌    | 552/1000 [00:19<00:15, 29.08it/s]

 56%|█████▌    | 555/1000 [00:19<00:15, 28.93it/s]

 56%|█████▌    | 558/1000 [00:19<00:15, 28.68it/s]

 56%|█████▌    | 561/1000 [00:19<00:15, 28.95it/s]

 56%|█████▋    | 564/1000 [00:19<00:14, 29.15it/s]

 57%|█████▋    | 567/1000 [00:19<00:14, 29.30it/s]

 57%|█████▋    | 570/1000 [00:19<00:14, 29.26it/s]

 57%|█████▋    | 573/1000 [00:19<00:14, 29.20it/s]

 58%|█████▊    | 576/1000 [00:19<00:14, 29.10it/s]

 58%|█████▊    | 579/1000 [00:19<00:14, 29.04it/s]

 58%|█████▊    | 582/1000 [00:20<00:14, 28.96it/s]

 58%|█████▊    | 585/1000 [00:20<00:14, 28.69it/s]

 59%|█████▉    | 588/1000 [00:20<00:14, 28.77it/s]

 59%|█████▉    | 591/1000 [00:20<00:14, 28.82it/s]

 59%|█████▉    | 594/1000 [00:20<00:14, 28.79it/s]

 60%|█████▉    | 597/1000 [00:20<00:13, 28.80it/s]

 60%|██████    | 600/1000 [00:20<00:13, 28.97it/s]

 60%|██████    | 603/1000 [00:20<00:13, 29.06it/s]

 61%|██████    | 606/1000 [00:20<00:13, 29.12it/s]

 61%|██████    | 609/1000 [00:20<00:13, 29.15it/s]

 61%|██████    | 612/1000 [00:21<00:13, 29.08it/s]

 62%|██████▏   | 615/1000 [00:21<00:13, 28.73it/s]

 62%|██████▏   | 618/1000 [00:21<00:13, 28.74it/s]

 62%|██████▏   | 621/1000 [00:21<00:13, 28.77it/s]

 62%|██████▏   | 624/1000 [00:21<00:13, 28.81it/s]

 63%|██████▎   | 627/1000 [00:21<00:12, 28.83it/s]

 63%|██████▎   | 630/1000 [00:21<00:12, 28.87it/s]

 63%|██████▎   | 633/1000 [00:21<00:12, 28.95it/s]

 64%|██████▎   | 636/1000 [00:21<00:12, 28.98it/s]

 64%|██████▍   | 639/1000 [00:22<00:12, 28.89it/s]

 64%|██████▍   | 642/1000 [00:22<00:12, 28.85it/s]

 64%|██████▍   | 645/1000 [00:22<00:12, 28.91it/s]

 65%|██████▍   | 648/1000 [00:22<00:12, 29.12it/s]

 65%|██████▌   | 651/1000 [00:22<00:11, 29.19it/s]

 65%|██████▌   | 654/1000 [00:22<00:11, 29.21it/s]

 66%|██████▌   | 657/1000 [00:22<00:11, 29.24it/s]

 66%|██████▌   | 660/1000 [00:22<00:11, 29.27it/s]

 66%|██████▋   | 663/1000 [00:22<00:11, 29.35it/s]

 67%|██████▋   | 666/1000 [00:22<00:11, 29.26it/s]

 67%|██████▋   | 669/1000 [00:23<00:11, 29.23it/s]

 67%|██████▋   | 672/1000 [00:23<00:11, 29.13it/s]

 68%|██████▊   | 675/1000 [00:23<00:11, 28.93it/s]

 68%|██████▊   | 678/1000 [00:23<00:11, 28.99it/s]

 68%|██████▊   | 681/1000 [00:23<00:10, 29.02it/s]

 68%|██████▊   | 684/1000 [00:23<00:10, 29.16it/s]

 69%|██████▊   | 687/1000 [00:23<00:10, 29.20it/s]

 69%|██████▉   | 690/1000 [00:23<00:10, 29.12it/s]

 69%|██████▉   | 693/1000 [00:23<00:10, 29.06it/s]

 70%|██████▉   | 696/1000 [00:23<00:10, 28.89it/s]

 70%|██████▉   | 699/1000 [00:24<00:10, 29.01it/s]

 70%|███████   | 702/1000 [00:24<00:10, 28.94it/s]

 70%|███████   | 705/1000 [00:24<00:10, 29.03it/s]

 71%|███████   | 708/1000 [00:24<00:10, 29.05it/s]

 71%|███████   | 711/1000 [00:24<00:09, 29.06it/s]

 71%|███████▏  | 714/1000 [00:24<00:09, 29.06it/s]

 72%|███████▏  | 717/1000 [00:24<00:09, 29.21it/s]

 72%|███████▏  | 720/1000 [00:24<00:09, 28.95it/s]

 72%|███████▏  | 723/1000 [00:24<00:09, 28.86it/s]

 73%|███████▎  | 726/1000 [00:25<00:09, 28.86it/s]

 73%|███████▎  | 729/1000 [00:25<00:09, 28.93it/s]

 73%|███████▎  | 732/1000 [00:25<00:09, 28.69it/s]

 74%|███████▎  | 735/1000 [00:25<00:09, 28.65it/s]

 74%|███████▍  | 738/1000 [00:25<00:09, 28.65it/s]

 74%|███████▍  | 741/1000 [00:25<00:09, 28.72it/s]

 74%|███████▍  | 744/1000 [00:25<00:08, 28.75it/s]

 75%|███████▍  | 747/1000 [00:25<00:08, 28.83it/s]

 75%|███████▌  | 750/1000 [00:25<00:08, 28.94it/s]

 75%|███████▌  | 753/1000 [00:25<00:08, 28.94it/s]

 76%|███████▌  | 756/1000 [00:26<00:08, 28.99it/s]

 76%|███████▌  | 759/1000 [00:26<00:08, 29.00it/s]

 76%|███████▌  | 762/1000 [00:26<00:08, 29.03it/s]

 76%|███████▋  | 765/1000 [00:26<00:08, 29.11it/s]

 77%|███████▋  | 768/1000 [00:26<00:07, 29.11it/s]

 77%|███████▋  | 771/1000 [00:26<00:07, 29.21it/s]

 77%|███████▋  | 774/1000 [00:26<00:07, 29.26it/s]

 78%|███████▊  | 777/1000 [00:26<00:07, 29.25it/s]

 78%|███████▊  | 780/1000 [00:26<00:07, 29.04it/s]

 78%|███████▊  | 783/1000 [00:26<00:07, 28.93it/s]

 79%|███████▊  | 786/1000 [00:27<00:07, 28.95it/s]

 79%|███████▉  | 789/1000 [00:27<00:07, 28.71it/s]

 79%|███████▉  | 792/1000 [00:27<00:07, 28.45it/s]

 80%|███████▉  | 795/1000 [00:27<00:07, 28.54it/s]

 80%|███████▉  | 798/1000 [00:27<00:07, 28.84it/s]

 80%|████████  | 801/1000 [00:27<00:06, 29.08it/s]

 80%|████████  | 804/1000 [00:27<00:06, 29.08it/s]

 81%|████████  | 807/1000 [00:27<00:06, 29.00it/s]

 81%|████████  | 810/1000 [00:27<00:06, 28.98it/s]

 81%|████████▏ | 813/1000 [00:28<00:06, 28.93it/s]

 82%|████████▏ | 816/1000 [00:28<00:06, 28.98it/s]

 82%|████████▏ | 819/1000 [00:28<00:06, 29.03it/s]

 82%|████████▏ | 822/1000 [00:28<00:06, 29.09it/s]

 82%|████████▎ | 825/1000 [00:28<00:06, 29.11it/s]

 83%|████████▎ | 828/1000 [00:28<00:05, 28.84it/s]

 83%|████████▎ | 831/1000 [00:28<00:05, 28.97it/s]

 83%|████████▎ | 834/1000 [00:28<00:05, 29.02it/s]

 84%|████████▎ | 837/1000 [00:28<00:05, 28.97it/s]

 84%|████████▍ | 840/1000 [00:28<00:05, 29.05it/s]

 84%|████████▍ | 843/1000 [00:29<00:05, 28.98it/s]

 85%|████████▍ | 846/1000 [00:29<00:05, 28.92it/s]

 85%|████████▍ | 849/1000 [00:29<00:05, 28.78it/s]

 85%|████████▌ | 852/1000 [00:29<00:05, 29.01it/s]

 86%|████████▌ | 855/1000 [00:29<00:04, 29.18it/s]

 86%|████████▌ | 858/1000 [00:29<00:04, 29.21it/s]

 86%|████████▌ | 861/1000 [00:29<00:04, 29.29it/s]

 86%|████████▋ | 864/1000 [00:29<00:04, 29.31it/s]

 87%|████████▋ | 867/1000 [00:29<00:04, 29.26it/s]

 87%|████████▋ | 870/1000 [00:29<00:04, 28.96it/s]

 87%|████████▋ | 873/1000 [00:30<00:04, 29.07it/s]

 88%|████████▊ | 876/1000 [00:30<00:04, 28.93it/s]

 88%|████████▊ | 879/1000 [00:30<00:04, 28.89it/s]

 88%|████████▊ | 882/1000 [00:30<00:04, 28.88it/s]

 88%|████████▊ | 885/1000 [00:30<00:03, 28.78it/s]

 89%|████████▉ | 888/1000 [00:30<00:03, 28.62it/s]

 89%|████████▉ | 891/1000 [00:30<00:03, 28.68it/s]

 89%|████████▉ | 894/1000 [00:30<00:03, 28.83it/s]

 90%|████████▉ | 897/1000 [00:30<00:03, 28.81it/s]

 90%|█████████ | 900/1000 [00:31<00:03, 28.96it/s]

 90%|█████████ | 903/1000 [00:31<00:03, 28.93it/s]

 91%|█████████ | 906/1000 [00:31<00:03, 28.77it/s]

 91%|█████████ | 909/1000 [00:31<00:03, 29.02it/s]

 91%|█████████ | 912/1000 [00:31<00:03, 29.11it/s]

 92%|█████████▏| 915/1000 [00:31<00:02, 29.24it/s]

 92%|█████████▏| 918/1000 [00:31<00:02, 29.34it/s]

 92%|█████████▏| 921/1000 [00:31<00:02, 29.37it/s]

 92%|█████████▏| 924/1000 [00:31<00:02, 29.38it/s]

 93%|█████████▎| 927/1000 [00:31<00:02, 29.15it/s]

 93%|█████████▎| 930/1000 [00:32<00:02, 29.09it/s]

 93%|█████████▎| 933/1000 [00:32<00:02, 29.08it/s]

 94%|█████████▎| 936/1000 [00:32<00:02, 29.13it/s]

 94%|█████████▍| 939/1000 [00:32<00:02, 29.24it/s]

 94%|█████████▍| 942/1000 [00:32<00:01, 29.14it/s]

 94%|█████████▍| 945/1000 [00:32<00:01, 29.03it/s]

 95%|█████████▍| 948/1000 [00:32<00:01, 29.14it/s]

 95%|█████████▌| 951/1000 [00:32<00:01, 29.27it/s]

 95%|█████████▌| 954/1000 [00:32<00:01, 29.36it/s]

 96%|█████████▌| 957/1000 [00:32<00:01, 29.38it/s]

 96%|█████████▌| 960/1000 [00:33<00:01, 29.41it/s]

 96%|█████████▋| 963/1000 [00:33<00:01, 29.20it/s]

 97%|█████████▋| 966/1000 [00:33<00:01, 29.16it/s]

 97%|█████████▋| 969/1000 [00:33<00:01, 29.29it/s]

 97%|█████████▋| 972/1000 [00:33<00:00, 29.35it/s]

 98%|█████████▊| 975/1000 [00:33<00:00, 29.41it/s]

 98%|█████████▊| 978/1000 [00:33<00:00, 29.42it/s]

 98%|█████████▊| 981/1000 [00:33<00:00, 29.40it/s]

 98%|█████████▊| 984/1000 [00:33<00:00, 29.31it/s]

 99%|█████████▊| 987/1000 [00:34<00:00, 28.88it/s]

 99%|█████████▉| 990/1000 [00:34<00:00, 28.95it/s]

 99%|█████████▉| 993/1000 [00:34<00:00, 29.03it/s]

100%|█████████▉| 996/1000 [00:34<00:00, 29.10it/s]

100%|█████████▉| 999/1000 [00:34<00:00, 29.07it/s]

100%|██████████| 1000/1000 [00:34<00:00, 29.02it/s]

Acceptance: 0.7950000166893005

# corner plot of the posterior
# note that, even 1000 samples is not enough to overcome the autocorrelation so the posterior has not converged.
# In fact it is not even close to convergence as can be seen by the multi-modal blobs in the posterior since this
# problem is unimodal (except the modes where models are swapped). It is almost never worthwhile to use this
# sampler except as a sanity check on very simple models.
param_names = list(MODEL.parameters.vector_names())
i = 0
while i < len(param_names):
    param_names[i] = param_names[i].replace(" ", "")
    if "center" in param_names[i]:
        center_name = param_names.pop(i)
        param_names.insert(i, center_name.replace("center", "y"))
        param_names.insert(i, center_name.replace("center", "x"))
    i += 1

set, sky = true_params()
corner_plot(
    res_mh.chain[::10],  # thin by a factor 10 so the plot works in reasonable time
    labels=param_names,
    figsize=(20, 20),
    true_values=np.concatenate((sky, set.ravel())),
)

../_images/bf086ad0c20c3d507f79fb0be751bb8054b184d8b53df6630dd9041eef429541.png