Source code for brmp

import numpy as np
import pandas as pd

from brmp.backend import Backend, data_from_numpy
from brmp.design import Metadata, code_lengths, makedata, metadata_from_df
from brmp.family import Family, Normal
from brmp.fit import Fit
from brmp.formula import Formula, parse
from brmp.model import build_model, model_repr
from brmp.model_pre import build_model_pre
from brmp.priors import build_prior_tree
from brmp.pyro_backend import backend as pyro_backend
from brmp.numpyro_backend import backend as numpyro_backend


def makedesc(formula, metadata, family, priors, code_lengths):
    assert type(formula) == Formula
    assert type(metadata) == Metadata
    assert type(family) == Family
    assert type(priors) == list
    model_desc_pre = build_model_pre(formula, metadata, family, code_lengths)
    prior_tree = build_prior_tree(model_desc_pre, priors)
    return build_model(model_desc_pre, prior_tree)

# TODO: In principle we only need to pass `code_length(contrasts)`
# here. The actual coding isn't required until we see data.


def define_model(formula_str, metadata, family=None, priors=None, contrasts=None):
    assert type(formula_str) == str
    assert type(metadata) == Metadata
    assert family is None or type(family) == Family
    assert priors is None or type(priors) == list
    assert contrasts is None or type(contrasts) == dict

    family = family or Normal
    priors = priors or []
    contrasts = contrasts or {}

    # TODO: Consider accepting nested arrays as well as numpy arrays.
    # (If we do, convert to numpy arrays here in `define_model`?)
    assert all(type(val) == np.ndarray and len(val.shape) == 2 for val in contrasts.values())

    formula = parse(formula_str)
    desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
    return Model(formula, metadata, contrasts, desc)


class Model:
    def __init__(self, formula, metadata, contrasts, desc):
        self.formula = formula
        self.metadata = metadata
        self.contrasts = contrasts
        self.desc = desc

    def gen(self, backend):
        assets = backend.gen(self.desc)
        return AssetsWrapper(self, assets, backend)

    # Generate design matrices. (Represented as numpy arrays.)
    def encode(self, df):
        return makedata(self.formula, df, self.metadata, self.contrasts)


class AssetsWrapper:
    def __init__(self, model, assets, backend):
        assert type(backend) == Backend
        self.model = model
        self.assets = assets
        self.backend = backend

    def encode(self, df):
        data = self.model.encode(df)
        return data_from_numpy(self.backend, data)

    def run_algo(self, name, data, *args, **kwargs):
        samples = getattr(self.backend, name)(data, self.assets, *args, **kwargs)
        return Fit(self.model.formula, self.model.metadata,
                   self.model.contrasts, data,
                   self.model.desc, self.assets, samples, self.backend)


[docs]def brm(formula_str, df, family=None, priors=None, contrasts=None):
    """
    Defines a model and encodes data in design matrices.

    By default categorical columns are coded using dummy coding.

    :param formula_str: An lme4 formula. e.g. ``'y ~ 1 + x'``. See
                        :class:`~brmp.formula.Formula` for a description
                        of the supported syntax.
    :type formula_str: str
    :param df: A data frame containing columns for each of the variables in
               ``formula_str``.
    :type df: pandas.DataFrame
    :param family: The model's response family.
    :type family: brmp.family.Family
    :param priors: A list of :class:`~brmp.priors.Prior` instances describing the model's priors.
    :type priors: list
    :param contrasts: A dictionary that optionally maps variable names to contrast matrices describing
                      custom encodings of categorical variables. Each contrast matrix should be
                      a :class:`~numpy.ndarray` of shape ``(L, C)``, where ``L`` is the number of levels
                      present in the categorical variable and ``C`` is the length of the desired
                      encoding.
    :type contrasts: dict
    :return: A wrapper around the model description and the design matrices.
    :rtype: brmp.ModelAndData

    Example::

      df = pd.DataFrame({'y': [1., 2.], 'x': [.5, 0.]})
      model = brm('y ~ 1 + x', df)

    """
    assert type(formula_str) == str
    assert type(df) == pd.DataFrame
    assert family is None or type(family) == Family
    assert priors is None or type(priors) == list
    assert contrasts is None or type(contrasts) == dict
    metadata = metadata_from_df(df)
    model = define_model(formula_str, metadata, family, priors, contrasts)
    data = model.encode(df)
    return ModelAndData(model, df, data)


[docs]class ModelAndData:
    def __init__(self, model, df, data):
        self.model = model
        self.df = df
        # TODO: Turn this into a `@property` to improve generate docs?
        self.data = data
        """
        A dictionary with the encoded data as its values.
        Each value is a :class:`~numpy.ndarray`.
        """

        # We might eventually want to cache generated assets. This
        # will likely be particularly useful once it's possible to
        # cache compiled NumPyro models on the `Model` instance.

        # Once we have this, we can extend `nuts` etc. to take an
        # extra (optional) `df` argument, enabling the model to be fit
        # with a data frame other than that used to define the model.
        # This would be passed to `run_algo` (as a keyword argument)
        # which already implements the required logic.

        # One thing we might consider adding with this is checks to
        # ensure that such a data frame is compatible (in an
        # appropriate sense) with the data frame used to define the
        # model.

    def run_algo(self, name, backend, *args, df=None, **kwargs):
        assert type(backend) == Backend
        data = self.model.encode(df) if df is not None else self.data
        assets_wrapper = self.model.gen(backend)
        return assets_wrapper.run_algo(name, data_from_numpy(backend, data), *args, **kwargs)

[docs]    def fit(self, algo='nuts', **kwargs):
        """
        Fits the wrapped model.

        :param algo: The algorithm used for inference, either ``'nuts'``, ``'svi'`` or ``'prior'``.
        :type algo: str
        :param kwargs: Inference algorithm-specific keyword arguments. See :func:`~brmp.ModelAndData.nuts`,
                       :func:`~brmp.ModelAndData.svi` and :func:`~brmp.ModelAndData.prior` for details.
        :return: A model fit.
        :rtype: brmp.fit.Fit

        Example::

          fit = brm('y ~ x', df).fit()

        """
        assert algo in ['prior', 'nuts', 'svi']
        return getattr(self, algo)(**kwargs)

[docs]    def nuts(self, iter=10, warmup=None, num_chains=1, seed=None, backend=numpyro_backend):
        """
        Fit the model using NUTS.

        :param iter: The number of (post warm up) samples to take.
        :type iter: int
        :param warmup: The number of warm up samples to take. Warm up samples are
                       not included in the final model fit. Defaults to
                       ``iter / 2``.
        :type warmup: int
        :param num_chains: The number of chains to run.
        :type num_chains: int
        :param seed: Random seed.
        :type seed: int
        :param backend: The backend used to perform inference.
        :type backend: brmp.backend.Backend
        :return: A model fit.
        :rtype: brmp.fit.Fit

        Example::

          fit = brm('y ~ x', df).nuts()

        """
        warmup = iter // 2 if warmup is None else warmup
        return self.run_algo('nuts', backend, iter, warmup, num_chains, seed)

[docs]    def svi(self, iter=10, num_samples=10, seed=None, backend=pyro_backend, **kwargs):
        """
        Fit the model using stochastic variational inference.

        :param iter: The number of optimisation steps to take.
        :type iter: int
        :param num_samples: The number of samples to take from the variational
                            posterior.
        :type num_samples: int
        :param seed: Random seed.
        :type seed: int
        :param backend: The backend used to perform inference.
        :type backend: brmp.backend.Backend
        :return: A model fit.
        :rtype: brmp.fit.Fit

        Example::

          fit = brm('y ~ x', df).svi()

        """
        return self.run_algo('svi', backend, iter, num_samples, seed, **kwargs)

[docs]    def prior(self, num_samples=10, seed=None, backend=pyro_backend):
        """
        Sample from the prior.

        :param num_samples: The number of samples to take.
        :type num_samples: int
        :param seed: Random seed.
        :type seed: int
        :param backend: The backend used to perform inference.
        :type backend: brmp.backend.Backend

        :return: A model fit.
        :rtype: brmp.fit.Fit

        Example::

          fit = brm('y ~ x', df).prior()

        """
        return self.run_algo('prior', backend, num_samples, seed)

    def __repr__(self):
        return model_repr(self.model.desc)