import numpy as np
import pandas as pd
from brmp.backend import Backend, data_from_numpy
from brmp.design import Metadata, code_lengths, makedata, metadata_from_df
from brmp.family import Family, Normal
from brmp.fit import Fit
from brmp.formula import Formula, parse
from brmp.model import build_model, model_repr
from brmp.model_pre import build_model_pre
from brmp.priors import build_prior_tree
from brmp.pyro_backend import backend as pyro_backend
from brmp.numpyro_backend import backend as numpyro_backend
def makedesc(formula, metadata, family, priors, code_lengths):
assert type(formula) == Formula
assert type(metadata) == Metadata
assert type(family) == Family
assert type(priors) == list
model_desc_pre = build_model_pre(formula, metadata, family, code_lengths)
prior_tree = build_prior_tree(model_desc_pre, priors)
return build_model(model_desc_pre, prior_tree)
# TODO: In principle we only need to pass `code_length(contrasts)`
# here. The actual coding isn't required until we see data.
def define_model(formula_str, metadata, family=None, priors=None, contrasts=None):
assert type(formula_str) == str
assert type(metadata) == Metadata
assert family is None or type(family) == Family
assert priors is None or type(priors) == list
assert contrasts is None or type(contrasts) == dict
family = family or Normal
priors = priors or []
contrasts = contrasts or {}
# TODO: Consider accepting nested arrays as well as numpy arrays.
# (If we do, convert to numpy arrays here in `define_model`?)
assert all(type(val) == np.ndarray and len(val.shape) == 2 for val in contrasts.values())
formula = parse(formula_str)
desc = makedesc(formula, metadata, family, priors, code_lengths(contrasts))
return Model(formula, metadata, contrasts, desc)
class Model:
def __init__(self, formula, metadata, contrasts, desc):
self.formula = formula
self.metadata = metadata
self.contrasts = contrasts
self.desc = desc
def gen(self, backend):
assets = backend.gen(self.desc)
return AssetsWrapper(self, assets, backend)
# Generate design matrices. (Represented as numpy arrays.)
def encode(self, df):
return makedata(self.formula, df, self.metadata, self.contrasts)
class AssetsWrapper:
def __init__(self, model, assets, backend):
assert type(backend) == Backend
self.model = model
self.assets = assets
self.backend = backend
def encode(self, df):
data = self.model.encode(df)
return data_from_numpy(self.backend, data)
def run_algo(self, name, data, *args, **kwargs):
samples = getattr(self.backend, name)(data, self.assets, *args, **kwargs)
return Fit(self.model.formula, self.model.metadata,
self.model.contrasts, data,
self.model.desc, self.assets, samples, self.backend)
[docs]def brm(formula_str, df, family=None, priors=None, contrasts=None):
"""
Defines a model and encodes data in design matrices.
By default categorical columns are coded using dummy coding.
:param formula_str: An lme4 formula. e.g. ``'y ~ 1 + x'``. See
:class:`~brmp.formula.Formula` for a description
of the supported syntax.
:type formula_str: str
:param df: A data frame containing columns for each of the variables in
``formula_str``.
:type df: pandas.DataFrame
:param family: The model's response family.
:type family: brmp.family.Family
:param priors: A list of :class:`~brmp.priors.Prior` instances describing the model's priors.
:type priors: list
:param contrasts: A dictionary that optionally maps variable names to contrast matrices describing
custom encodings of categorical variables. Each contrast matrix should be
a :class:`~numpy.ndarray` of shape ``(L, C)``, where ``L`` is the number of levels
present in the categorical variable and ``C`` is the length of the desired
encoding.
:type contrasts: dict
:return: A wrapper around the model description and the design matrices.
:rtype: brmp.ModelAndData
Example::
df = pd.DataFrame({'y': [1., 2.], 'x': [.5, 0.]})
model = brm('y ~ 1 + x', df)
"""
assert type(formula_str) == str
assert type(df) == pd.DataFrame
assert family is None or type(family) == Family
assert priors is None or type(priors) == list
assert contrasts is None or type(contrasts) == dict
metadata = metadata_from_df(df)
model = define_model(formula_str, metadata, family, priors, contrasts)
data = model.encode(df)
return ModelAndData(model, df, data)
[docs]class ModelAndData:
def __init__(self, model, df, data):
self.model = model
self.df = df
# TODO: Turn this into a `@property` to improve generate docs?
self.data = data
"""
A dictionary with the encoded data as its values.
Each value is a :class:`~numpy.ndarray`.
"""
# We might eventually want to cache generated assets. This
# will likely be particularly useful once it's possible to
# cache compiled NumPyro models on the `Model` instance.
# Once we have this, we can extend `nuts` etc. to take an
# extra (optional) `df` argument, enabling the model to be fit
# with a data frame other than that used to define the model.
# This would be passed to `run_algo` (as a keyword argument)
# which already implements the required logic.
# One thing we might consider adding with this is checks to
# ensure that such a data frame is compatible (in an
# appropriate sense) with the data frame used to define the
# model.
def run_algo(self, name, backend, *args, df=None, **kwargs):
assert type(backend) == Backend
data = self.model.encode(df) if df is not None else self.data
assets_wrapper = self.model.gen(backend)
return assets_wrapper.run_algo(name, data_from_numpy(backend, data), *args, **kwargs)
[docs] def fit(self, algo='nuts', **kwargs):
"""
Fits the wrapped model.
:param algo: The algorithm used for inference, either ``'nuts'``, ``'svi'`` or ``'prior'``.
:type algo: str
:param kwargs: Inference algorithm-specific keyword arguments. See :func:`~brmp.ModelAndData.nuts`,
:func:`~brmp.ModelAndData.svi` and :func:`~brmp.ModelAndData.prior` for details.
:return: A model fit.
:rtype: brmp.fit.Fit
Example::
fit = brm('y ~ x', df).fit()
"""
assert algo in ['prior', 'nuts', 'svi']
return getattr(self, algo)(**kwargs)
[docs] def nuts(self, iter=10, warmup=None, num_chains=1, seed=None, backend=numpyro_backend):
"""
Fit the model using NUTS.
:param iter: The number of (post warm up) samples to take.
:type iter: int
:param warmup: The number of warm up samples to take. Warm up samples are
not included in the final model fit. Defaults to
``iter / 2``.
:type warmup: int
:param num_chains: The number of chains to run.
:type num_chains: int
:param seed: Random seed.
:type seed: int
:param backend: The backend used to perform inference.
:type backend: brmp.backend.Backend
:return: A model fit.
:rtype: brmp.fit.Fit
Example::
fit = brm('y ~ x', df).nuts()
"""
warmup = iter // 2 if warmup is None else warmup
return self.run_algo('nuts', backend, iter, warmup, num_chains, seed)
[docs] def svi(self, iter=10, num_samples=10, seed=None, backend=pyro_backend, **kwargs):
"""
Fit the model using stochastic variational inference.
:param iter: The number of optimisation steps to take.
:type iter: int
:param num_samples: The number of samples to take from the variational
posterior.
:type num_samples: int
:param seed: Random seed.
:type seed: int
:param backend: The backend used to perform inference.
:type backend: brmp.backend.Backend
:return: A model fit.
:rtype: brmp.fit.Fit
Example::
fit = brm('y ~ x', df).svi()
"""
return self.run_algo('svi', backend, iter, num_samples, seed, **kwargs)
[docs] def prior(self, num_samples=10, seed=None, backend=pyro_backend):
"""
Sample from the prior.
:param num_samples: The number of samples to take.
:type num_samples: int
:param seed: Random seed.
:type seed: int
:param backend: The backend used to perform inference.
:type backend: brmp.backend.Backend
:return: A model fit.
:rtype: brmp.fit.Fit
Example::
fit = brm('y ~ x', df).prior()
"""
return self.run_algo('prior', backend, num_samples, seed)
def __repr__(self):
return model_repr(self.model.desc)