# Source code for bouts.boutsmle

"""BoutsMLE class

"""

import logging
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.special import logit, expit
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import skdiveMove.bouts.bouts as bouts

logger = logging.getLogger(__name__)
# Add the null handler if importing as a library; the application using
# this library should set up logging.basicConfig() as needed
logger.addHandler(logging.NullHandler())


def mle_fun(x, p, lambdas):
    r"""Log density of a mixture of random Poisson processes

    The current implementation takes two or three random Poisson
    processes, mixed according to the parameters in `p`.

    Parameters
    ----------
    x : array_like
        Independent data array described by parameters `p` and `lambdas`.
    p : list
        List with mixing parameters of the model.
    lambdas : array_like
        1-D array with the density parameters (:math:`\lambda`) of the
        model.  Its length must be ``len(p) + 1``.

    Returns
    -------
    out : array_like
        Same shape as `x` with the evaluated function (log of the mixture
        density).

    Raises
    ------
    KeyError
        If more than three processes are requested (``lambdas.size > 3``).

    """
    # Lazy %-formatting avoids building the message when INFO is disabled
    logging.getLogger(__name__).info("p=%s, lambdas=%s", p, lambdas)
    # Accept any array_like (e.g. plain list) for `lambdas`
    lambdas = np.asarray(lambdas)
    ncoefs = lambdas.size

    # We assume at least two processes
    p0 = p[0]
    lda0 = lambdas[0]
    term0 = p0 * lda0 * np.exp(-lda0 * x)

    if ncoefs == 2:
        lda1 = lambdas[1]
        term1 = (1 - p0) * lda1 * np.exp(-lda1 * x)
        res = term0 + term1
    elif ncoefs == 3:
        p1 = p[1]
        lda1 = lambdas[1]
        term1 = p1 * (1 - p0) * lda1 * np.exp(-lda1 * x)
        lda2 = lambdas[2]
        # The third process gets the remainder of the mixture weight
        term2 = (1 - p1) * (1 - p0) * lda2 * np.exp(-lda2 * x)
        res = term0 + term1 + term2
    else:
        msg = "Only mixtures of <= 3 processes are implemented"
        raise KeyError(msg)

    return np.log(res)


class BoutsMLE(bouts.Bouts):
    """Maximum likelihood bout identification

    Fit a mixture of random Poisson processes to the observed data by
    maximizing the log likelihood, and derive the bout ending criterion
    (BEC) from the fitted coefficients.

    """

    def loglik_fun(self, params, x, transformed=True):
        r"""Negative log likelihood of parameters given observed data

        Parameters
        ----------
        params : array_like
            1-D array with parameters to fit.  Currently must be
            3-length, with mixing parameter :math:`p`, density parameter
            :math:`\lambda_f` and :math:`\lambda_s`, in that order.
        x : array_like
            Independent data array described by parameters `p` and
            `lambdas`.
        transformed : bool
            Whether `params` are transformed and need to be
            un-transformed to calculate the likelihood.

        Returns
        -------
        out : float
            Negative sum of the log likelihood evaluated at `x`.

        """
        p = params[0]
        lambdas = params[1:]

        if transformed:
            # Undo the logit/log transformation applied for optimization
            p = expit(p)
            lambdas = np.exp(lambdas)

        # Need list `p` for mle_fun
        ll = -sum(mle_fun(x, [p], lambdas))
        logger.info("LL=%s", ll)
        return ll

    def fit(self, start, fit1_opts=None, fit2_opts=None):
        """Maximum likelihood estimation of log frequencies

        Parameters
        ----------
        start : pandas.DataFrame
            DataFrame with starting values for coefficients of each
            process in columns.  These can come from the "broken stick"
            method as in :meth:`Bouts.init_pars`, and will be transformed
            to minimize the first log likelihood function.
        fit1_opts, fit2_opts : dict
            Dictionaries with keywords to be passed to
            :func:`scipy.optimize.minimize`, for the first and second
            fits.

        Returns
        -------
        fit1, fit2 : scipy.optimize.OptimizeResult
            Objects with the optimization result from the first and
            second fit, having a `x` attribute with coefficients of the
            solution.

        Notes
        -----
        Current implementation handles mixtures of two Poisson processes.

        """
        # Calculate `p`
        p0, lambda0 = bouts.calc_p(start)
        # Transform parameters for first fit: lambdas on log scale,
        # p on logit scale, so the first optimization is unconstrained
        lambda0 = np.log(lambda0)
        x0 = np.array([*logit(p0), *lambda0])

        if fit1_opts:
            fit1 = minimize(self.loglik_fun, x0=x0,
                            args=(self.x,), **fit1_opts)
        else:
            fit1 = minimize(self.loglik_fun, x0=x0, args=(self.x,))

        coef0 = fit1.x
        # Un-transform fit1's solution to use as start for the second,
        # untransformed fit
        start2 = [expit(coef0[0]), *np.exp(coef0[1:])]

        if fit2_opts:
            fit2 = minimize(self.loglik_fun, x0=start2,
                            args=(self.x, False), **fit2_opts)
        else:
            fit2 = minimize(self.loglik_fun, x0=start2,
                            args=(self.x, False))

        logger.info("N iter fit 1: %s, fit 2: %s", fit1.nit, fit2.nit)
        return fit1, fit2

    def bec(self, fit):
        """Calculate bout ending criteria from model coefficients

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.

        Returns
        -------
        out : ndarray

        Notes
        -----
        Current implementation is for a two-process mixture, hence an
        array of a single float is returned.

        """
        coefs = fit.x
        p_hat = coefs[0]
        lambda1_hat = coefs[1]
        lambda2_hat = coefs[2]
        # Intersection of the two process densities (Langton et al.)
        bec = (np.log((p_hat * lambda1_hat) /
                      ((1 - p_hat) * lambda2_hat)) /
               (lambda1_hat - lambda2_hat))
        return np.array(bec)

    def plot_fit(self, fit, ax=None):
        """Plot log frequency histogram and fitted model

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.
        ax : matplotlib.Axes instance
            An Axes instance to use as target.

        Returns
        -------
        ax : `matplotlib.Axes`

        """
        # Method is redefined from Bouts
        x = self.x
        coefs = fit.x
        p_hat = coefs[0]
        lambdas_hat = coefs[1:]
        xmin = x.min()
        xmax = x.max()

        # BEC and model value at the BEC
        becx = self.bec(fit)
        becy = mle_fun(becx, [p_hat], lambdas_hat)
        x_pred = np.linspace(xmin, xmax, num=101)  # matches R's curve
        y_pred = mle_fun(x_pred, [p_hat], lambdas_hat)

        if ax is None:
            ax = plt.gca()

        # Data rug plot
        ax.plot(x, np.ones_like(x) * y_pred.max(), "|",
                color="k", label="observed")
        # Plot predicted
        ax.plot(x_pred, y_pred, label="model")
        # Plot BEC
        ylim = ax.get_ylim()
        ax.vlines(becx, ylim[0], becy, linestyle="--")
        # Annotations
        ax.annotate("bec = {0:.3f}".format(becx),
                    (becx, ylim[0]), xytext=(5, 0),
                    textcoords="offset points")
        ax.legend(loc=8, bbox_to_anchor=(0.5, 1), frameon=False,
                  borderaxespad=0.1, ncol=2)
        ax.set_xlabel("x")
        ax.set_ylabel("log frequency")
        return ax

    def plot_ecdf(self, fit, ax=None):
        """Plot observed and modelled empirical cumulative frequencies

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.
        ax : matplotlib.Axes instance
            An Axes instance to use as target.

        Returns
        -------
        ax : `matplotlib.Axes`

        """
        x = self.x
        coefs = fit.x

        # Plot on a log1p scale, back-transformed for the x axis
        xx = np.log1p(x)
        x_ecdf = ECDF(xx)
        x_pred = np.linspace(0, xx.max(), num=101)
        y_pred = x_ecdf(x_pred)

        if ax is None:
            ax = plt.gca()

        # Plot ECDF of data
        ax.step(np.expm1(x_pred), y_pred, label="observed")
        ax.set_xscale("log")
        ax.xaxis.set_major_formatter(ScalarFormatter())
        # NOTE(review): limits use exp rather than expm1 as elsewhere in
        # this method -- confirm this is intentional
        ax.set_xlim(np.exp(xx).min(), np.exp(xx).max())
        # Plot estimated CDF
        p = [coefs[0]]          # list to bouts.ecdf()
        lambdas = pd.Series([coefs[1], coefs[2]], name="lambda")
        y_mod = bouts.ecdf(np.expm1(x_pred), p, lambdas)
        ax.plot(np.expm1(x_pred), y_mod, label="model")
        # Add a little offset to ylim for visibility
        yoffset = (0.05, 1.05)
        ax.set_ylim(*yoffset)   # add some spacing
        # Plot BEC
        becx = self.bec(fit)
        becy = bouts.ecdf(becx, p, lambdas)
        ax.vlines(becx, 0, becy, linestyle="--")
        # Annotations
        ax.legend(loc="upper left")
        ax.annotate("bec = {0:.3f}".format(becx),
                    (becx, yoffset[0]), xytext=(5, 5),
                    textcoords="offset points")
        ax.set_xlabel("x")
        ax.set_ylabel("ECDF [x]")
        return ax
if __name__ == '__main__':
    # Set up info level logging
    logging.basicConfig(level=logging.INFO)
    from skdiveMove.tests import diveMove2skd
    tdrX = diveMove2skd()
    pars = {"offset_zoc": 3, "dry_thr": 70, "wet_thr": 3610,
            "dive_thr": 3, "dive_model": "unimodal", "smooth_par": 0.1,
            "knot_factor": 20, "descent_crit_q": 0.01,
            "ascent_crit_q": 0}
    # Fix: `wet_thr` previously received pars["dry_thr"] by mistake
    tdrX.calibrate(zoc_method="offset", offset=pars["offset_zoc"],
                   dry_thr=pars["dry_thr"], wet_thr=pars["wet_thr"],
                   dive_thr=pars["dive_thr"],
                   dive_model=pars["dive_model"],
                   smooth_par=pars["smooth_par"],
                   knot_factor=pars["knot_factor"],
                   descent_crit_q=pars["descent_crit_q"],
                   ascent_crit_q=pars["ascent_crit_q"])
    stats = tdrX.dive_stats()
    stamps = tdrX.stamp_dives(ignore_z=True)
    stats_tab = pd.concat((stamps, stats), axis=1)
    # phase_id 4 identifies the post-dive phases of interest here
    postdives = stats_tab["postdive_dur"][stats_tab["phase_id"] == 4]
    postdives_diff = postdives.dt.total_seconds().diff()[1:].abs()
    # Remove isolated dives
    postdives_diff = postdives_diff[postdives_diff < 2000]

    # Set up instance
    bouts_postdive = BoutsMLE(postdives_diff, 0.1)
    # Get init parameters from broken stick model
    bout_init_pars = bouts_postdive.init_pars([50], plot=False)

    # Knowing the parameter ranges, bound the transformed (fit 1) and
    # untransformed (fit 2) optimizations
    p_bnd = (-2, None)
    lda1_bnd = (-5, None)
    lda2_bnd = (-10, None)
    bd1 = (p_bnd, lda1_bnd, lda2_bnd)
    p_bnd = (1e-8, None)
    lda1_bnd = (1e-8, None)
    lda2_bnd = (1e-8, None)
    bd2 = (p_bnd, lda1_bnd, lda2_bnd)
    fit1, fit2 = bouts_postdive.fit(bout_init_pars,
                                    fit1_opts=dict(method="L-BFGS-B",
                                                   bounds=bd1),
                                    fit2_opts=dict(method="L-BFGS-B",
                                                   bounds=bd2))

    # BEC
    becx = bouts_postdive.bec(fit2)
    ax = bouts_postdive.plot_fit(fit2)
    bouts_postdive.plot_ecdf(fit2)