# Source code for tdrphases

"""Class handling all TDR phase operations

Phase identification methods take `depth` as input.

Class and Methods Summary
-------------------------

.. autosummary::

   TDRPhases.detect_wet
   TDRPhases.detect_dives
   TDRPhases.detect_dive_phases
   TDRPhases.get_dives_details
   TDRPhases.get_dive_deriv
   TDRPhases.get_wet_activity
   TDRPhases.get_params
   TDRPhases.time_budget
   TDRPhases.stamp_dives

"""

import logging
import numpy as np
import pandas as pd
from skdiveMove.core import robjs, cv, pandas2ri
from skdiveMove.helpers import (get_var_sampling_interval, _cut_dive,
                                rle_key)

logger = logging.getLogger(__name__)
# Attach a null handler so that importing this module as a library does not
# emit any logging output by itself; the application using this library
# should configure logging (e.g. via logging.basicConfig()) as needed
logger.addHandler(logging.NullHandler())


class TDRPhases:
    """Core TDR phase identification routines

    Attributes
    ----------
    wet_dry : pandas.DataFrame
        Table with columns ``phase_id`` and ``phase_label``, indexed like
        the depth series given to `detect_wet`.  ``None`` until
        `detect_wet` has been run.
    dives : dict
        Dictionary of dive activity data {'row_ids': pandas.DataFrame,
        'model': str, 'splines': dict, 'spline_derivs': pandas.DataFrame,
        'crit_vals': pandas.DataFrame}.  Every element is ``None`` until
        the corresponding detection method has been run.
    params : dict
        Dictionary with parameters used for detection of wet/dry and dive
        phases. {'wet_dry': {'dry_thr': float, 'wet_thr': float,
        'interp_wet': bool}, 'dives': {'dive_thr': float, 'dive_model':
        str, 'smooth_par': float, 'knot_factor': int, 'descent_crit_q':
        float, 'ascent_crit_q': float}}

    """

    def __init__(self):
        self.wet_dry = None
        self.dives = dict(row_ids=None, model=None, splines=None,
                          spline_derivs=None, crit_vals=None)
        self.params = dict(wet_dry={}, dives={})

    def detect_wet(self, depth, dry_thr=70, wet_cond=None, wet_thr=3610,
                   interp_wet=False):
        """Detect wet/dry activity phases

        Set the ``wet_dry`` attribute and record the parameters used under
        ``params["wet_dry"]``.

        Parameters
        ----------
        depth : xarray.DataArray
            DataArray with zero-offset corrected depth measurements.
        dry_thr : float, optional
        wet_cond : bool mask, optional
            Mask aligned with `depth`.  If not given (``None`` or empty),
            every non-missing depth reading is used as the wet condition.
        wet_thr : float, optional
        interp_wet : bool, optional

        Notes
        -----
        See details for arguments in diveMove's ``calibrateDepth``.  Unlike
        `diveMove`, the beginning/ending times for each phase are not
        stored with the class instance, as this information can be
        retrieved via the `.time_budget` method.

        """
        depth_py = depth.to_series()
        time_py = depth_py.index
        dtime = get_var_sampling_interval(depth).total_seconds()

        # The mask must be tested against None/emptiness explicitly: the
        # truth value of an array or Series is ambiguous and raises
        if wet_cond is None or np.size(wet_cond) == 0:
            wet_cond = ~depth_py.isna()
        else:
            wet_cond = pd.Series(wet_cond, index=time_py).astype("bool")

        # Phase detection is delegated to diveMove's internal R function
        rstr = """detPhaseFun <- diveMove:::.detPhase"""
        detPhaseFun = robjs.r(rstr)
        with cv.localconverter(robjs.default_converter +
                               pandas2ri.converter):
            phases_l = detPhaseFun(pd.Series(time_py),
                                   pd.Series(depth_py),
                                   dry_thr=dry_thr,
                                   wet_thr=wet_thr,
                                   wet_cond=wet_cond,
                                   interval=dtime)
            phases = pd.DataFrame({'phase_id': phases_l[0],
                                   'phase_label': phases_l[1]},
                                  index=time_py)

        if interp_wet:
            zdepth = depth_py.copy()
            iswet = phases["phase_label"] == "W"
            iswetna = iswet & zdepth.isna()

            if iswetna.any():
                # Fill missing depths within wet phases by cubic
                # interpolation, never allowing negative depth
                depth_intp = depth_py[iswet].interpolate(method="cubic")
                zdepth[iswetna] = np.maximum(np.zeros_like(depth_intp),
                                             depth_intp)
                # NOTE(review): `zoc_pars` is not defined by this class;
                # presumably it is supplied by a subclass or mixin --
                # confirm before relying on `interp_wet=True`
                self.zoc_pars["depth_zoc"] = zdepth

        # Assign the column (rather than `.loc[:, col] = ...`, which sets
        # values in place and keeps the old dtype) so the integer dtype
        # actually takes effect
        phases["phase_id"] = phases["phase_id"].astype(int)
        self.wet_dry = phases
        wet_dry_params = dict(dry_thr=dry_thr, wet_thr=wet_thr,
                              interp_wet=interp_wet)
        self.params["wet_dry"].update(wet_dry_params)

    def detect_dives(self, depth, dive_thr):
        """Identify dive events

        Set the ``dives`` attribute's "row_ids" dictionary element, and
        replace the "phase_label" column of the ``wet_dry`` attribute with
        the dive activity labels.  `detect_wet` must have been run before.

        Parameters
        ----------
        depth : xarray.DataArray
            DataArray with zero-offset corrected depth measurements.
        dive_thr : float

        Notes
        -----
        See details for arguments in diveMove's ``calibrateDepth``.

        """
        depth_py = depth.to_series()
        act_phases = self.wet_dry["phase_label"]
        # Dive detection is delegated to diveMove's internal R function
        detDiveFun = robjs.r("""detDiveFun <- diveMove:::.detDive""")
        with cv.localconverter(robjs.default_converter +
                               pandas2ri.converter):
            phases_df = detDiveFun(pd.Series(depth_py),
                                   pd.Series(act_phases),
                                   dive_thr=dive_thr)

        # Replace dots with underscore
        phases_df.columns = (phases_df.columns.str
                             .replace(".", "_", regex=False))
        phases_df.set_index(depth_py.index, inplace=True)
        dive_activity = phases_df.pop("dive_activity")
        # Dive and post-dive ID should be integer
        phases_df = phases_df.astype(int)
        self.dives["row_ids"] = phases_df
        self.wet_dry["phase_label"] = dive_activity
        self.params["dives"].update({'dive_thr': dive_thr})

    def detect_dive_phases(self, depth, dive_model, smooth_par=0.1,
                           knot_factor=3, descent_crit_q=0,
                           ascent_crit_q=0):
        """Detect dive phases

        Complete filling the ``dives`` attribute.  `detect_dives` must
        have been run before.  If no dives are found, a warning is logged
        and only the parameters are recorded.

        Parameters
        ----------
        depth : xarray.DataArray
            DataArray with zero-offset corrected depth measurements.
        dive_model : {"unimodal", "smooth.spline"}
        smooth_par : float, optional
        knot_factor : int, optional
        descent_crit_q : float, optional
        ascent_crit_q : float, optional

        Notes
        -----
        See details for arguments in diveMove's ``calibrateDepth``.

        """
        depth_py = depth.to_series()
        row_ids = self.get_dives_details("row_ids")
        dive_ids = self.get_dives_details("row_ids", columns="dive_id")
        ok = (dive_ids > 0) & ~depth_py.isna()

        if ok.any():
            ddepths = depth_py[ok]  # diving depths
            dtimes = ddepths.index
            dids = dive_ids[ok]
            # flatnonzero always yields a 1-D array, even for one sample
            idx = np.flatnonzero(ok.to_numpy())
            time_num = (dtimes - dtimes[0]).total_seconds().to_numpy()
            divedf = pd.DataFrame({'dive_id': dids.to_numpy(),
                                   'idx': idx,
                                   'depth': ddepths.to_numpy(),
                                   'time_num': time_num},
                                  index=ddepths.index)
            grouped = divedf.groupby("dive_id")

            # Every row starts with the "X" label; rows belonging to
            # dives are relabelled below
            xx = pd.Categorical(np.repeat(["X"], row_ids.shape[0]),
                                categories=["D", "DB", "B", "BA",
                                            "DA", "A", "X"])
            row_ids["dive_phase"] = xx

            dive_nums = []
            cval_list = []
            spl_der_list = []
            spl_list = []
            for name, grp in grouped:
                res = _cut_dive(grp, dive_model=dive_model,
                                smooth_par=smooth_par,
                                knot_factor=knot_factor,
                                descent_crit_q=descent_crit_q,
                                ascent_crit_q=ascent_crit_q)
                dive_nums.append(name)
                # Write through the DataFrame itself: assigning via a
                # Series pulled out of the frame is chained assignment,
                # which does not reach the frame under Copy-on-Write
                row_ids.loc[grp.index, "dive_phase"] = (
                    res.pop("label_matrix")[:, 1])
                # Splines
                spl = res.pop("dive_spline")
                # Convert directly into a dict, with each element turned
                # into a list of R objects.  Access each via
                # `_get_dive_spline_slot`
                spl_dict = dict(zip(spl.names, list(spl)))
                spl_list.append(spl_dict)
                # Spline derivatives
                spl_der = res.pop("spline_deriv")
                spl_der_idx = pd.to_timedelta(spl_der[:, 0], unit="s")
                spl_der = pd.DataFrame({'y': spl_der[:, 1]},
                                       index=spl_der_idx)
                spl_der_list.append(spl_der)
                # Critical values (all that's left in res)
                cvals = pd.DataFrame(res, index=[name])
                cvals.index.rename("dive_id", inplace=True)
                # Adjust critical indices for Python convention and ensure
                # integers
                cvals.iloc[:, :2] = cvals.iloc[:, :2].astype(int) - 1
                cval_list.append(cvals)

            self.dives["model"] = dive_model
            # Splines, keyed by the dive IDs in the order processed
            self.dives["splines"] = dict(zip(dive_nums, spl_list))
            self.dives["spline_derivs"] = pd.concat(spl_der_list,
                                                    keys=dive_nums)
            self.dives["crit_vals"] = pd.concat(cval_list)
        else:
            logger.warning("No dives found")

        self.params["dives"].update(dict(dive_model=dive_model,
                                         smooth_par=smooth_par,
                                         knot_factor=knot_factor,
                                         descent_crit_q=descent_crit_q,
                                         ascent_crit_q=ascent_crit_q))

    def get_dives_details(self, key, columns=None):
        """Accessor for the ``dives`` attribute

        Parameters
        ----------
        key : {"row_ids", "model", "splines", "spline_derivs", "crit_vals"}
            Name of the key to retrieve.
        columns : str or array_like, optional
            Names of the columns of the dataframe in `key`, when
            applicable.  If not given (``None`` or empty), the whole
            element is returned.

        Returns
        -------
        out
            The requested element of ``dives``, or its selected columns.

        Raises
        ------
        KeyError
            If `key` is not in ``dives``, or any of `columns` is missing.
        IndexError
            If the element under `key` has not been set yet.

        """
        try:
            okey = self.dives[key]
        except KeyError as err:
            msg = ("'{}' is not found.\nAvailable keys: {}"
                   .format(key, self.dives.keys()))
            logger.error(msg)
            raise KeyError(msg) from err

        if okey is None:
            raise IndexError("'{}' not available.".format(key))

        # Test for None/emptiness explicitly: the truth value of an
        # array-like with several names is ambiguous and raises
        try:
            select_all = columns is None or len(columns) == 0
        except TypeError:
            # Scalar label without a length
            select_all = False

        if select_all:
            return okey

        try:
            odata = okey[columns]
        except KeyError as err:
            # Dictionary elements have keys rather than columns
            available = (okey.columns if hasattr(okey, "columns")
                         else list(okey))
            msg = ("At least one of the requested columns does not "
                   "exist.\nAvailable columns: {}").format(available)
            logger.error(msg)
            raise KeyError(msg) from err

        return odata

    def get_wet_activity(self):
        """Accessor for the ``wet_dry`` attribute

        Returns
        -------
        out : pandas.DataFrame or None
            The ``wet_dry`` attribute as is (not a copy).

        """
        return self.wet_dry

    def get_params(self, key):
        """Return parameters used for identifying wet/dry or diving phases.

        Parameters
        ----------
        key: {'wet_dry', 'dives'}

        Returns
        -------
        out : dict or None
            Dictionary of parameters for `key`; ``None`` for any other
            value of `key`.

        """
        if key in ("wet_dry", "dives"):
            return self.params[key]
        return None

    def _get_dive_spline_slot(self, diveNo, name):
        """Accessor for the R objects in `dives`["splines"]

        Private method to retrieve elements easily.  Elements can be
        accessed individually as is, but some elements are handled
        specially.

        Parameters
        ----------
        diveNo : int or float
            Which dive number to retrieve spline details for.
        name : str
            Element to retrieve. {"data", "xy", "knots", "coefficients",
            "order", "lambda.opt", "sigmasq", "degree", "g", "a", "b",
            "variter"}

        Returns
        -------
        out : pandas.Series, float or numpy.ndarray
            Series indexed by time offset for "data" and "xy", float for
            the scalar elements, and array for anything else.

        """
        # Safe to assume these are all scalars, based on the current
        # default settings in diveMove's `.cutDive`
        scalars = ("order", "lambda.opt", "sigmasq", "degree", "g", "a",
                   "b", "variter")
        idata = self.get_dives_details("splines")[diveNo]

        if name == "data":
            x = pd.to_timedelta(np.array(idata[name][0]), unit="s")
            return pd.Series(np.array(idata[name][1]), index=x)
        if name == "xy":
            x = pd.to_timedelta(np.array(idata["x"]), unit="s")
            return pd.Series(np.array(idata["y"]), index=x)
        if name in scalars:
            # Builtin float: the `np.float` alias no longer exists in
            # current NumPy
            return float(idata[name][0])
        return np.array(idata[name])

    def get_dive_deriv(self, diveNo, phase):
        """Retrieve depth spline derivative for a given dive

        Parameters
        ----------
        diveNo : int
            Dive number to retrieve derivative for.
        phase : {"descent", "bottom", "ascent"}
            The dive phase to retrieve data for.

        Returns
        -------
        out : pandas.DataFrame
            Rows of the dive's spline derivative (column ``y``) that fall
            within the requested phase, indexed by time offset.

        Raises
        ------
        KeyError
            If `phase` is not one of the accepted values.

        """
        der = self.get_dives_details("spline_derivs").loc[diveNo]
        crit_vals = self.get_dives_details("crit_vals").loc[diveNo]
        spl_data = self.get_dives_details("splines")[diveNo]["data"]
        spl_times = np.array(spl_data[0])  # x row is time steps in (s)

        def crit_time(which):
            # Time offset of the sample at the given critical index
            return pd.Timedelta(spl_times[int(crit_vals[which])],
                                unit="s")

        if phase == "descent":
            oder = der.loc[:crit_time("descent_crit")]
        elif phase == "bottom":
            beg = crit_time("descent_crit")
            end = crit_time("ascent_crit")
            oder = der[(der.index >= beg) & (der.index <= end)]
        elif phase == "ascent":
            oder = der.loc[crit_time("ascent_crit"):]
        else:
            msg = "`phase` must be 'descent', 'bottom' or 'ascent'"
            logger.error(msg)
            raise KeyError(msg)

        return oder

    def _get_dive_deriv_stats(self, diveNo):
        """Calculate stats for the depth derivative of a given dive

        Parameters
        ----------
        diveNo : int
            Dive number to summarize.

        Returns
        -------
        out : pandas.DataFrame
            Single row labelled `diveNo`, with the `describe` statistics
            (except the count) of the derivative in each phase, prefixed
            with "descD_", "bottD_" and "ascD_".

        """
        phase_prefixes = (("descent", "descD_"),
                          ("bottom", "bottD_"),
                          ("ascent", "ascD_"))
        phase_sts = []
        for phase, prefix in phase_prefixes:
            der = self.get_dive_deriv(diveNo, phase)
            # Drop the count, lay out as a single row, and rename that
            # row to match diveNo
            sts = (pd.DataFrame(der.describe().iloc[1:]).transpose()
                   .add_prefix(prefix).rename({"y": diveNo}))
            phase_sts.append(sts)

        desc_sts, bott_sts, asc_sts = phase_sts
        sts = pd.merge(desc_sts, bott_sts, left_index=True,
                       right_index=True)
        sts = pd.merge(sts, asc_sts, left_index=True, right_index=True)

        return sts

    def time_budget(self, ignore_z=True, ignore_du=True):
        """Summary of wet/dry activities at the broadest time scale

        Parameters
        ----------
        ignore_z : bool, optional
            Whether to ignore trivial aquatic periods.
        ignore_du : bool, optional
            Whether to ignore diving and underwater periods.

        Returns
        -------
        out : pandas.DataFrame
            DataFrame indexed by phase id, with categorical activity label
            for each phase, and beginning and ending times.

        """
        labels = self.get_wet_activity()["phase_label"]
        # Relabel on the label series only, so that the time stamps are
        # never compared against label strings
        if ignore_z:
            labels = labels.mask(labels == "Z", "L")
        if ignore_du:
            labels = labels.mask(labels.isin(["U", "D"]), "W")
        # The index becomes a column, expected to be named "date_time"
        labels = labels.reset_index()

        grp_key = rle_key(labels["phase_label"]).rename("phase_id")
        labels_grp = labels.groupby(grp_key)

        begs = labels_grp.first().rename(columns={"date_time": "beg"})
        ends = labels_grp.last()["date_time"].rename("end")

        return pd.concat((begs, ends), axis=1)

    def stamp_dives(self, ignore_z=True):
        """Identify the wet activity phase corresponding to each dive

        Parameters
        ----------
        ignore_z : bool, optional
            Whether to ignore trivial aquatic periods.

        Returns
        -------
        out : pandas.DataFrame
            DataFrame indexed by dive ID, and three columns identifying
            which phase they are in, and the beginning and ending time
            stamps.  Empty if there are no dives.

        """
        phase_lab = self.get_wet_activity()["phase_label"]
        # "U" and "D" considered as "W" here
        phase_lab = phase_lab.mask(phase_lab.isin(["U", "D"]), "W")
        if ignore_z:
            phase_lab = phase_lab.mask(phase_lab == "Z", "L")

        dive_ids = self.get_dives_details("row_ids", columns="dive_id")

        # NOTE(review): the first phase is numbered 2 here (run counter
        # plus one); confirm this matches the numbering in `time_budget`
        grp_key = (phase_lab
                   .ne(phase_lab.shift())
                   .cumsum() + 1).rename("phase_id")

        isdive = dive_ids > 0
        # Reset index so the time stamps (expected to be named
        # "date_time") are available as a column within each group
        merged = (pd.concat((grp_key, dive_ids, phase_lab), axis=1)
                  .loc[isdive, :].reset_index())

        dives_ll = []
        for name, group in merged.groupby("phase_id"):
            dives_uniq = pd.Series(group["dive_id"].unique(),
                                   name="dive_id")
            ndives = dives_uniq.size
            # Every dive in the group gets the time stamps of the first
            # and last dive samples in that phase
            beg = [group["date_time"].iloc[0]] * ndives
            end = [group["date_time"].iloc[-1]] * ndives
            dive_df = pd.DataFrame({'phase_id': [name] * ndives,
                                    'beg': beg,
                                    'end': end}, index=dives_uniq)
            dives_ll.append(dive_df)

        if not dives_ll:
            # No dives at all: concatenating nothing would raise
            return pd.DataFrame(columns=["phase_id", "beg", "end"],
                                index=pd.Index([], name="dive_id"))

        return pd.concat(dives_ll)