Source code for hepstats.hypotests.calculators.basecalculator

#!/usr/bin/python
from typing import Dict, Union, Tuple, List
import numpy as np

from ..hypotests_object import HypotestsObject
from ..parameters import POI, POIarray, asarray
from ...utils import pll, base_sampler, base_sample
from ..toyutils import ToysManager


class BaseCalculator(HypotestsObject):
    """Base class for calculator.

    Args:
        * **input** : loss or fit result
        * **minimizer** : minimizer to use to find the minimum of the loss function

    Example with `zfit`:
        >>> import zfit
        >>> from zfit.core.loss import UnbinnedNLL
        >>> from zfit.minimize import Minuit

        >>> obs = zfit.Space('x', limits=(0.1, 2.0))
        >>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
        >>> mean = zfit.Parameter("mu", 1.2)
        >>> sigma = zfit.Parameter("sigma", 0.1)
        >>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
        >>> loss = UnbinnedNLL(model=model, data=data)

        >>> calc = BaseCalculator(input=loss, minimizer=Minuit())
    """

    def __init__(self, input, minimizer):
        super().__init__(input, minimizer)

        # Cache of observed nll values, keyed by POI (filled by `obs_nll` / `qobs`).
        self._obs_nll = {}

        # Parameters of every model, indexed by name; if two models expose a
        # parameter with the same name the last one encountered is kept.
        self._parameters = {
            param.name: param for model in self.model for param in model.get_params()
        }

    def obs_nll(self, pois) -> np.ndarray:
        """Compute observed negative log-likelihood values for given parameters of interest.

        Values are computed with `pll` once per POI and then served from an
        internal cache on subsequent calls.

        Args:
            * **pois** (List[`hypotests.POI`]): parameters of interest

        Returns:
            `numpy.array`: observed nll values

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poi = POIarray(mean, [1.1, 1.2, 1.0])
            >>> nll = calc.obs_nll(poi)
        """
        ret = np.empty(pois.shape)

        for i, p in enumerate(pois):
            if p not in self._obs_nll:
                self._obs_nll[p] = pll(minimizer=self.minimizer, loss=self.loss, pois=p)
            ret[i] = self._obs_nll[p]

        return ret

    def qobs(
        self, poinull: List[POI], onesided=True, onesideddiscovery=False, qtilde=False
    ):
        r"""Computes observed values of the :math:`\Delta` log-likelihood test statistic.

        Args:
            * **poinull** (List[`hypotests.POI`]): parameters of interest for the null hypothesis
            * **qtilde** (bool, optional): if `True` use the :math:`\tilde{q}` test statistics else (default)
              use the :math:`q` test statistic
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a discovery
              test (default is `False`)

        Returns:
            `numpy.array`: observed values of q

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poi = POIarray(mean, [1.1, 1.2, 1.0])
            >>> q = calc.qobs(poi)
        """
        self.check_pois(poinull)

        param = poinull.parameter
        bestfit = self.bestfit.params[param]["value"]

        # For q-tilde the reference point of the likelihood ratio is moved to 0
        # only when the best fit value is negative; for a non-negative best fit
        # q-tilde and q share the same reference (the best fit itself).
        if qtilde and bestfit < 0:
            bestfitpoi = POI(param, 0)
        else:
            bestfitpoi = POI(param, bestfit)
            if len(poinull) == 1:
                # The nll at the best fit is already known from the fit result:
                # seed the cache so that `obs_nll` does not recompute it.
                self._obs_nll[bestfitpoi] = self.bestfit.fmin

        nll_poinull_obs = self.obs_nll(poinull)
        nll_bestfitpoi_obs = self.obs_nll(bestfitpoi)

        return self.q(
            nll1=nll_poinull_obs,
            nll2=nll_bestfitpoi_obs,
            poi1=poinull,
            poi2=bestfitpoi,
            onesided=onesided,
            onesideddiscovery=onesideddiscovery,
        )

    def _prepare_poialt(self, poinull, poialt, qtilde):
        """Validate `poialt` against `poinull`; for q-tilde, clip its negative values to 0.

        Nothing is done (and `poialt` is returned unchanged) when `poialt` is
        `None`/falsy, so callers may omit the alternative hypothesis even with
        `qtilde=True`.
        """
        if poialt:
            self.check_pois(poialt)
            self.check_pois_compatibility(poinull, poialt)

            if qtilde and (poialt.values < 0).any():
                poialt = POIarray(
                    parameter=poialt.parameter,
                    values=np.where(poialt.values < 0, 0, poialt.values),
                )

        return poialt

    def pvalue(
        self,
        poinull: List[POI],
        poialt: Union[List[POI], None] = None,
        qtilde=False,
        onesided=True,
        onesideddiscovery=False,
    ) -> Tuple[np.ndarray, np.ndarray]:
        r"""Computes pvalues for the null and alternative hypothesis.

        Args:
            * **poinull** (List[`hypotests.POI`]): parameters of interest for the null hypothesis
            * **poialt** (List[`hypotests.POI`], optional): parameters of interest for the alternative
              hypothesis
            * **qtilde** (bool, optional): if `True` use the :math:`\widetilde{q}` test statistics else
              (default) use the :math:`q` test statistic
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a discovery
              test (default is `False`)

        Returns:
            Tuple(`numpy.array`, `numpy.array`): pnull, palt

        Raises:
            TypeError: if `poinull` (or a given `poialt`) is not a POI/POIarray.
            NotImplementedError: for more than one parameter of interest, or when
                the subclass does not implement `_pvalue_`.
            ValueError: if `poinull` and `poialt` are not compatible.

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> pvalues = calc.pvalue(poinull, poialt)
        """
        self.check_pois(poinull)
        poialt = self._prepare_poialt(poinull, poialt, qtilde)

        return self._pvalue_(
            poinull=poinull,
            poialt=poialt,
            qtilde=qtilde,
            onesided=onesided,
            onesideddiscovery=onesideddiscovery,
        )

    def _pvalue_(self, poinull, poialt, qtilde, onesided, onesideddiscovery):
        """
        To be overwritten in `BaseCalculator` subclasses.
        """
        raise NotImplementedError

    def expected_pvalue(
        self,
        poinull: List[POI],
        poialt: List[POI],
        nsigma,
        CLs=False,
        qtilde=False,
        onesided=True,
        onesideddiscovery=False,
    ) -> Dict[int, np.array]:
        r"""Computes the expected pvalues and error bands for different values of :math:`\sigma`
        (0=expected/median)

        Args:
            * **poinull** (List[`hypotests.POI`]): parameters of interest for the null hypothesis
            * **poialt** (List[`hypotests.POI`], optional): parameters of interest for the alternative
              hypothesis
            * **nsigma** (`numpy.array`): array of values of :math:`\sigma` to compute the expected pvalue
            * **CLs** (bool, optional): if `True` computes pvalues as
              :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}` else as :math:`p_{clsb} = p_{null}`
            * **qtilde** (bool, optional): if `True` use the :math:`\widetilde{q}` test statistics else
              (default) use the :math:`q` test statistic
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a discovery
              (default is `False`)

        Returns:
            `numpy.array`: array of expected pvalues for each :math:`\sigma` value

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> nll = calc.expected_pvalue(poinull, poialt)
        """
        self.check_pois(poinull)
        poialt = self._prepare_poialt(poinull, poialt, qtilde)

        return self._expected_pvalue_(
            poinull=poinull,
            poialt=poialt,
            nsigma=nsigma,
            CLs=CLs,
            qtilde=qtilde,
            onesided=onesided,
            onesideddiscovery=onesideddiscovery,
        )

    def _expected_pvalue_(
        self, poinull, poialt, nsigma, CLs, qtilde, onesided, onesideddiscovery
    ):
        """
        To be overwritten in `BaseCalculator` subclasses.
        """
        raise NotImplementedError

    def expected_poi(
        self,
        poinull: List[POI],
        poialt: List[POI],
        nsigma,
        alpha=0.05,
        CLs=False,
        qtilde=False,
        onesided=True,
        onesideddiscovery=False,
    ):
        r"""Computes the expected parameter of interest values such that the expected p_values =
        :math:`\alpha` for different values of :math:`\sigma` (0=expected/median)

        Args:
            * **poinull** (List[`hypotests.POI`]): parameters of interest for the null hypothesis
            * **poialt** (List[`hypotests.POI`], optional): parameters of interest for the alternative
              hypothesis
            * **nsigma** (`numpy.array`): array of values of :math:`\sigma` to compute the expected pvalue
            * **alpha** (float, default=0.05): significance level
            * **CLs** (bool, optional): if `True` uses pvalues as
              :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}` else as :math:`p_{clsb} = p_{null}`
            * **qtilde** (bool, optional): if `True` use the :math:`\widetilde{q}` test statistics else
              (default) use the :math:`q` test statistic
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a discovery
              (default is `False`)

        Returns:
            `numpy.array`: array of expected POI values for each :math:`\sigma` value

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> nll = calc.expected_poi(poinull, poialt)
        """
        self.check_pois(poinull)
        poialt = self._prepare_poialt(poinull, poialt, qtilde)

        return self._expected_poi_(
            poinull=poinull,
            poialt=poialt,
            nsigma=nsigma,
            alpha=alpha,
            CLs=CLs,
            qtilde=qtilde,
            onesided=onesided,
            onesideddiscovery=onesideddiscovery,
        )

    def _expected_poi_(
        self, poinull, poialt, nsigma, alpha, CLs, qtilde, onesided, onesideddiscovery
    ):
        """
        To be overwritten in `BaseCalculator` subclasses.
        """
        raise NotImplementedError

    @staticmethod
    def check_pois(pois):
        """
        Checks if the parameters of interest are all `hepstats.parameters.POI/POIarray` instances.

        Raises:
            TypeError: if `pois` is not a `POIarray` instance.
            NotImplementedError: if `pois` has more than one dimension.
        """
        msg = "POI/POIarray is required."
        if not isinstance(pois, POIarray):
            raise TypeError(msg)
        if pois.ndim > 1:
            msg = "Tests with more that one parameter of interest are not yet implemented."
            raise NotImplementedError(msg)

    @staticmethod
    def check_pois_compatibility(poi1, poi2):
        """
        Checks compatibility between two lists of `hepstats.parameters.POIarray` instances.

        Raises:
            ValueError: if the two POIs differ in dimension or, for one
                dimensional POIs, in parameter name.
        """
        if poi1.ndim != poi2.ndim:
            msg = f"POIs should have the same dimensions, poi1={poi1.ndim}, poi2={poi2.ndim}"
            raise ValueError(msg)

        if poi1.ndim == 1:
            if poi1.name != poi2.name:
                msg = "The variables used in the parameters of interest should have the same names,"
                msg += f" poi1={poi1.name}, poi2={poi2.name}"
                raise ValueError(msg)

    def q(
        self,
        nll1: np.array,
        nll2: np.array,
        poi1,
        poi2,
        onesided=True,
        onesideddiscovery=False,
    ) -> np.ndarray:
        """Compute values of the test statistic q defined as twice the difference between negative
        log-likelihood values :math:`q = 2 (nll1 - nll2)`.

        With `onesideddiscovery`, q is set to 0 where `poi2 < poi1` or `q < 0`;
        otherwise with `onesided`, q is set to 0 where `poi2 > poi1` or `q < 0`;
        with both flags `False` q is returned unmodified.

        Args:
            * **nll1** (`numpy.array`): array of nll values #1, evaluated with poi1
            * **nll2** (`numpy.array`): array of nll values #2, evaluated with poi2
            * **poi1** ((List[`hypotests.POI`])): list of POI's #1
            * **poi2** ((List[`hypotests.POI`])): list of POI's #2
            * **onesided** (bool, optional, default=True)
            * **onesideddiscovery** (bool, optional, default=False): takes precedence over `onesided`

        Returns:
            `numpy.array`: array of :math:`q` values
        """
        self.check_pois(poi1)
        self.check_pois(poi2)
        self.check_pois_compatibility(poi1, poi2)

        assert len(nll1) == len(poi1)
        assert len(nll2) == len(poi2)

        poi1 = poi1.values
        poi2 = poi2.values

        q = 2 * (nll1 - nll2)
        zeros = np.zeros(q.shape)

        if onesideddiscovery:
            condition = (poi2 < poi1) | (q < 0)
            q = np.where(condition, zeros, q)
        elif onesided:
            condition = (poi2 > poi1) | (q < 0)
            q = np.where(condition, zeros, q)

        return q
class BaseToysCalculator(BaseCalculator):
    def __init__(self, input, minimizer, sampler, sample):
        """Basis for toys calculator class.

        Args:
            * **input** : loss or fit result
            * **minimizer** : minimizer to use to find the minimum of the loss function
            * **sampler** : function used to create sampler with models, number of events and
              floating parameters in the sample
            * **sample** : function used to get samples from the sampler

        NOTE(review): `sampler` and `sample` are accepted here but only `input`
        and `minimizer` are passed on to the next initializer; presumably another
        class in the hierarchy is responsible for them - confirm.
        """
        super().__init__(input, minimizer)
class ToysCalculator(BaseToysCalculator, ToysManager):
    """
    Class for calculators using toys.
    """

    def __init__(
        self,
        input,
        minimizer,
        ntoysnull=100,
        ntoysalt=100,
        sampler=base_sampler,
        sample=base_sample,
    ):
        """Toys calculator class.

        Args:
            * **input** : loss or fit result
            * **minimizer** : minimizer to use to find the minimum of the loss function
            * **ntoysnull** (int, default=100): minimum number of toys to generate for the null hypothesis
            * **ntoysalt** (int, default=100): minimum number of toys to generate for the alternative
              hypothesis
            * **sampler** : function used to create sampler with models, number of events and
              floating parameters in the sample. Default is `base_sampler`.
            * **sample** : function used to get samples from the sampler.
              Default is `base_sample`.
        """
        super().__init__(input, minimizer, sampler, sample)

        self._ntoysnull = ntoysnull
        self._ntoysalt = ntoysalt

    @classmethod
    def from_yaml(
        cls,
        filename,
        loss,
        minimizer,
        ntoysnull=100,
        ntoysalt=100,
        sampler=base_sampler,
        sample=base_sample,
    ):
        """
        ToysCalculator constructor with the toys loaded from a yaml file.

        Args:
            * **filename** (str): yaml file handed to `toysresults_from_yaml`
            * **loss** : loss or fit result, used as the calculator input
            * **minimizer** : minimizer to use to find the minimum of the loss function
            * **ntoysnull** (int, default=100): minimum number of toys to generate for the null hypothesis
            * **ntoysalt** (int, default=100): minimum number of toys to generate for the alternative
              hypothesis
            * **sampler** : function used to create sampler with models, number of events and
              floating parameters in the sample. Default is `base_sampler`.
            * **sample** : function used to get samples from the sampler.
              Default is `base_sample`.

        Returns
            ToysCalculator
        """
        instance = cls(loss, minimizer, ntoysnull, ntoysalt, sampler, sample)

        # Register every toy result read from the file on the new calculator.
        for result in instance.toysresults_from_yaml(filename):
            instance.add_toyresult(result)

        return instance

    @property
    def ntoysnull(self):
        """
        Returns the `ntoysnull` value given at construction (number of toys for the null hypothesis).
        """
        return self._ntoysnull

    @property
    def ntoysalt(self):
        """
        Returns the `ntoysalt` value given at construction (number of toys for the alternative hypothesis).
        """
        return self._ntoysalt

    def _get_toys(self, poigen, poieval=None, qtilde=False, hypothesis="null"):
        r"""
        Return the generated toys for a given POI, generating the missing ones first.

        Args:
            * **poigen** (POI): POI used to generate the toys
            * **poieval** (POIarray): POI values to evaluate the loss function
            * **qtilde** (bool, optional): if `True` use the :math:`\tilde{q}` test statistics else (default)
              use the :math:`q` test statistic
            * **hypothesis** : `null` or `alternative`

        Returns:
            dict mapping each generation POI to the result of `get_toyresult`.
        """
        assert hypothesis in ["null", "alternative"]

        ntoys = self.ntoysnull if hypothesis == "null" else self.ntoysalt

        toys = {}
        for gen in poigen:
            # The generation value itself is always part of the evaluation points.
            if poieval is None:
                evalpois = POIarray(poigen.parameter, [gen.value])
            elif gen not in poieval:
                evalpois = poieval.append(gen.value)
            else:
                evalpois = poieval

            # With qtilde the value 0 is added to the evaluation points as well.
            if qtilde and 0.0 not in evalpois.values:
                evalpois = evalpois.append(0.0)

            evalpois = asarray(evalpois)

            # Only generate what is missing to reach the requested number of toys.
            missing = ntoys - self.ntoys(gen, evalpois)
            if missing > 0:
                print(f"Generating {hypothesis} hypothesis toys for {gen}.")
                self.generate_and_fit_toys(ntoys=missing, poigen=gen, poieval=evalpois)

            toys[gen] = self.get_toyresult(gen, evalpois)

        return toys

    def get_toys_null(self, poigen, poieval, qtilde=False):
        r"""
        Return the generated toys for the null hypothesis.

        Args:
            * **poigen** (POI): POI used to generate the toys
            * **poieval** (POIarray): POI values to evaluate the loss function
            * **qtilde** (bool, optional): if `True` use the :math:`\tilde{q}` test statistics else (default)
              use the :math:`q` test statistic

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> for p in poinull:
            ...     calc.get_toys_null(p, poieval=poialt)
        """
        return self._get_toys(
            poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="null"
        )

    def get_toys_alt(self, poigen, poieval, qtilde=False):
        r"""
        Return the generated toys for the alternative hypothesis.

        Args:
            * **poigen** (POI): POI used to generate the toys
            * **poieval** (POIarray): POI values to evaluate the loss function
            * **qtilde** (bool, optional): if `True` use the :math:`\tilde{q}` test statistics else (default)
              use the :math:`q` test statistic

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> calc.get_toys_alt(poialt, poieval=poinull)
        """
        return self._get_toys(
            poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="alternative"
        )