from typing import Tuple
import numpy as np
from scipy.stats import norm
from .basecalculator import BaseCalculator
from ...utils import eval_pdf, array2dataset, pll
from ..parameters import POI, POIarray
def generate_asimov_hist(model, params, nbins=100):
    """Generate the Asimov histogram using a model and dictionary of parameters.

    The Asimov histogram is the binned expectation of the model: the pdf is
    evaluated at each bin center and scaled by the bin width so the histogram
    integrates (approximately) to the model yield over the observable space.

    Args:
        * **model** : model used to generate the dataset (must expose a 1-D
          ``space`` with ``limit1d`` bounds and an ``area()`` method)
        * **params** (Dict) : values of the parameters of the models
        * **nbins** (int, optional) : number of bins

    Returns:
        (`numpy.array`, `numpy.array`) : hist, bin_edges

    Example with `zfit`:
        >>> obs = zfit.Space('x', limits=(0.1, 2.0))
        >>> mean = zfit.Parameter("mu", 1.2)
        >>> sigma = zfit.Parameter("sigma", 0.1)
        >>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
        >>> hist, bin_edges = generate_asimov_hist(model, {"mu": 1.2, "sigma": 0.1})
    """
    space = model.space
    bounds = space.limit1d
    bin_edges = np.linspace(*bounds, nbins + 1)
    # Evaluate the pdf at the center of each bin.
    bin_centers = bin_edges[0:-1] + np.diff(bin_edges) / 2
    hist = eval_pdf(model, bin_centers, params, allow_extended=True)
    # Scale pdf values by the (uniform) bin width so the histogram
    # approximates the integral of the pdf over each bin.
    hist *= space.area() / nbins
    return hist, bin_edges
class AsymptoticCalculator(BaseCalculator):
    """
    Class for asymptotic calculators. Can be used only with one parameter
    of interest.

    See G. Cowan, K. Cranmer, E. Gross and O. Vitells: Asymptotic formulae for
    likelihood-based tests of new physics. Eur. Phys. J., C71:1-19, 2011
    """

    def __init__(self, input, minimizer, asimov_bins=100):
        """Asymptotic calculator class.

        Args:
            * **input** : loss or fit result
            * **minimizer** : minimizer to use to find the minimum of the loss function
            * **asimov_bins** (Optional, int) : number of bins of the Asimov dataset

        Example with `zfit`:
            >>> import zfit
            >>> from zfit.core.loss import UnbinnedNLL
            >>> from zfit.minimize import Minuit
            >>> obs = zfit.Space('x', limits=(0.1, 2.0))
            >>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> sigma = zfit.Parameter("sigma", 0.1)
            >>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
            >>> loss = UnbinnedNLL(model=model, data=data)
            >>> calc = AsymptoticCalculator(input=loss, minimizer=Minuit(), asimov_bins=100)
        """
        super().__init__(input, minimizer)
        self._asimov_bins = asimov_bins
        # Caches keyed by POI: the Asimov generation requires a fit, so the
        # datasets, the losses built on them and the nll evaluations are each
        # computed only once per hypothesis.
        self._asimov_dataset = {}
        self._asimov_loss = {}
        # cache of nll values computed with the asimov dataset
        self._asimov_nll = {}

    @staticmethod
    def check_pois(pois):
        """
        Checks if the parameters of interest are all `hepstats.parameters.POI/POIarray` instances.

        Raises:
            TypeError: if `pois` is not a `POIarray` (or `POI`) instance
            NotImplementedError: if `pois` has more than one dimension, as the
                asymptotic formulae are only valid for a single POI
        """
        msg = "POI/POIarray is required."
        if not isinstance(pois, POIarray):
            raise TypeError(msg)
        if pois.ndim > 1:
            msg = "Tests using the asymptotic calculator can only be used with one parameter of interest."
            raise NotImplementedError(msg)

    def asimov_dataset(self, poi) -> list:
        """Gets the Asimov dataset for a given alternative hypothesis.

        The nuisance parameters are first fitted with the POI fixed to the
        alternative value ("conditional fit"), then the Asimov histograms are
        generated from the models at those best-fit values.

        Args:
            * **poi** (`POI`): parameter of interest of the alternative hypothesis

        Returns:
            list: one dataset per model/data pair

        Example with `zfit`:
            >>> poialt = POI(mean, 1.2)
            >>> dataset = calc.asimov_dataset(poialt)
        """
        if poi not in self._asimov_dataset:
            model = self.model
            data = self.data
            minimizer = self.minimizer
            # Temporarily raise verbosity so the conditional fit is printed.
            oldverbose = minimizer.verbosity
            minimizer.verbosity = 5

            poiparam = poi.parameter
            poivalue = poi.value

            msg = "\nGet fit best values for nuisance parameters for the"
            msg += " alternative hypothesis!"
            print(msg)

            # Conditional fit: POI fixed to the alternative value, nuisance
            # parameters floating.
            with poiparam.set_value(poivalue):
                poiparam.floating = False
                minimum = minimizer.minimize(loss=self.loss)
                poiparam.floating = True
            print(minimum)

            minimizer.verbosity = oldverbose

            values = minimum.params
            # Inject the fixed POI value alongside the fitted nuisances.
            values[poiparam] = {"value": poivalue}

            # One bin count per dataset; a scalar is broadcast to all of them.
            if not isinstance(self._asimov_bins, list):
                asimov_bins = [self._asimov_bins] * len(data)
            else:
                asimov_bins = self._asimov_bins
            assert len(asimov_bins) == len(data)

            asimov_hists = [
                generate_asimov_hist(m, values, bins)
                for m, bins in zip(model, asimov_bins)
            ]

            asimov_data = []
            for i, ad in enumerate(asimov_hists):
                weights, bin_edges = ad
                bin_centers = bin_edges[0:-1] + np.diff(bin_edges) / 2
                asimov_data.append(
                    array2dataset(type(data[i]), data[i].space, bin_centers, weights)
                )

            self._asimov_dataset[poi] = asimov_data

        return self._asimov_dataset[poi]

    def asimov_loss(self, poi):
        """Constructs a loss function using the Asimov dataset for a given alternative hypothesis.

        Args:
            * **poi** (`POI`): parameter of interest of the alternative hypothesis

        Returns:
            Loss function

        Example with `zfit`:
            >>> poialt = POI(mean, 1.2)
            >>> loss = calc.asimov_loss(poialt)
        """
        if poi not in self._asimov_loss:
            loss = self.lossbuilder(self.model, self.asimov_dataset(poi))
            self._asimov_loss[poi] = loss
        return self._asimov_loss[poi]

    def asimov_nll(self, pois, poialt) -> np.ndarray:
        """Computes negative log-likelihood values for given parameters of interest using the Asimov dataset
        generated with a given alternative hypothesis.

        Args:
            * **pois** (`POIarray`): parameters of interest
            * **poialt** (`POI`): parameter of interest of the alternative hypothesis

        Returns:
            `numpy.array`: alternative nll values

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, 1.2)
            >>> nll = calc.asimov_nll(poinull, poialt)
        """
        self.check_pois(pois)
        self.check_pois(poialt)

        minimizer = self.minimizer
        ret = np.empty(pois.shape)
        for i, p in enumerate(pois):
            # Profile-likelihood value at p, cached per POI value.
            if p not in self._asimov_nll:
                loss = self.asimov_loss(poialt)
                nll = pll(minimizer, loss, p)
                self._asimov_nll[p] = nll
            ret[i] = self._asimov_nll[p]
        return ret

    def pnull(
        self,
        qobs,
        qalt=None,
        onesided=True,
        onesideddiscovery=False,
        qtilde=False,
        nsigma=0,
    ) -> np.ndarray:
        r"""Computes the pvalue for the null hypothesis.

        Args:
            * **qobs** (`numpy.array`): observed values of the test-statistic q
            * **qalt** (`numpy.array`): alternative values of the test-statistic q using the asimov dataset
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a
              discovery test (default `False`)
            * **qtilde** (bool, optional): if `True` use the :math:`\widetilde{q}` test statistics else (default)
              use the :math:`q` test statistic
            * **nsigma** (float, optional): significance shift

        Returns:
            `np.array` : array of the pvalues for the null hypothesis
        """
        sqrtqobs = np.sqrt(qobs)

        # 1 - norm.cdf(x) == norm.cdf(-x)
        if onesided or onesideddiscovery:
            pnull = 1.0 - norm.cdf(sqrtqobs - nsigma)
        else:
            pnull = (1.0 - norm.cdf(sqrtqobs - nsigma)) * 2.0

        if qalt is not None and qtilde:
            # q-tilde regime (Cowan et al., eq. for qobs > qalt > 0): the
            # test statistic distribution switches to the shifted form.
            cond = (qobs > qalt) & (qalt > 0)
            sqrtqalt = np.sqrt(qalt)
            pnull_2 = 1.0 - norm.cdf((qobs + qalt) / (2.0 * sqrtqalt) - nsigma)

            if not (onesided or onesideddiscovery):
                pnull_2 += 1.0 - norm.cdf(sqrtqobs - nsigma)

            pnull = np.where(cond, pnull_2, pnull)

        return pnull

    def qalt(self, poinull, poialt, onesided, onesideddiscovery) -> np.ndarray:
        r"""Computes alternative hypothesis values of the :math:`\Delta` log-likelihood test statistic using the asimov
        dataset.

        Args:
            * **poinull** (`POIarray`): parameters of interest for the null hypothesis
            * **poialt** (`POIarray`): parameters of interest for the alternative hypothesis
            * **onesided** (bool): if `True` computes onesided pvalues
            * **onesideddiscovery** (bool): if `True` computes onesided pvalues for a
              discovery test

        Returns:
            `numpy.array`: observed values of q

        Example with `zfit`:
            >>> mean = zfit.Parameter("mu", 1.2)
            >>> poinull = POI(mean, [1.1, 1.2, 1.0])
            >>> poialt = POI(mean, [1.2])
            >>> q = calc.qalt([poinull], [poialt])
        """
        nll_poinull_asy = self.asimov_nll(poinull, poialt)
        nll_poialt_asy = self.asimov_nll(poialt, poialt)
        return self.q(
            nll1=nll_poinull_asy,
            nll2=nll_poialt_asy,
            poi1=poinull,
            poi2=poialt,
            onesided=onesided,
            onesideddiscovery=onesideddiscovery,
        )

    def palt(
        self, qobs, qalt, onesided=True, onesideddiscovery=False, qtilde=False
    ) -> np.ndarray:
        r"""Computes the pvalue for the alternative hypothesis.

        Args:
            * **qobs** (`np.array`): observed values of the test-statistic q
            * **qalt** (`np.array`): alternative values of the test-statistic q using the Asimov dataset
            * **onesided** (bool, optional): if `True` (default) computes onesided pvalues
            * **onesideddiscovery** (bool, optional): if `True` computes onesided pvalues for a
              discovery test (default `False`)
            * **qtilde** (bool, optional): if `True` use the :math:`\widetilde{q}` test statistics else (default)
              use the :math:`q` test statistic

        Returns:
            `numpy.array` : array of the pvalues for the alternative hypothesis
        """
        sqrtqobs = np.sqrt(qobs)
        sqrtqalt = np.sqrt(qalt)

        # 1 - norm.cdf(x) == norm.cdf(-x)
        if onesided or onesideddiscovery:
            palt = 1.0 - norm.cdf(sqrtqobs - sqrtqalt)
        else:
            palt = 1.0 - norm.cdf(sqrtqobs + sqrtqalt)
            palt += 1.0 - norm.cdf(sqrtqobs - sqrtqalt)

        if qtilde:
            # q-tilde regime (qobs > qalt > 0), see Cowan et al.
            cond = (qobs > qalt) & (qalt > 0)
            palt_2 = 1.0 - norm.cdf((qobs - qalt) / (2.0 * sqrtqalt))

            if not (onesided or onesideddiscovery):
                palt_2 += 1.0 - norm.cdf(sqrtqobs + sqrtqalt)

            palt = np.where(cond, palt_2, palt)

        return palt

    def _pvalue_(self, poinull, poialt, qtilde, onesided, onesideddiscovery):
        # Compute the null p-value and, when an alternative hypothesis is
        # given, the alternative p-value (needed e.g. for CLs).
        qobs = self.qobs(
            poinull,
            onesided=onesided,
            qtilde=qtilde,
            onesideddiscovery=onesideddiscovery,
        )

        needpalt = poialt is not None
        if needpalt:
            qalt = self.qalt(poinull, poialt, onesided, onesideddiscovery)
            palt = self.palt(
                qobs=qobs,
                qalt=qalt,
                onesided=onesided,
                qtilde=qtilde,
                onesideddiscovery=onesideddiscovery,
            )
        else:
            qalt = None
            palt = None

        pnull = self.pnull(
            qobs=qobs,
            qalt=qalt,
            onesided=onesided,
            qtilde=qtilde,
            onesideddiscovery=onesideddiscovery,
        )
        return pnull, palt

    def _expected_pvalue_(
        self, poinull, poialt, nsigma, CLs, onesided, onesideddiscovery, qtilde
    ):
        # Expected p-values for each requested significance band (nsigma),
        # computed from the Asimov test statistic.
        qalt = self.qalt(
            poinull, poialt, onesided=onesided, onesideddiscovery=onesideddiscovery
        )
        qalt = np.where(qalt < 0, 0, qalt)

        expected_pvalues = []
        for ns in nsigma:
            p_clsb = self.pnull(
                qobs=qalt,
                qalt=None,
                onesided=onesided,
                qtilde=qtilde,
                onesideddiscovery=onesideddiscovery,
                nsigma=ns,
            )
            if CLs:
                # CLs = CLs+b / CLb, with CLb = Phi(nsigma) on the band.
                p_clb = norm.cdf(ns)
                p_cls = p_clsb / p_clb
                expected_pvalues.append(np.where(p_cls < 0, 0, p_cls))
            else:
                expected_pvalues.append(np.where(p_clsb < 0, 0, p_clsb))

        return expected_pvalues

    def _expected_poi_(
        self, poinull, poialt, nsigma, alpha, CLs, qtilde, onesided, onesideddiscovery
    ):
        # Expected POI upper limits per significance band, using the
        # asymptotic relation sigma = |poi_null - poi_alt| / sqrt(q_alt).
        qalt = self.qalt(
            poinull, poialt, onesided=onesided, onesideddiscovery=onesideddiscovery
        )
        qalt = np.where(qalt < 0, 0, qalt)

        sigma = np.sqrt((poinull[0].value - poialt[0].value) ** 2 / qalt)

        expected_values = []
        for ns in nsigma:
            if CLs:
                exp = sigma * (norm.ppf(1 - alpha * norm.cdf(ns)) + ns)
            else:
                exp = sigma * (norm.ppf(1 - alpha) + ns)
            expected_values.append(poialt[0].value + exp)

        return expected_values