Module statkit.metrics
Classification metrics not part of scikit-learn.
Source code
"""Classification metrics not part of sci-kit learn."""
from numpy import array, ndarray
from pandas import Series
from sklearn.metrics import roc_curve
def youden_j_threshold(y_true, y_pred) -> float:
    """Classification threshold with highest Youden's J.

    Args:
        y_true: Ground truth labels.
        y_pred: Scores predicted by the classifier (e.g., probability of the positive class).
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
    j_scores = tpr - fpr
    # Sort (J, threshold) pairs ascending; the last entry holds the highest J.
    j_ordered = sorted(zip(j_scores, thresholds))
    return j_ordered[-1][1]


def youden_j(y_true, y_pred) -> float:
    r"""Classifier informedness as a balance between true and false positives.

    Youden's J statistic is defined as:
    $$
    J = r_{\mathrm{tp}} - r_{\mathrm{fp}}.
    $$

    Args:
        y_true: Ground truth labels.
        y_pred: Scores predicted by the classifier (e.g., probability of the positive class).
    """
    return sensitivity(y_true, y_pred) + specificity(y_true, y_pred) - 1


def true_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The fraction of true positives out of all positives (recall).

    Aliases:
    - Sensitivity,
    - Recall,
    - Hit rate.

    $$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability meets or exceeds threshold.
    """
    if not isinstance(y_true, (ndarray, Series)):
        y_true = array(y_true)
    if not isinstance(y_prob, (ndarray, Series)):
        y_prob = array(y_prob)
    y_pred = y_prob >= threshold
    positives = sum(y_true)
    true_positives = sum(y_true.astype(bool) & y_pred)
    return true_positives / positives


def false_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The fraction of false positives out of all negatives.

    Also called the fall-out rate.

    $$r_{\mathrm{fp}} = \frac{f_p}{f_p + t_n} = \frac{f_p}{n}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability meets or exceeds threshold.
    """
    if not isinstance(y_true, (ndarray, Series)):
        y_true = array(y_true)
    if not isinstance(y_prob, (ndarray, Series)):
        y_prob = array(y_prob)
    # Use >= so the decision rule matches true_positive_rate.
    y_pred = y_prob >= threshold
    negatives = y_true.size - sum(y_true)
    # Actual negatives, but classified as positive.
    false_positives = sum((~y_true.astype(bool)) & y_pred)
    return false_positives / negatives


def sensitivity(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The fraction of true positives out of all positives.

    Aliases:
    - True positive rate,
    - Recall,
    - Hit rate.

    $$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability meets or exceeds threshold.
    """
    return true_positive_rate(y_true, y_prob, threshold)


def specificity(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The fraction of true negatives out of all negatives.

    $$r_{\mathrm{tn}} = \frac{t_n}{t_n + f_p} = \frac{t_n}{n} = 1 - r_{\mathrm{fp}}$$

    Aliases:
    - True negative rate,
    - Selectivity.

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability meets or exceeds threshold.
    """
    return 1 - false_positive_rate(y_true, y_prob, threshold)
Functions
def false_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float

The fraction of false positives out of all negatives. Also called the fall-out rate.

$$r_{\mathrm{fp}} = \frac{f_p}{f_p + t_n} = \frac{f_p}{n}$$

Args
    y_true: Ground truth label (binarised).
    y_prob: Probability of positive class.
    threshold: Classify as positive when probability meets or exceeds threshold.
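A minimal usage sketch; the toy labels and probabilities below are assumed for illustration:

    from statkit.metrics import false_positive_rate

    y_true = [0, 0, 0, 1, 1]            # three negatives, two positives
    y_prob = [0.9, 0.6, 0.1, 0.8, 0.4]  # predicted probability of the positive class

    # Two of the three negatives reach the 0.5 threshold: fall-out = 2/3.
    print(false_positive_rate(y_true, y_prob, threshold=0.5))  # 0.666...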
def sensitivity(y_true, y_prob, threshold: float = 0.5) -> float

The fraction of true positives out of all positives.

Aliases
- True positive rate,
- Recall,
- Hit rate.

$$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

Args
    y_true: Ground truth label (binarised).
    y_prob: Probability of positive class.
    threshold: Classify as positive when probability meets or exceeds threshold.
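Because sensitivity simply delegates to true_positive_rate, the two calls are interchangeable; a short sketch with assumed toy data:

    from statkit.metrics import sensitivity, true_positive_rate

    y_true = [1, 1, 1, 0]
    y_prob = [0.9, 0.7, 0.2, 0.4]

    # Two of the three positives reach the default 0.5 threshold: 2/3.
    assert sensitivity(y_true, y_prob) == true_positive_rate(y_true, y_prob)
    print(sensitivity(y_true, y_prob))  # 0.666...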
def specificity(y_true, y_prob, threshold: float = 0.5) -> float

The fraction of true negatives out of all negatives.

$$r_{\mathrm{tn}} = \frac{t_n}{t_n + f_p} = \frac{t_n}{n} = 1 - r_{\mathrm{fp}}$$

Aliases
- True negative rate,
- Selectivity.

Args
    y_true: Ground truth label (binarised).
    y_prob: Probability of positive class.
    threshold: Classify as positive when probability meets or exceeds threshold.
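A short sketch (toy data assumed) illustrating the complement relation to the false positive rate:

    from statkit.metrics import false_positive_rate, specificity

    y_true = [0, 0, 0, 0, 1]
    y_prob = [0.1, 0.2, 0.3, 0.7, 0.9]

    # One of four negatives reaches the 0.5 threshold: fpr = 1/4.
    print(false_positive_rate(y_true, y_prob))  # 0.25
    print(specificity(y_true, y_prob))          # 0.75 = 1 - 0.25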
def true_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float

The fraction of true positives out of all positives (recall).

Aliases
- Sensitivity,
- Recall,
- Hit rate.

$$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

Args
    y_true: Ground truth label (binarised).
    y_prob: Probability of positive class.
    threshold: Classify as positive when probability meets or exceeds threshold.
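A sketch (values assumed) of how recall falls as the decision threshold is raised:

    from statkit.metrics import true_positive_rate

    y_true = [1, 1, 1, 1, 0]
    y_prob = [0.9, 0.8, 0.6, 0.3, 0.2]

    print(true_positive_rate(y_true, y_prob, threshold=0.5))  # 0.75: 3 of 4 positives recovered
    print(true_positive_rate(y_true, y_prob, threshold=0.7))  # 0.5: stricter cut, lower recall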
def youden_j(y_true, y_pred) -> float

Classifier informedness as a balance between true and false positives. Youden's J statistic is defined as:

$$J = r_{\mathrm{tp}} - r_{\mathrm{fp}}.$$

Args
    y_true: Ground truth labels.
    y_pred: Scores predicted by the classifier (e.g., probability of the positive class).
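A sketch of the identity J = sensitivity + specificity - 1, on assumed toy data:

    from statkit.metrics import sensitivity, specificity, youden_j

    y_true = [0, 0, 1, 1]
    y_prob = [0.2, 0.6, 0.7, 0.9]

    # sensitivity = 2/2, specificity = 1/2, so J = 1.0 + 0.5 - 1 = 0.5.
    print(youden_j(y_true, y_prob))                                       # 0.5
    print(sensitivity(y_true, y_prob) + specificity(y_true, y_prob) - 1)  # 0.5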
def youden_j_threshold(y_true, y_pred) -> float

Classification threshold with highest Youden's J.

Args
    y_true: Ground truth labels.
    y_pred: Scores predicted by the classifier (e.g., probability of the positive class).
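A sketch of picking an operating point from predicted probabilities (toy data assumed); the returned value is one of the candidate thresholds enumerated by roc_curve:

    from statkit.metrics import youden_j_threshold

    y_true = [0, 0, 0, 1, 1, 1]
    y_prob = [0.1, 0.4, 0.35, 0.8, 0.65, 0.9]

    # The threshold maximising tpr - fpr along the ROC curve;
    # here 0.65 separates the two classes perfectly.
    print(youden_j_threshold(y_true, y_prob))  # 0.65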