SolasAI Disparity Calculations

import plotly.io as pio
pio.renderers.default = "svg"
import solas_disparity as sd
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn import metrics
from sklearn.model_selection import train_test_split

Building a Model

# Load the gzip-compressed HMDA extract, using its "id" column as the index.
# The relative path is resolved against the current working directory.
df = pd.read_csv("hmda.csv.gz", index_col="id")
# Preview five rows; the fixed seed makes the sample reproducible.
df.sample(n=5, random_state=161803)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[4], line 1
----> 1 df = pd.read_csv("hmda.csv.gz", index_col="id")
      2 df.sample(random_state=161803, n=5)

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    209     else:
    210         kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    325 if len(args) > num_allow_args:
    326     warnings.warn(
    327         msg.format(arguments=_format_argument_list(allow_args)),
    328         FutureWarning,
    329         stacklevel=find_stack_level(),
    330     )
--> 331 return func(*args, **kwargs)

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    946     defaults={"delimiter": ","},
    947 )
    948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds)
    602 _validate_names(kwds.get("names", None))
    604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
    607 if chunksize or iterator:
    608     return parser

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
   1439     self.options["has_index_names"] = kwds["has_index_names"]
   1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine)
   1733     if "b" not in mode:
   1734         mode += "b"
-> 1735 self.handles = get_handle(
   1736     f,
   1737     mode,
   1738     encoding=self.options.get("encoding", None),
   1739     compression=self.options.get("compression", None),
   1740     memory_map=self.options.get("memory_map", False),
   1741     is_text=is_text,
   1742     errors=self.options.get("encoding_errors", "strict"),
   1743     storage_options=self.options.get("storage_options", None),
   1744 )
   1745 assert self.handles is not None
   1746 f = self.handles.handle

File ~/code/solas-ai/.venv/lib/python3.8/site-packages/pandas/io/common.py:750, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    746 if compression == "gzip":
    747     if isinstance(handle, str):
    748         # error: Incompatible types in assignment (expression has type
    749         # "GzipFile", variable has type "Union[str, BaseBuffer]")
--> 750         handle = gzip.GzipFile(  # type: ignore[assignment]
    751             filename=handle,
    752             mode=ioargs.mode,
    753             **compression_args,
    754         )
    755     else:
    756         handle = gzip.GzipFile(
    757             # No overload variant of "GzipFile" matches argument types
    758             # "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
   (...)
    761             **compression_args,
    762         )

File /usr/lib/python3.8/gzip.py:173, in GzipFile.__init__(self, filename, mode, compresslevel, fileobj, mtime)
    171     mode += 'b'
    172 if fileobj is None:
--> 173     fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
    174 if filename is None:
    175     filename = getattr(fileobj, 'name', '')

FileNotFoundError: [Errno 2] No such file or directory: 'hmda.csv.gz'
# Target column and the predictor columns used to model it.
label = "Interest Rate"
features = [
    "Loan Amount",
    "Loan-to-Value Ratio",
    "Intro Rate Period",
    "Property Value",
    "Income",
    "Debt-to-Income Ratio",
    "Term 360",
    "Conforming",
]

# Hold out 25% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df[label],
    random_state=161803,
    test_size=0.25,
)
# Row labels of the hold-out set, used below to pull the matching group data.
test_index = y_test.index

# Small gradient-boosted regressor: four depth-4 trees with a learning rate of
# 1.0, initialised at the mean of the training labels.
xgb_regressor = xgb.XGBRegressor(
    n_estimators=4,
    max_depth=4,
    learning_rate=1.0,
    random_state=161803,
    base_score=y_train.mean(),
)
xgb_regressor.fit(X=X_train, y=y_train)

# Predict on the hold-out rows and re-attach their index so the predictions
# align with y_test and with the group data selected via test_index.
predictions = pd.Series(index=X_test.index, data=xgb_regressor.predict(X_test))
# Last expression of the cell: the hold-out R^2 formatted to three decimals.
f"Model R^2: {metrics.r2_score(y_true=y_test, y_pred=predictions):0.3f}"
'Model R^2: 0.178'

Store the arguments shared by the disparity functions in a dictionary so they can be reused across multiple calls.

# Keyword arguments shared by the disparity calls below. The three group lists
# are parallel: position i pairs a protected group with its reference group and
# the category both belong to (e.g. "Black" vs. "White" within "Race").
reused_arguments = {
    # Group data restricted to the hold-out rows.
    "group_data": df.loc[test_index],
    "protected_groups": ["Black", "Asian", "Native American", "Hispanic", "Female"],
    "reference_groups": ["White", "White", "White", "Non-Hispanic", "Male"],
    "group_categories": ["Race", "Race", "Race", "Ethnicity", "Sex"],
    # No sample weights are supplied.
    "sample_weight": None,
}

Adverse Impact Ratio (AIR)

# Adverse Impact Ratio on a binary outcome: True where the predicted value is
# at or below the median prediction. The rendered "Favorable" counts correspond
# to the True rows.
air = sd.adverse_impact_ratio(
    **reused_arguments,
    outcome=predictions <= predictions.quantile(0.5),
    percent_difference_threshold=0.0,
    air_threshold=0.8,
)
# Last expression of the cell: displays the result object.
air

Disparity Calculation: Adverse Impact Ratio

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ AIR Threshold                     │ 0.8                                                                         │
│ % Diff Threshold                  │ 0.0                                                                         │
│ Affected Groups                   │ Hispanic                                                                    │
│ Affected Reference                │ Non-Hispanic                                                                │
│ Affected Categories               │ Ethnicity                                                                   │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

* Percent Missing: Ethnicity: 13.68%, Race: 13.56%, Sex: 46.88%

Adverse Impact Ratio Summary Table

Group Category Group Reference Group Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 141.0 41.47% 9.70% 0.810 0.001 No
Race Asian White 4,322 13.56% 327.0 243.0 74.31% -23.14% 1.452 0.000 No
Race Native American White 4,322 13.56% 20.0 9.0 45.00% 6.17% 0.879 0.657 No
Race White 4,322 13.56% 3,623.0 1,854.0 51.17%
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 167.0 32.87% 21.54% 0.604 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 2,072.0 54.41%
Sex Female Male 2,656 46.88% 1,034.0 414.0 40.04% 9.78% 0.804 0.000 No
Sex Male 2,656 46.88% 1,622.0 808.0 49.82%
../../../../_images/7a99ba8d0250ec1a195aed32675c664fa05c0f41d28f4c9a9bf8ec13ac315e1b.svg
# Display the AIR summary table through the library's UI helper.
sd.ui.show(air.summary_table)
Group Category Group Reference Group Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 141.0 41.47% 9.70% 0.810 0.001 No
Race Asian White 4,322 13.56% 327.0 243.0 74.31% -23.14% 1.452 0.000 No
Race Native American White 4,322 13.56% 20.0 9.0 45.00% 6.17% 0.879 0.657 No
Race White 4,322 13.56% 3,623.0 1,854.0 51.17%
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 167.0 32.87% 21.54% 0.604 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 2,072.0 54.41%
Sex Female Male 2,656 46.88% 1,034.0 414.0 40.04% 9.78% 0.804 0.000 No
Sex Male 2,656 46.88% 1,622.0 808.0 49.82%

Standardized Mean Difference (SMD)

# Standardized Mean Difference on the continuous predictions, with the observed
# hold-out labels passed alongside. lower_score_favorable=True marks lower
# predicted values (here, predicted interest rates) as the favorable direction.
# NOTE(review): in the rendered output only the group with SMD above 30 is
# flagged, so the threshold appears to be on a percent-like scale; confirm.
smd = sd.standardized_mean_difference(
    **reused_arguments,
    outcome=predictions,
    label=y_test,
    lower_score_favorable=True,
    smd_threshold=30,
)
# Last expression of the cell: displays the result object.
smd

Disparity Calculation: SMD

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ SMD Threshold                     │ 30.0                                                                        │
│ Affected Groups                   │ Hispanic                                                                    │
│ Affected Reference                │ Non-Hispanic                                                                │
│ Affected Categories               │ Ethnicity                                                                   │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

* Percent Missing: Ethnicity: 13.68%, Race: 13.56%, Sex: 46.88%

SMD Summary Table

Group Category Group Reference Group Observations Percent Missing Total Average Label Average Outcome Std. Dev. of Outcomes SMD P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 0.05 0.05 0.00 27.559 0.000 No
Race Asian White 4,322 13.56% 327.0 0.04 0.05 0.00 -70.355 0.000 No
Race Native American White 4,322 13.56% 20.0 0.05 0.05 0.00 27.617 0.199 No
Race White 4,322 13.56% 3,623.0 0.05 0.05 0.00
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 0.05 0.05 0.00 43.545 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 0.05 0.05 0.00
Sex Female Male 2,656 46.88% 1,034.0 0.05 0.05 0.00 16.655 0.000 No
Sex Male 2,656 46.88% 1,622.0 0.05 0.05 0.00
../../../../_images/51711e558c354db6e5671e8c33d3af77ad0297f78f10c923c218e1e90dcd0c19.svg
# Display the SMD summary table through the library's UI helper.
sd.ui.show(smd.summary_table)
Group Category Group Reference Group Observations Percent Missing Total Average Label Average Outcome Std. Dev. of Outcomes SMD P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 0.05 0.05 0.00 27.559 0.000 No
Race Asian White 4,322 13.56% 327.0 0.04 0.05 0.00 -70.355 0.000 No
Race Native American White 4,322 13.56% 20.0 0.05 0.05 0.00 27.617 0.199 No
Race White 4,322 13.56% 3,623.0 0.05 0.05 0.00
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 0.05 0.05 0.00 43.545 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 0.05 0.05 0.00
Sex Female Male 2,656 46.88% 1,034.0 0.05 0.05 0.00 16.655 0.000 No
Sex Male 2,656 46.88% 1,622.0 0.05 0.05 0.00

Adverse Impact Ratio by Quantile

# AIR evaluated at each decile of the predictions (0.1, 0.2, ..., 1.0), with
# lower predicted values treated as favorable.
# NOTE(review): the rendered summary contains rows for only nine of these ten
# quantiles (there are no 70% rows). Presumably quantiles whose cutoff
# duplicates another are collapsed by the library; confirm.
airq = sd.adverse_impact_ratio_by_quantile(
    **reused_arguments,
    outcome=predictions,
    quantiles=[step / 10 for step in range(1, 11)],
    lower_score_favorable=True,
    percent_difference_threshold=0.0,
    air_threshold=0.8,
)
# Last expression of the cell: displays the result object.
airq

Disparity Calculation: AIR by Quantile

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ AIR Threshold                     │ 0.8                                                                         │
│ % Diff Threshold                  │ 0.0                                                                         │
│ Affected Groups                   │ Black, Hispanic, Female                                                     │
│ Affected Reference                │ White, Non-Hispanic, Male                                                   │
│ Affected Categories               │ Race, Ethnicity, Sex                                                        │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

AIR by Quantile Summary Table

Group Category Group Reference Group Quantile Quantile Cutoff Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
0 Race Black White 0.1 0.044761 4322 0.1356 340.0 13.0 0.038235 0.049261 0.436992 9.345011e-04 Yes
1 Race Asian White 0.1 0.044761 4322 0.1356 327.0 91.0 0.278287 -0.190791 3.180554 5.301298e-21 No
2 Race Native American White 0.1 0.044761 4322 0.1356 20.0 1.0 0.050000 0.037497 0.571451 1.000000e+00 No
3 Race White 0.1 0.044761 4322 0.1356 3623.0 317.0 0.087497 NaN NaN NaN
4 Ethnicity Hispanic Non-Hispanic 0.1 0.044761 4316 0.1368 508.0 15.0 0.029528 0.077878 0.274917 5.469793e-10 Yes
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
67 Race White 1.0 0.058530 4322 0.1356 3623.0 3623.0 1.000000 NaN NaN NaN
68 Ethnicity Hispanic Non-Hispanic 1.0 0.058530 4316 0.1368 508.0 508.0 1.000000 0.000000 1.000000 1.000000e+00 No
69 Ethnicity Non-Hispanic 1.0 0.058530 4316 0.1368 3808.0 3808.0 1.000000 NaN NaN NaN
70 Sex Female Male 1.0 0.058530 2656 0.4688 1034.0 1034.0 1.000000 0.000000 1.000000 1.000000e+00 No
71 Sex Male 1.0 0.058530 2656 0.4688 1622.0 1622.0 1.000000 NaN NaN NaN

72 rows × 14 columns

../../../../_images/4db883368a7e7f8167111160076f6b72fc596bafb2152c619d71d79a57a94eb6.svg
# Display the full AIR-by-quantile summary table through the library's UI
# helper; unlike the truncated raw repr above, it lists every row and formats
# quantiles and rates as percentages.
sd.ui.show(airq.summary_table)
Group Category Group Reference Group Quantile Quantile Cutoff Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
Race Black White 10.0% 0.044761 4,322 13.56% 340.0 13.0 3.82% 4.93% 0.437 0.001 Yes
Race Asian White 10.0% 0.044761 4,322 13.56% 327.0 91.0 27.83% -19.08% 3.181 0.000 No
Race Native American White 10.0% 0.044761 4,322 13.56% 20.0 1.0 5.00% 3.75% 0.571 1.000 No
Race White 10.0% 0.044761 4,322 13.56% 3,623.0 317.0 8.75%
Ethnicity Hispanic Non-Hispanic 10.0% 0.044761 4,316 13.68% 508.0 15.0 2.95% 7.79% 0.275 0.000 Yes
Ethnicity Non-Hispanic 10.0% 0.044761 4,316 13.68% 3,808.0 409.0 10.74%
Sex Female Male 10.0% 0.044761 2,656 46.88% 1,034.0 66.0 6.38% 3.05% 0.677 0.006 Yes
Sex Male 10.0% 0.044761 2,656 46.88% 1,622.0 153.0 9.43%
Race Black White 20.0% 0.045863 4,322 13.56% 340.0 37.0 10.88% 9.85% 0.525 0.000 Yes
Race Asian White 20.0% 0.045863 4,322 13.56% 327.0 132.0 40.37% -19.64% 1.947 0.000 No
Race Native American White 20.0% 0.045863 4,322 13.56% 20.0 2.0 10.00% 10.73% 0.482 0.403 No
Race White 20.0% 0.045863 4,322 13.56% 3,623.0 751.0 20.73%
Ethnicity Hispanic Non-Hispanic 20.0% 0.045863 4,316 13.68% 508.0 42.0 8.27% 14.95% 0.356 0.000 Yes
Ethnicity Non-Hispanic 20.0% 0.045863 4,316 13.68% 3,808.0 884.0 23.21%
Sex Female Male 20.0% 0.045863 2,656 46.88% 1,034.0 155.0 14.99% 4.92% 0.753 0.002 Yes
Sex Male 20.0% 0.045863 2,656 46.88% 1,622.0 323.0 19.91%
Race Black White 30.0% 0.046427 4,322 13.56% 340.0 62.0 18.24% 11.30% 0.617 0.000 Yes
Race Asian White 30.0% 0.046427 4,322 13.56% 327.0 175.0 53.52% -23.98% 1.812 0.000 No
Race Native American White 30.0% 0.046427 4,322 13.56% 20.0 4.0 20.00% 9.53% 0.677 0.464 No
Race White 30.0% 0.046427 4,322 13.56% 3,623.0 1,070.0 29.53%
Ethnicity Hispanic Non-Hispanic 30.0% 0.046427 4,316 13.68% 508.0 69.0 13.58% 19.14% 0.415 0.000 Yes
Ethnicity Non-Hispanic 30.0% 0.046427 4,316 13.68% 3,808.0 1,246.0 32.72%
Sex Female Male 30.0% 0.046427 2,656 46.88% 1,034.0 225.0 21.76% 5.74% 0.791 0.001 Yes
Sex Male 30.0% 0.046427 2,656 46.88% 1,622.0 446.0 27.50%
Race Black White 40.0% 0.046703 4,322 13.56% 340.0 103.0 30.29% 16.38% 0.649 0.000 Yes
Race Asian White 40.0% 0.046703 4,322 13.56% 327.0 238.0 72.78% -26.11% 1.559 0.000 No
Race Native American White 40.0% 0.046703 4,322 13.56% 20.0 8.0 40.00% 6.67% 0.857 0.656 No
Race White 40.0% 0.046703 4,322 13.56% 3,623.0 1,691.0 46.67%
Ethnicity Hispanic Non-Hispanic 40.0% 0.046703 4,316 13.68% 508.0 139.0 27.36% 22.30% 0.551 0.000 Yes
Ethnicity Non-Hispanic 40.0% 0.046703 4,316 13.68% 3,808.0 1,891.0 49.66%
Sex Female Male 40.0% 0.046703 2,656 46.88% 1,034.0 380.0 36.75% 7.27% 0.835 0.000 No
Sex Male 40.0% 0.046703 2,656 46.88% 1,622.0 714.0 44.02%
Race Black White 50.0% 0.047009 4,322 13.56% 340.0 141.0 41.47% 9.70% 0.810 0.001 No
Race Asian White 50.0% 0.047009 4,322 13.56% 327.0 243.0 74.31% -23.14% 1.452 0.000 No
Race Native American White 50.0% 0.047009 4,322 13.56% 20.0 9.0 45.00% 6.17% 0.879 0.657 No
Race White 50.0% 0.047009 4,322 13.56% 3,623.0 1,854.0 51.17%
Ethnicity Hispanic Non-Hispanic 50.0% 0.047009 4,316 13.68% 508.0 167.0 32.87% 21.54% 0.604 0.000 Yes
Ethnicity Non-Hispanic 50.0% 0.047009 4,316 13.68% 3,808.0 2,072.0 54.41%
Sex Female Male 50.0% 0.047009 2,656 46.88% 1,034.0 414.0 40.04% 9.78% 0.804 0.000 No
Sex Male 50.0% 0.047009 2,656 46.88% 1,622.0 808.0 49.82%
Race Black White 60.0% 0.047266 4,322 13.56% 340.0 161.0 47.35% 13.62% 0.777 0.000 Yes
Race Asian White 60.0% 0.047266 4,322 13.56% 327.0 260.0 79.51% -18.54% 1.304 0.000 No
Race Native American White 60.0% 0.047266 4,322 13.56% 20.0 11.0 55.00% 5.97% 0.902 0.648 No
Race White 60.0% 0.047266 4,322 13.56% 3,623.0 2,209.0 60.97%
Ethnicity Hispanic Non-Hispanic 60.0% 0.047266 4,316 13.68% 508.0 214.0 42.13% 21.53% 0.662 0.000 Yes
Ethnicity Non-Hispanic 60.0% 0.047266 4,316 13.68% 3,808.0 2,424.0 63.66%
Sex Female Male 60.0% 0.047266 2,656 46.88% 1,034.0 520.0 50.29% 7.60% 0.869 0.000 No
Sex Male 60.0% 0.047266 2,656 46.88% 1,622.0 939.0 57.89%
Race Black White 80.0% 0.048018 4,322 13.56% 340.0 248.0 72.94% 7.96% 0.902 0.001 No
Race Asian White 80.0% 0.048018 4,322 13.56% 327.0 308.0 94.19% -13.29% 1.164 0.000 No
Race Native American White 80.0% 0.048018 4,322 13.56% 20.0 14.0 70.00% 10.90% 0.865 0.250 No
Race White 80.0% 0.048018 4,322 13.56% 3,623.0 2,931.0 80.90%
Ethnicity Hispanic Non-Hispanic 80.0% 0.048018 4,316 13.68% 508.0 364.0 71.65% 10.83% 0.869 0.000 No
Ethnicity Non-Hispanic 80.0% 0.048018 4,316 13.68% 3,808.0 3,141.0 82.48%
Sex Female Male 80.0% 0.048018 2,656 46.88% 1,034.0 765.0 73.98% 4.44% 0.943 0.010 No
Sex Male 80.0% 0.048018 2,656 46.88% 1,622.0 1,272.0 78.42%
Race Black White 90.0% 0.048694 4,322 13.56% 340.0 288.0 84.71% 5.41% 0.940 0.003 No
Race Asian White 90.0% 0.048694 4,322 13.56% 327.0 321.0 98.17% -8.05% 1.089 0.000 No
Race Native American White 90.0% 0.048694 4,322 13.56% 20.0 17.0 85.00% 5.12% 0.943 0.441 No
Race White 90.0% 0.048694 4,322 13.56% 3,623.0 3,265.0 90.12%
Ethnicity Hispanic Non-Hispanic 90.0% 0.048694 4,316 13.68% 508.0 428.0 84.25% 6.85% 0.925 0.000 No
Ethnicity Non-Hispanic 90.0% 0.048694 4,316 13.68% 3,808.0 3,469.0 91.10%
Sex Female Male 90.0% 0.048694 2,656 46.88% 1,034.0 887.0 85.78% 4.04% 0.955 0.002 No
Sex Male 90.0% 0.048694 2,656 46.88% 1,622.0 1,457.0 89.83%
Race Black White 100.0% 0.058530 4,322 13.56% 340.0 340.0 100.00% 0.00% 1.000 1.000 No
Race Asian White 100.0% 0.058530 4,322 13.56% 327.0 327.0 100.00% 0.00% 1.000 1.000 No
Race Native American White 100.0% 0.058530 4,322 13.56% 20.0 20.0 100.00% 0.00% 1.000 1.000 No
Race White 100.0% 0.058530 4,322 13.56% 3,623.0 3,623.0 100.00%
Ethnicity Hispanic Non-Hispanic 100.0% 0.058530 4,316 13.68% 508.0 508.0 100.00% 0.00% 1.000 1.000 No
Ethnicity Non-Hispanic 100.0% 0.058530 4,316 13.68% 3,808.0 3,808.0 100.00%
Sex Female Male 100.0% 0.058530 2,656 46.88% 1,034.0 1,034.0 100.00% 0.00% 1.000 1.000 No
Sex Male 100.0% 0.058530 2,656 46.88% 1,622.0 1,622.0 100.00%

Odds Ratio

# Odds ratio on the same binary outcome used for AIR: True where the predicted
# value is at or below the median prediction.
odds_ratio = sd.odds_ratio(
    **reused_arguments,
    outcome=predictions <= predictions.quantile(0.5),
    percent_difference_threshold=0.0,
    odds_ratio_threshold=0.68,
)
# Last expression of the cell: displays the result object.
odds_ratio

Disparity Calculation: Odds Ratio

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ Odds Ratio Threshold              │ 0.68                                                                        │
│ % Diff Threshold                  │ 0.0                                                                         │
│ Affected Groups                   │ Black, Hispanic, Female                                                     │
│ Affected Reference                │ White, Non-Hispanic, Male                                                   │
│ Affected Categories               │ Race, Ethnicity, Sex                                                        │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

* Percent Missing: Ethnicity: 13.68%, Race: 13.56%, Sex: 46.88%

Odds Ratio Summary Table

Group Category Group Reference Group Observations Percent Missing Total Favorable Percent Favorable Odds Percent Difference Favorable Odds Ratio P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 141.0 41.47% 0.708543 9.70% 0.676058 0.001 Yes
Race Asian White 4,322 13.56% 327.0 243.0 74.31% 2.892857 -23.14% 2.760229 0.000 No
Race Native American White 4,322 13.56% 20.0 9.0 45.00% 0.818182 6.17% 0.780671 0.657 No
Race White 4,322 13.56% 3,623.0 1,854.0 51.17% 1.048050
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 167.0 32.87% 0.489736 21.54% 0.410319 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 2,072.0 54.41% 1.193548
Sex Female Male 2,656 46.88% 1,034.0 414.0 40.04% 0.667742 9.78% 0.672700 0.000 Yes
Sex Male 2,656 46.88% 1,622.0 808.0 49.82% 0.992629
../../../../_images/8ec64ca45f855dfb4d4d1ef61c441e6e9ead5e4317ca66b4ef9267e7fb986721.svg
# Display the odds-ratio summary table through the library's UI helper.
sd.ui.show(odds_ratio.summary_table)
Group Category Group Reference Group Observations Percent Missing Total Favorable Percent Favorable Odds Percent Difference Favorable Odds Ratio P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 141.0 41.47% 0.708543 9.70% 0.676058 0.001 Yes
Race Asian White 4,322 13.56% 327.0 243.0 74.31% 2.892857 -23.14% 2.760229 0.000 No
Race Native American White 4,322 13.56% 20.0 9.0 45.00% 0.818182 6.17% 0.780671 0.657 No
Race White 4,322 13.56% 3,623.0 1,854.0 51.17% 1.048050
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 167.0 32.87% 0.489736 21.54% 0.410319 0.000 Yes
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 2,072.0 54.41% 1.193548
Sex Female Male 2,656 46.88% 1,034.0 414.0 40.04% 0.667742 9.78% 0.672700 0.000 Yes
Sex Male 2,656 46.88% 1,622.0 808.0 49.82% 0.992629

Categorical Adverse Impact Ratio

Generate an example categorical outcome.

# Bin the predictions into quartiles; each bin is initially labelled by its
# interval, ordered from the lowest predictions to the highest.
categorical_outcome = pd.qcut(predictions, q=[0.0, 0.25, 0.5, 0.75, 1.0])
# Map each quartile interval to a readable label ("Best" = lowest predictions).
categories = pd.Series(
    ["Best", "Great", "Good", "Bad"],
    index=categorical_outcome.cat.categories,
)
# Relabel with rename_categories() instead of Series.replace(..., inplace=True):
# replace() on categorical data is deprecated in recent pandas releases, whereas
# rename_categories() is the supported way to rename categories and keeps their
# order intact.
categorical_outcome = categorical_outcome.cat.rename_categories(categories.to_dict())
# Categorical AIR over the labelled quartiles. ordinal_categories is the label
# list reversed, i.e. ["Bad", "Good", "Great", "Best"].
# NOTE(review): with this ordering the rendered "Favorable" counts accumulate
# starting from "Bad" (reaching 100% at "Best"), so the group flagged is the one
# with the fewest "Bad" outcomes. Confirm this is the intended direction for
# ordinal_categories.
cair = sd.categorical_adverse_impact_ratio(
    **reused_arguments,
    outcome=categorical_outcome,
    ordinal_categories=categories.tolist()[::-1],
    percent_difference_threshold=0.0,
    air_threshold=0.8,
)
# Last expression of the cell: displays the result object.
cair

Disparity Calculation: Categorical AIR

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ Categories                        │ ['Bad', 'Good', 'Great', 'Best']                                            │
│ Affected Groups                   │ Asian                                                                       │
│ Affected Reference                │ White                                                                       │
│ Affected Categories               │ Race                                                                        │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

Categorical AIR Summary Table

Group Category Group Reference Group Category Category Ordinality Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
0 Race Black White Bad 0 4322 0.1356 340.0 92.0 0.270588 -0.079586 1.416678 6.208101e-04 No
1 Race Asian White Bad 0 4322 0.1356 327.0 19.0 0.058104 0.132898 0.304206 3.108517e-11 Yes
2 Race Native American White Bad 0 4322 0.1356 20.0 6.0 0.300000 -0.108998 1.570665 2.496030e-01 No
3 Race White Bad 0 4322 0.1356 3623.0 692.0 0.191002 NaN NaN NaN
4 Ethnicity Hispanic Non-Hispanic Bad 0 4316 0.1368 508.0 144.0 0.283465 -0.108307 1.618340 2.138407e-08 No
5 Ethnicity Non-Hispanic Bad 0 4316 0.1368 3808.0 667.0 0.175158 NaN NaN NaN
6 Sex Female Male Bad 0 2656 0.4688 1034.0 269.0 0.260155 -0.044372 1.205631 9.589997e-03 No
7 Sex Male Bad 0 2656 0.4688 1622.0 350.0 0.215783 NaN NaN NaN
8 Race Black White Good 1 4322 0.1356 340.0 199.0 0.585294 -0.097025 1.198711 7.667616e-04 No
9 Race Asian White Good 1 4322 0.1356 327.0 84.0 0.256881 0.231389 0.526105 3.356985e-16 Yes
10 Race Native American White Good 1 4322 0.1356 20.0 11.0 0.550000 -0.061731 1.126427 6.570540e-01 No
11 Race White Good 1 4322 0.1356 3623.0 1769.0 0.488269 NaN NaN NaN
12 Ethnicity Hispanic Non-Hispanic Good 1 4316 0.1368 508.0 341.0 0.671260 -0.215377 1.472441 1.099404e-19 No
13 Ethnicity Non-Hispanic Good 1 4316 0.1368 3808.0 1736.0 0.455882 NaN NaN NaN
14 Sex Female Male Good 1 2656 0.4688 1034.0 620.0 0.599613 -0.097764 1.194807 1.012637e-06 No
15 Sex Male Good 1 2656 0.4688 1622.0 814.0 0.501850 NaN NaN NaN
16 Race Black White Great 2 4322 0.1356 340.0 296.0 0.870588 -0.108789 1.142805 1.974075e-06 No
17 Race Asian White Great 2 4322 0.1356 327.0 182.0 0.556575 0.205225 0.730605 1.011976e-14 Yes
18 Race Native American White Great 2 4322 0.1356 20.0 18.0 0.900000 -0.138200 1.181413 1.913231e-01 No
19 Race White Great 2 4322 0.1356 3623.0 2760.0 0.761800 NaN NaN NaN
20 Ethnicity Hispanic Non-Hispanic Great 2 4316 0.1368 508.0 461.0 0.907480 -0.172186 1.234173 3.273870e-20 No
21 Ethnicity Non-Hispanic Great 2 4316 0.1368 3808.0 2800.0 0.735294 NaN NaN NaN
22 Sex Female Male Great 2 2656 0.4688 1034.0 859.0 0.830754 -0.052086 1.066891 1.299613e-03 No
23 Sex Male Great 2 2656 0.4688 1622.0 1263.0 0.778668 NaN NaN NaN
24 Race Black White Best 3 4322 0.1356 340.0 340.0 1.000000 0.000000 1.000000 1.000000e+00 No
25 Race Asian White Best 3 4322 0.1356 327.0 327.0 1.000000 0.000000 1.000000 1.000000e+00 No
26 Race Native American White Best 3 4322 0.1356 20.0 20.0 1.000000 0.000000 1.000000 1.000000e+00 No
27 Race White Best 3 4322 0.1356 3623.0 3623.0 1.000000 NaN NaN NaN
28 Ethnicity Hispanic Non-Hispanic Best 3 4316 0.1368 508.0 508.0 1.000000 0.000000 1.000000 1.000000e+00 No
29 Ethnicity Non-Hispanic Best 3 4316 0.1368 3808.0 3808.0 1.000000 NaN NaN NaN
30 Sex Female Male Best 3 2656 0.4688 1034.0 1034.0 1.000000 0.000000 1.000000 1.000000e+00 No
31 Sex Male Best 3 2656 0.4688 1622.0 1622.0 1.000000 NaN NaN NaN
../../../../_images/ecf7ca8377ce168a7d90cc39c65a73dc58ac845c25dcef8c1d0dd5d2b36fbeb4.svg
# Display the categorical AIR summary table through the library's UI helper;
# compared with the raw repr above, rates are formatted as percentages.
sd.ui.show(cair.summary_table)
Group Category Group Reference Group Category Category Ordinality Observations Percent Missing Total Favorable Percent Favorable Percent Difference Favorable AIR P-Values Practically Significant
Race Black White Bad 0 4,322 13.56% 340.0 92.0 27.06% -7.96% 1.417 0.001 No
Race Asian White Bad 0 4,322 13.56% 327.0 19.0 5.81% 13.29% 0.304 0.000 Yes
Race Native American White Bad 0 4,322 13.56% 20.0 6.0 30.00% -10.90% 1.571 0.250 No
Race White Bad 0 4,322 13.56% 3,623.0 692.0 19.10%
Ethnicity Hispanic Non-Hispanic Bad 0 4,316 13.68% 508.0 144.0 28.35% -10.83% 1.618 0.000 No
Ethnicity Non-Hispanic Bad 0 4,316 13.68% 3,808.0 667.0 17.52%
Sex Female Male Bad 0 2,656 46.88% 1,034.0 269.0 26.02% -4.44% 1.206 0.010 No
Sex Male Bad 0 2,656 46.88% 1,622.0 350.0 21.58%
Race Black White Good 1 4,322 13.56% 340.0 199.0 58.53% -9.70% 1.199 0.001 No
Race Asian White Good 1 4,322 13.56% 327.0 84.0 25.69% 23.14% 0.526 0.000 Yes
Race Native American White Good 1 4,322 13.56% 20.0 11.0 55.00% -6.17% 1.126 0.657 No
Race White Good 1 4,322 13.56% 3,623.0 1,769.0 48.83%
Ethnicity Hispanic Non-Hispanic Good 1 4,316 13.68% 508.0 341.0 67.13% -21.54% 1.472 0.000 No
Ethnicity Non-Hispanic Good 1 4,316 13.68% 3,808.0 1,736.0 45.59%
Sex Female Male Good 1 2,656 46.88% 1,034.0 620.0 59.96% -9.78% 1.195 0.000 No
Sex Male Good 1 2,656 46.88% 1,622.0 814.0 50.18%
Race Black White Great 2 4,322 13.56% 340.0 296.0 87.06% -10.88% 1.143 0.000 No
Race Asian White Great 2 4,322 13.56% 327.0 182.0 55.66% 20.52% 0.731 0.000 Yes
Race Native American White Great 2 4,322 13.56% 20.0 18.0 90.00% -13.82% 1.181 0.191 No
Race White Great 2 4,322 13.56% 3,623.0 2,760.0 76.18%
Ethnicity Hispanic Non-Hispanic Great 2 4,316 13.68% 508.0 461.0 90.75% -17.22% 1.234 0.000 No
Ethnicity Non-Hispanic Great 2 4,316 13.68% 3,808.0 2,800.0 73.53%
Sex Female Male Great 2 2,656 46.88% 1,034.0 859.0 83.08% -5.21% 1.067 0.001 No
Sex Male Great 2 2,656 46.88% 1,622.0 1,263.0 77.87%
Race Black White Best 3 4,322 13.56% 340.0 340.0 100.00% 0.00% 1.000 1.000 No
Race Asian White Best 3 4,322 13.56% 327.0 327.0 100.00% 0.00% 1.000 1.000 No
Race Native American White Best 3 4,322 13.56% 20.0 20.0 100.00% 0.00% 1.000 1.000 No
Race White Best 3 4,322 13.56% 3,623.0 3,623.0 100.00%
Ethnicity Hispanic Non-Hispanic Best 3 4,316 13.68% 508.0 508.0 100.00% 0.00% 1.000 1.000 No
Ethnicity Non-Hispanic Best 3 4,316 13.68% 3,808.0 3,808.0 100.00%
Sex Female Male Best 3 2,656 46.88% 1,034.0 1,034.0 100.00% 0.00% 1.000 1.000 No
Sex Male Best 3 2,656 46.88% 1,622.0 1,622.0 100.00%

Residual Standardized Mean Difference

# Residual SMD: compares the model's residuals across groups, using the
# hold-out predictions and the observed labels. lower_score_favorable=True
# marks lower values as the favorable direction.
rsmd = sd.residual_standardized_mean_difference(
    **reused_arguments,
    prediction=predictions,
    label=y_test,
    lower_score_favorable=True,
    residual_smd_threshold=30,
)
# Last expression of the cell: displays the result object.
rsmd

Disparity Calculation: Residual SMD

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ Affected Groups                   │                                                                             │
│ Affected Reference                │                                                                             │
│ Affected Categories               │                                                                             │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

* Percent Missing: Ethnicity: 13.68%, Race: 13.56%, Sex: 46.88%

Residual SMD Summary Table

Group Category Group Reference Group Observations Percent Missing Total Average Prediction Average Label Average Residual Std. Dev. of Residuals Residual SMD P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 0.047486 0.05 0.000472 0.004864 5.445416 0.337 No
Race Asian White 4,322 13.56% 327.0 0.045165 0.04 -0.000769 0.004864 -20.069041 0.000 No
Race Native American White 4,322 13.56% 20.0 0.047487 0.05 0.000628 0.004864 8.646494 0.699 No
Race White 4,322 13.56% 3,623.0 0.046833 0.05 0.000207 0.004864
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 0.047667 0.05 0.001097 0.004864 21.513889 0.000 No
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 0.046634 0.05 0.000051 0.004864
Sex Female Male 2,656 46.88% 1,034.0 0.047268 0.05 0.000435 0.004864 5.047723 0.229 No
Sex Male 2,656 46.88% 1,622.0 0.046873 0.05 0.000189 0.004864
../../../../_images/436d78f6535804825b0b8ee108d369c03eccc874ff284ef12d641ce9a3f06b56.svg
# Render only the summary table of the Residual SMD result.
sd.ui.show(rsmd.summary_table)
Group Category Group Reference Group Observations Percent Missing Total Average Prediction Average Label Average Residual Std. Dev. of Residuals Residual SMD P-Values Practically Significant
Race Black White 4,322 13.56% 340.0 0.047486 0.05 0.000472 0.004864 5.445416 0.337 No
Race Asian White 4,322 13.56% 327.0 0.045165 0.04 -0.000769 0.004864 -20.069041 0.000 No
Race Native American White 4,322 13.56% 20.0 0.047487 0.05 0.000628 0.004864 8.646494 0.699 No
Race White 4,322 13.56% 3,623.0 0.046833 0.05 0.000207 0.004864
Ethnicity Hispanic Non-Hispanic 4,316 13.68% 508.0 0.047667 0.05 0.001097 0.004864 21.513889 0.000 No
Ethnicity Non-Hispanic 4,316 13.68% 3,808.0 0.046634 0.05 0.000051 0.004864
Sex Female Male 2,656 46.88% 1,034.0 0.047268 0.05 0.000435 0.004864 5.047723 0.229 No
Sex Male 2,656 46.88% 1,622.0 0.046873 0.05 0.000189 0.004864

Segmented Adverse Impact Ratio#

Generate example income segments by splitting the test-set incomes into terciles (Low, Mid, and High Income).

# Split the test-set incomes into terciles and name the bins in one step.
# Labels are passed straight to `pd.qcut` rather than renaming the interval
# categories afterwards with `Series.replace`, which is deprecated for
# categorical data in newer pandas releases.
segments = pd.qcut(
    df.loc[test_index, "Income"],
    q=[0.0, 1 / 3, 2 / 3, 1.0],
    labels=["Low Income", "Mid Income", "High Income"],
)
# Segmented adverse impact ratio: AIR is reported per income segment, and the
# summary table also carries "CMH Test" and "Breslow-Day Test" rows.
sair = sd.segmented_adverse_impact_ratio(
    # Boolean outcome: True where the prediction is at or below the median
    # prediction (presumably the favorable outcome -- confirm).
    outcome=predictions <= predictions.quantile(0.5),
    air_threshold=0.8,
    percent_difference_threshold=0.0,
    fdr_threshold=0.2,
    # One segment label per test observation.
    segment=segments,
    # `reused_arguments` is defined earlier in the notebook (outside this
    # section); presumably the shared group/reference definitions.
    **reused_arguments,
)
# Bare expression: displays the raw (unformatted) summary table.
sair.summary_table
Group Category Group Reference Group Segment Observations Percent Missing Total Favorable Percent Favorable AIR P-Values Benjamani-Hochberg Critical Value Practically Significant
0 Race Black White Low Income 1510 0.117991 151.0 33.0 0.218543 0.839389 2.798899e-01 NaN No
1 Race Asian White Low Income 1510 0.117991 69.0 22.0 0.318841 1.224616 3.254284e-01 NaN No
2 Race Native American White Low Income 1510 0.117991 9.0 3.0 0.333333 1.280280 7.038812e-01 NaN No
3 Race White Low Income 1510 0.117991 1279.0 333.0 0.260360 NaN NaN NaN
4 Ethnicity Hispanic Non-Hispanic Low Income 1527 0.108061 242.0 40.0 0.165289 0.603400 2.927028e-04 NaN Yes
5 Ethnicity Non-Hispanic Low Income 1527 0.108061 1285.0 352.0 0.273930 NaN NaN NaN
6 Sex Female Male Low Income 1263 0.262266 528.0 107.0 0.202652 0.763840 1.214281e-02 NaN No
7 Sex Male Low Income 1263 0.262266 735.0 195.0 0.265306 NaN NaN NaN
8 Race Black White Mid Income 1414 0.131450 117.0 56.0 0.478632 0.905009 3.322955e-01 NaN No
9 Race Asian White Mid Income 1414 0.131450 89.0 67.0 0.752809 1.423428 3.629567e-05 NaN No
10 Race Native American White Mid Income 1414 0.131450 6.0 4.0 0.666667 1.260549 6.900009e-01 NaN No
11 Race White Mid Income 1414 0.131450 1195.0 632.0 0.528870 NaN NaN NaN
12 Ethnicity Hispanic Non-Hispanic Mid Income 1393 0.144349 183.0 74.0 0.404372 0.715336 6.272270e-05 NaN Yes
13 Ethnicity Non-Hispanic Mid Income 1393 0.144349 1210.0 684.0 0.565289 NaN NaN NaN
14 Sex Female Male Mid Income 815 0.499386 316.0 160.0 0.506329 0.850701 1.560329e-02 NaN No
15 Sex Male Mid Income 815 0.499386 499.0 297.0 0.595190 NaN NaN NaN
16 Race Black White High Income 1398 0.157831 72.0 52.0 0.722222 0.933446 3.133940e-01 NaN No
17 Race Asian White High Income 1398 0.157831 169.0 154.0 0.911243 1.177748 1.642507e-05 NaN No
18 Race Native American White High Income 1398 0.157831 5.0 2.0 0.400000 0.516985 8.115602e-02 NaN No
19 Race White High Income 1398 0.157831 1149.0 889.0 0.773716 NaN NaN NaN
20 Ethnicity Hispanic Non-Hispanic High Income 1396 0.159036 83.0 53.0 0.638554 0.809287 2.405162e-03 NaN Yes
21 Ethnicity Non-Hispanic High Income 1396 0.159036 1313.0 1036.0 0.789033 NaN NaN NaN
22 Sex Female Male High Income 578 0.651807 190.0 147.0 0.773684 0.949967 2.679606e-01 NaN No
23 Sex Male High Income 578 0.651807 388.0 316.0 0.814433 NaN NaN NaN
24 Race Black White CMH Test 4322 0.135600 340.0 NaN NaN 0.898663 7.080029e-02 NaN No
25 Race Asian White CMH Test 4322 0.135600 327.0 NaN NaN 1.241111 1.937556e-08 NaN No
26 Race Native American White CMH Test 4322 0.135600 20.0 NaN NaN 0.958973 8.487593e-01 NaN No
27 Race White CMH Test 4322 0.135600 3623.0 NaN NaN NaN NaN NaN
28 Ethnicity Hispanic Non-Hispanic CMH Test 4316 0.136800 508.0 NaN NaN 0.709947 5.406350e-10 NaN Yes
29 Ethnicity Non-Hispanic CMH Test 4316 0.136800 3808.0 NaN NaN NaN NaN NaN
30 Sex Female Male CMH Test 2656 0.468800 1034.0 NaN NaN 0.857313 1.846391e-04 NaN No
31 Sex Male CMH Test 2656 0.468800 1622.0 NaN NaN NaN NaN NaN
32 Race Black White Breslow-Day Test 4322 0.135600 340.0 NaN NaN 0.898663 9.764418e-01 NaN
33 Race Asian White Breslow-Day Test 4322 0.135600 327.0 NaN NaN 1.241111 5.809534e-02 NaN
34 Race Native American White Breslow-Day Test 4322 0.135600 20.0 NaN NaN 0.958973 1.070722e-01 NaN
35 Race White Breslow-Day Test 4322 0.135600 3623.0 NaN NaN NaN NaN NaN
36 Ethnicity Hispanic Non-Hispanic Breslow-Day Test 4316 0.136800 508.0 NaN NaN 0.709947 9.286203e-01 NaN
37 Ethnicity Non-Hispanic Breslow-Day Test 4316 0.136800 3808.0 NaN NaN NaN NaN NaN
38 Sex Female Male Breslow-Day Test 2656 0.468800 1034.0 NaN NaN 0.857313 9.058839e-01 NaN
39 Sex Male Breslow-Day Test 2656 0.468800 1622.0 NaN NaN NaN NaN NaN
# Per-segment rows only: the "CMH Test" and "Breslow-Day Test" rows that
# appear in `summary_table` are not part of this table.
sair.summary_table_by_segments
Group Category Group Reference Group Segment Observations Percent Missing Total Favorable Percent Favorable AIR P-Values Benjamani-Hochberg Critical Value Practically Significant
0 Race Black White Low Income 1510 0.117991 151.0 33.0 0.218543 0.839389 0.279890 NaN No
1 Race Asian White Low Income 1510 0.117991 69.0 22.0 0.318841 1.224616 0.325428 NaN No
2 Race Native American White Low Income 1510 0.117991 9.0 3.0 0.333333 1.280280 0.703881 NaN No
3 Race White Low Income 1510 0.117991 1279.0 333.0 0.260360 NaN NaN NaN
4 Ethnicity Hispanic Non-Hispanic Low Income 1527 0.108061 242.0 40.0 0.165289 0.603400 0.000293 NaN Yes
5 Ethnicity Non-Hispanic Low Income 1527 0.108061 1285.0 352.0 0.273930 NaN NaN NaN
6 Sex Female Male Low Income 1263 0.262266 528.0 107.0 0.202652 0.763840 0.012143 NaN No
7 Sex Male Low Income 1263 0.262266 735.0 195.0 0.265306 NaN NaN NaN
8 Race Black White Mid Income 1414 0.131450 117.0 56.0 0.478632 0.905009 0.332296 NaN No
9 Race Asian White Mid Income 1414 0.131450 89.0 67.0 0.752809 1.423428 0.000036 NaN No
10 Race Native American White Mid Income 1414 0.131450 6.0 4.0 0.666667 1.260549 0.690001 NaN No
11 Race White Mid Income 1414 0.131450 1195.0 632.0 0.528870 NaN NaN NaN
12 Ethnicity Hispanic Non-Hispanic Mid Income 1393 0.144349 183.0 74.0 0.404372 0.715336 0.000063 NaN Yes
13 Ethnicity Non-Hispanic Mid Income 1393 0.144349 1210.0 684.0 0.565289 NaN NaN NaN
14 Sex Female Male Mid Income 815 0.499386 316.0 160.0 0.506329 0.850701 0.015603 NaN No
15 Sex Male Mid Income 815 0.499386 499.0 297.0 0.595190 NaN NaN NaN
16 Race Black White High Income 1398 0.157831 72.0 52.0 0.722222 0.933446 0.313394 NaN No
17 Race Asian White High Income 1398 0.157831 169.0 154.0 0.911243 1.177748 0.000016 NaN No
18 Race Native American White High Income 1398 0.157831 5.0 2.0 0.400000 0.516985 0.081156 NaN No
19 Race White High Income 1398 0.157831 1149.0 889.0 0.773716 NaN NaN NaN
20 Ethnicity Hispanic Non-Hispanic High Income 1396 0.159036 83.0 53.0 0.638554 0.809287 0.002405 NaN Yes
21 Ethnicity Non-Hispanic High Income 1396 0.159036 1313.0 1036.0 0.789033 NaN NaN NaN
22 Sex Female Male High Income 578 0.651807 190.0 147.0 0.773684 0.949967 0.267961 NaN No
23 Sex Male High Income 578 0.651807 388.0 316.0 0.814433 NaN NaN NaN
# Displaying the result object itself renders the run settings, the affected
# groups, and the summary table.
sair

Disparity Calculation: Segmented AIR

┌───────────────────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
│ Protected Groups                  │ Black, Asian, Native American, Hispanic, Female                             │
│ Reference Groups                  │ White, White, White, Non-Hispanic, Male                                     │
│ Group Categories                  │ Race, Race, Race, Ethnicity, Sex                                            │
│ AIR Threshold                     │ 0.8                                                                         │
│ % Diff Threshold                  │ 0.0                                                                         │
│ FDR Threshold                     │ 0.2                                                                         │
│ Segments                          │ ['Low Income', 'Mid Income', 'High Income']                                 │
│ Affected Groups                   │ Hispanic                                                                    │
│ Affected Reference                │ Non-Hispanic                                                                │
│ Affected Categories               │ Ethnicity                                                                   │
└───────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────┘

Segmented AIR Summary Table

Group Category Group Reference Group Segment Observations Percent Missing Total Favorable Percent Favorable AIR P-Values Benjamani-Hochberg Critical Value Practically Significant
0 Race Black White Low Income 1510 0.117991 151.0 33.0 0.218543 0.839389 2.798899e-01 NaN No
1 Race Asian White Low Income 1510 0.117991 69.0 22.0 0.318841 1.224616 3.254284e-01 NaN No
2 Race Native American White Low Income 1510 0.117991 9.0 3.0 0.333333 1.280280 7.038812e-01 NaN No
3 Race White Low Income 1510 0.117991 1279.0 333.0 0.260360 NaN NaN NaN
4 Ethnicity Hispanic Non-Hispanic Low Income 1527 0.108061 242.0 40.0 0.165289 0.603400 2.927028e-04 NaN Yes
5 Ethnicity Non-Hispanic Low Income 1527 0.108061 1285.0 352.0 0.273930 NaN NaN NaN
6 Sex Female Male Low Income 1263 0.262266 528.0 107.0 0.202652 0.763840 1.214281e-02 NaN No
7 Sex Male Low Income 1263 0.262266 735.0 195.0 0.265306 NaN NaN NaN
8 Race Black White Mid Income 1414 0.131450 117.0 56.0 0.478632 0.905009 3.322955e-01 NaN No
9 Race Asian White Mid Income 1414 0.131450 89.0 67.0 0.752809 1.423428 3.629567e-05 NaN No
10 Race Native American White Mid Income 1414 0.131450 6.0 4.0 0.666667 1.260549 6.900009e-01 NaN No
11 Race White Mid Income 1414 0.131450 1195.0 632.0 0.528870 NaN NaN NaN
12 Ethnicity Hispanic Non-Hispanic Mid Income 1393 0.144349 183.0 74.0 0.404372 0.715336 6.272270e-05 NaN Yes
13 Ethnicity Non-Hispanic Mid Income 1393 0.144349 1210.0 684.0 0.565289 NaN NaN NaN
14 Sex Female Male Mid Income 815 0.499386 316.0 160.0 0.506329 0.850701 1.560329e-02 NaN No
15 Sex Male Mid Income 815 0.499386 499.0 297.0 0.595190 NaN NaN NaN
16 Race Black White High Income 1398 0.157831 72.0 52.0 0.722222 0.933446 3.133940e-01 NaN No
17 Race Asian White High Income 1398 0.157831 169.0 154.0 0.911243 1.177748 1.642507e-05 NaN No
18 Race Native American White High Income 1398 0.157831 5.0 2.0 0.400000 0.516985 8.115602e-02 NaN No
19 Race White High Income 1398 0.157831 1149.0 889.0 0.773716 NaN NaN NaN
20 Ethnicity Hispanic Non-Hispanic High Income 1396 0.159036 83.0 53.0 0.638554 0.809287 2.405162e-03 NaN Yes
21 Ethnicity Non-Hispanic High Income 1396 0.159036 1313.0 1036.0 0.789033 NaN NaN NaN
22 Sex Female Male High Income 578 0.651807 190.0 147.0 0.773684 0.949967 2.679606e-01 NaN No
23 Sex Male High Income 578 0.651807 388.0 316.0 0.814433 NaN NaN NaN
24 Race Black White CMH Test 4322 0.135600 340.0 NaN NaN 0.898663 7.080029e-02 NaN No
25 Race Asian White CMH Test 4322 0.135600 327.0 NaN NaN 1.241111 1.937556e-08 NaN No
26 Race Native American White CMH Test 4322 0.135600 20.0 NaN NaN 0.958973 8.487593e-01 NaN No
27 Race White CMH Test 4322 0.135600 3623.0 NaN NaN NaN NaN NaN
28 Ethnicity Hispanic Non-Hispanic CMH Test 4316 0.136800 508.0 NaN NaN 0.709947 5.406350e-10 NaN Yes
29 Ethnicity Non-Hispanic CMH Test 4316 0.136800 3808.0 NaN NaN NaN NaN NaN
30 Sex Female Male CMH Test 2656 0.468800 1034.0 NaN NaN 0.857313 1.846391e-04 NaN No
31 Sex Male CMH Test 2656 0.468800 1622.0 NaN NaN NaN NaN NaN
32 Race Black White Breslow-Day Test 4322 0.135600 340.0 NaN NaN 0.898663 9.764418e-01 NaN
33 Race Asian White Breslow-Day Test 4322 0.135600 327.0 NaN NaN 1.241111 5.809534e-02 NaN
34 Race Native American White Breslow-Day Test 4322 0.135600 20.0 NaN NaN 0.958973 1.070722e-01 NaN
35 Race White Breslow-Day Test 4322 0.135600 3623.0 NaN NaN NaN NaN NaN
36 Ethnicity Hispanic Non-Hispanic Breslow-Day Test 4316 0.136800 508.0 NaN NaN 0.709947 9.286203e-01 NaN
37 Ethnicity Non-Hispanic Breslow-Day Test 4316 0.136800 3808.0 NaN NaN NaN NaN NaN
38 Sex Female Male Breslow-Day Test 2656 0.468800 1034.0 NaN NaN 0.857313 9.058839e-01 NaN
39 Sex Male Breslow-Day Test 2656 0.468800 1622.0 NaN NaN NaN NaN NaN
../../../../_images/c22db8b44e2356f9c9f69ed552b7d26d773bf6a71d3a8827a0f3484e101f74a7.svg
# Render the summary table with display formatting: percentages, thousands
# separators, rounded values, and blanks in place of NaN.
sd.ui.show(sair.summary_table)
Group Category Group Reference Group Segment Observations Percent Missing Total Favorable Percent Favorable AIR P-Values Benjamani-Hochberg Critical Value Practically Significant
Race Black White Low Income 1,510 11.80% 151.0 33.0 21.85% 0.839 0.280 No
Race Asian White Low Income 1,510 11.80% 69.0 22.0 31.88% 1.225 0.325 No
Race Native American White Low Income 1,510 11.80% 9.0 3.0 33.33% 1.280 0.704 No
Race White Low Income 1,510 11.80% 1,279.0 333.0 26.04%
Ethnicity Hispanic Non-Hispanic Low Income 1,527 10.81% 242.0 40.0 16.53% 0.603 0.000 Yes
Ethnicity Non-Hispanic Low Income 1,527 10.81% 1,285.0 352.0 27.39%
Sex Female Male Low Income 1,263 26.23% 528.0 107.0 20.27% 0.764 0.012 No
Sex Male Low Income 1,263 26.23% 735.0 195.0 26.53%
Race Black White Mid Income 1,414 13.14% 117.0 56.0 47.86% 0.905 0.332 No
Race Asian White Mid Income 1,414 13.14% 89.0 67.0 75.28% 1.423 0.000 No
Race Native American White Mid Income 1,414 13.14% 6.0 4.0 66.67% 1.261 0.690 No
Race White Mid Income 1,414 13.14% 1,195.0 632.0 52.89%
Ethnicity Hispanic Non-Hispanic Mid Income 1,393 14.43% 183.0 74.0 40.44% 0.715 0.000 Yes
Ethnicity Non-Hispanic Mid Income 1,393 14.43% 1,210.0 684.0 56.53%
Sex Female Male Mid Income 815 49.94% 316.0 160.0 50.63% 0.851 0.016 No
Sex Male Mid Income 815 49.94% 499.0 297.0 59.52%
Race Black White High Income 1,398 15.78% 72.0 52.0 72.22% 0.933 0.313 No
Race Asian White High Income 1,398 15.78% 169.0 154.0 91.12% 1.178 0.000 No
Race Native American White High Income 1,398 15.78% 5.0 2.0 40.00% 0.517 0.081 No
Race White High Income 1,398 15.78% 1,149.0 889.0 77.37%
Ethnicity Hispanic Non-Hispanic High Income 1,396 15.90% 83.0 53.0 63.86% 0.809 0.002 Yes
Ethnicity Non-Hispanic High Income 1,396 15.90% 1,313.0 1,036.0 78.90%
Sex Female Male High Income 578 65.18% 190.0 147.0 77.37% 0.950 0.268 No
Sex Male High Income 578 65.18% 388.0 316.0 81.44%
Race Black White CMH Test 4,322 13.56% 340.0 0.899 0.071 No
Race Asian White CMH Test 4,322 13.56% 327.0 1.241 0.000 No
Race Native American White CMH Test 4,322 13.56% 20.0 0.959 0.849 No
Race White CMH Test 4,322 13.56% 3,623.0
Ethnicity Hispanic Non-Hispanic CMH Test 4,316 13.68% 508.0 0.710 0.000 Yes
Ethnicity Non-Hispanic CMH Test 4,316 13.68% 3,808.0
Sex Female Male CMH Test 2,656 46.88% 1,034.0 0.857 0.000 No
Sex Male CMH Test 2,656 46.88% 1,622.0
Race Black White Breslow-Day Test 4,322 13.56% 340.0 0.899 0.976
Race Asian White Breslow-Day Test 4,322 13.56% 327.0 1.241 0.058
Race Native American White Breslow-Day Test 4,322 13.56% 20.0 0.959 0.107
Race White Breslow-Day Test 4,322 13.56% 3,623.0
Ethnicity Hispanic Non-Hispanic Breslow-Day Test 4,316 13.68% 508.0 0.710 0.929
Ethnicity Non-Hispanic Breslow-Day Test 4,316 13.68% 3,808.0
Sex Female Male Breslow-Day Test 2,656 46.88% 1,034.0 0.857 0.906
Sex Male Breslow-Day Test 2,656 46.88% 1,622.0