from dataclasses import dataclass, field
#Changed this over to a dataclass versus the previous function-based implementaton.
#Seemed to make more sense this way since I'm not really doing anything
#To the params once they are set, plus I don't like named tuples plus this
#Seems more easier to pass around, comes with as many dunder attribs as wanted
#And just seems like an overall better idea.
#Sphinx and Python is too smart and Dataclasses are already kind of self-documenting
# so we don't actually need all these doc strings and having it in there makes the rtd
# page absolutely fugly so they won't be included.
@dataclass(frozen=True, order=True)
[docs]
class Hyperparameters:
'''Class representing the hyperparameters for the simulation.'''
# Attributes:
# threshold: float
# The threshold for simulation convergence. (Default: 1e-1)
# k1: int
# Maximum number of clusters to simulate for. (Default: 5)
# alpha0: float
# Prior coefficient count, also known as the concentration parameter for
# Dirichelet prior on the mixture proportions. This field is calculated
# from 1/k1. (Default: 0.2)
# a0: int
# Degrees of freedom for the Gamma prior on the cluster precision, which
# controls the shape of the Gamma distribution. A higher number results
# in a more peaked distribution. (Default: 3)
# beta0: float
# Shrinkage parameter of the Gaussian conditional prior on the cluster
# mean. This influences the tightness and spread of the cluster, smaller
# shrinkage leads to tighter clusters. (Default: 1e-3)
# d0: int
# Shape parameter of the Beta distribution on the probability. A value of
# 1 results in a uniform distribution. (Default: 1)
# t_max: int
# Maximum starting annealing temperature. Value of 1 has no annealing.
# (Default: 1)
# max_itr: int
# Maximum number of iterations. (Default: 25)
# max_annealed_itr: int
# Maximum number of iterations for annealing, if applicable. (Default: 10)
# max_models: int
# Maximum number of models to run for averaging (Default: 10)
[docs]
threshold: float = 1e-1
[docs]
alpha0: float = field(init=False)
[docs]
t_max: int = 1 #can NOT be 0.
[docs]
max_annealed_itr: int = 10
[docs]
def __post_init__(self):
# nifty little hack to temporarily unfreeze the class and set alpha0
object.__setattr__(self, 'alpha0', 1/self.k1)
@dataclass(order=True)
[docs]
class SimulationParameters:
'''Class representing the simulation parameters.
These parameters are the "settings" for the simulation experiment. For more
information regarding the simulation, see [INSERT PAPER HERE]. Mixture
proportions must be numbers between 0 and 1. The n_relevants array must
be all numbers that are less than the n_variables value, as it is not
possible to have a higher number of relevant variables than total variables.
'''
# Attributes
# n_observations: list[int] (Optional) (Default: [100,1000])
# The number of observations to observe in the simulation.
# n_variables: int (Optional) (Default: 200)
# The number of variables to consider. The value must exceed the largest
# number in `n_relevants`.
# n_relevants: list[int] (Optional) (Default: [10, 20, 50, 100])
# A list of integer values of different quantities of relevant variables
# to test for. These numbers should not exceed `n_variables`.
# mixture_proportions: list[float] (Optional) (Default: [0.5, 0.3, 0.2])
# A list of float values for ~ proportion of observations in each cluster.
# The length of the array influence the number of simulated clusters. All
# values should be between 0 and 1, and all values must sum to 1.
# means: list[int] (Optional) (Default: [-2, 0, 2])
# List of integers of Gaussian distributions for each cluster.
[docs]
n_observations: list[int] = field(default_factory=lambda:[100,1000])
[docs]
n_relevants: list[int] = field(default_factory=lambda:[10, 20, 50, 100])
[docs]
mixture_proportions: list[float] = field(default_factory=lambda:[0.2, 0.3, 0.5])
[docs]
means: list[int] = field(default_factory=lambda:[-2, 0, 2])