import warnings
import numpy as np
from scipy.integrate import nquad
from scipy.stats import entropy, multivariate_normal
def make_quadratic_classification(n_samples: int, n_features: int, noise=False, seed=None):
"""Simulate classification data from a quadratic model.
This is a form of the simulation used in :footcite:`panda2018learning`.
Parameters
----------
n_samples : int
The number of samples to generate.
n_features : int
The number of dimensions in the dataset.
noise : bool, optional
Whether or not to add noise, by default False.
seed : int, optional
Random seed, by default None.
Returns
-------
    x : array-like, shape (2 * n_samples, n_features)
        Data array.
    v : array-like, shape (2 * n_samples, 1)
        Target array of 0's (first half) followed by 1's (second half).
References
----------
.. footbibliography::
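
    Examples
    --------
    A minimal usage sketch (the shapes are deterministic; the values depend on the seed):

    >>> x, v = make_quadratic_classification(100, 5, noise=True, seed=0)
    >>> x.shape, v.shape
    ((200, 5), (200, 1))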
"""
rng = np.random.default_rng(seed)
x = rng.standard_normal(size=(n_samples, n_features))
coeffs = np.array([np.exp(-0.0325 * (i + 24)) for i in range(n_features)])
eps = rng.standard_normal(size=(n_samples, n_features))
x_coeffs = x * coeffs
y = x_coeffs**2 + noise * eps
# generate the classification labels
n1 = x.shape[0]
n2 = y.shape[0]
v = np.vstack([np.zeros((n1, 1)), np.ones((n2, 1))])
x = np.vstack((x, y))
return x, v
# Dictionary of simulations from Marron and Wand 1992
# keys: names of each simulation corresponding to the class MarronWandSims
# values: probabilities associated with the mixture of Gaussians
MARRON_WAND_SIMS = {
"gaussian": [1],
"skewed_unimodal": [1 / 5, 1 / 5, 3 / 5],
"strongly_skewed": [1 / 8] * 8,
"kurtotic_unimodal": [2 / 3, 1 / 3],
"outlier": [1 / 10, 9 / 10],
"bimodal": [1 / 2] * 2,
"separated_bimodal": [1 / 2] * 2,
"skewed_bimodal": [3 / 4, 1 / 4],
"trimodal": [9 / 20, 9 / 20, 1 / 10],
"claw": [1 / 2, *[1 / 10] * 5],
"double_claw": [49 / 100, 49 / 100, *[1 / 350] * 7],
"asymmetric_claw": [1 / 2, *[2 ** (1 - i) / 31 for i in range(-2, 3)]],
"asymmetric_double_claw": [*[46 / 100] * 2, *[1 / 300] * 3, *[7 / 300] * 3],
"smooth_comb": [2 ** (5 - i) / 63 for i in range(6)],
"discrete_comb": [*[2 / 7] * 3, *[1 / 21] * 3],
}
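# A quick sanity check (illustrative): the mixture weights of every simulation
# should sum to 1, e.g.
#   all(np.isclose(sum(p), 1.0) for p in MARRON_WAND_SIMS.values())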
def make_marron_wand_classification(
n_samples,
n_dim=4096,
n_informative=256,
simulation: str = "gaussian",
    rho: float = 0,
band_type: str = "ma",
return_params: bool = False,
scaling_factor: float = 1.0,
seed=None,
):
"""Generate Marron-Wand binary classification dataset.
    The simulation is similar to that of :func:`sktree.datasets.make_trunk_classification`,
    where the first class is generated from a multivariate Gaussian with a mean vector of
    zeros. The second class is generated from a mixture of Gaussians with mean vectors
    specified by the Marron-Wand simulations, but as the dimensionality increases, the second
    class distribution approaches the first class distribution by a factor of :math:`1 / \sqrt{d}`.
Full details for the Marron-Wand simulations can be found in :footcite:`marron1992exact`.
Instead of the identity covariance matrix, one can implement a banded covariance matrix
that follows :footcite:`Bickel_2008`.
Parameters
----------
n_samples : int
        Number of samples to generate. Must be an even number, else the total number of samples generated will be ``n_samples - 1``.
    n_dim : int, optional
        The dimensionality of the dataset, by default 4096.
    n_informative : int, optional
        The number of informative dimensions. The remaining ``n_dim - n_informative``
        dimensions are filled with Gaussian noise. Default is 256.
simulation : str, optional
Which simulation to run. Must be one of the
following Marron-Wand simulations: 'gaussian', 'skewed_unimodal', 'strongly_skewed',
'kurtotic_unimodal', 'outlier', 'bimodal', 'separated_bimodal', 'skewed_bimodal',
'trimodal', 'claw', 'double_claw', 'asymmetric_claw', 'asymmetric_double_claw',
'smooth_comb', 'discrete_comb'.
        When calling the Marron-Wand simulations, only the covariance parameters are considered
        (``rho`` and ``band_type``). Means are taken from :footcite:`marron1992exact`.
By default 'gaussian'.
    rho : float, optional
        The covariance value of the bands. By default 0, indicating that an identity matrix is used.
    band_type : str
        The band type to use. For details, see Examples 1 and 2 in :footcite:`Bickel_2008`.
        Either 'ma' or 'ar'.
    return_params : bool, optional
        Whether or not to return the distribution parameters of the classes' normal distributions.
        By default False.
scaling_factor : float, optional
The scaling factor for the covariance matrix. By default 1.
seed : int, optional
Random seed, by default None.
Returns
-------
    X : np.ndarray of shape (n_samples, n_dim), dtype=np.float64
        The Marron-Wand dataset as a dense array.
y : np.ndarray of shape (n_samples,), dtype=np.intp
Labels of the dataset.
    G : np.ndarray of shape (n_samples // 2, n_informative), dtype=np.float64
        The samples drawn from the mixture of Gaussians for the Marron-Wand simulations.
        Returned if ``return_params`` is True.
    w : np.ndarray of shape (n_informative,), dtype=np.float64
        The weight vector for the Marron-Wand simulations.
        Returned if ``return_params`` is True.
Notes
-----
**Marron-Wand Simulations**: The Marron-Wand simulations generate two classes of data with the
setup specified in the paper.
Covariance: The covariance matrix among different dimensions is controlled by the ``rho`` parameter
and the ``band_type`` parameter. The ``band_type`` parameter controls the type of band to use, while
the ``rho`` parameter controls the specific scaling factor for the covariance matrix while going
from one dimension to the next.
    The first class has a mean of zero in every informative dimension. For the second class,
    the weight on the Marron-Wand mixture in the :math:`i`-th informative dimension decays as
    :math:`1 / \sqrt{i}`, following the trunk simulation of :footcite:`trunk1982`, so the two
    class distributions become closer as the dimensionality increases.
References
----------
.. footbibliography::
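
    Examples
    --------
    A minimal usage sketch (the shapes are deterministic; the values depend on the seed):

    >>> X, y = make_marron_wand_classification(
    ...     n_samples=100, n_dim=10, n_informative=10, simulation="bimodal", seed=0
    ... )
    >>> X.shape, y.shape
    ((100, 10), (100,))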
"""
    if n_dim < n_informative:
        warnings.warn(
            f"Number of informative dimensions, {n_informative}, cannot exceed the number "
            f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
            RuntimeWarning,
        )
        n_informative = n_dim
    if simulation not in MARRON_WAND_SIMS.keys():
        raise ValueError(f"Simulation must be one of {list(MARRON_WAND_SIMS.keys())}.")
rng = np.random.default_rng(seed=seed)
if rho != 0:
if band_type == "ma":
cov = _moving_avg_cov(n_informative, rho)
elif band_type == "ar":
cov = _autoregressive_cov(n_informative, rho)
else:
raise ValueError(f'Band type {band_type} must be one of "ma", or "ar".')
else:
cov = np.identity(n_informative)
# allow arbitrary uniform scaling of the covariance matrix
cov = scaling_factor * cov
    # sampling via Cholesky factorization is faster than the default SVD for
    # large covariance matrices
    if n_informative > 1000:
        mvg_sampling_method = "cholesky"
    else:
        mvg_sampling_method = "svd"
mixture_idx = rng.choice(
len(MARRON_WAND_SIMS[simulation]), # type: ignore
size=n_samples // 2,
replace=True,
p=MARRON_WAND_SIMS[simulation],
)
# the parameters used for each Gaussian in the mixture for each Marron Wand simulation
norm_params = MarronWandSims(n_dim=n_informative, cov=cov)(simulation)
G = np.fromiter(
(
rng_children.multivariate_normal(*(norm_params[i]), size=1, method=mvg_sampling_method)
for i, rng_children in zip(mixture_idx, rng.spawn(n_samples // 2))
),
dtype=np.dtype((float, n_informative)),
)
    # as the dimension index i increases, the weight on the Marron-Wand mixture
    # decays as 1 / sqrt(i), pulling the second class toward the first
    w_vec = np.array([1.0 / np.sqrt(i) for i in range(1, n_informative + 1)])
# create new generator instance to ensure reproducibility with multiple runs with the same seed
rng_F = np.random.default_rng(seed=seed).spawn(2)
X = np.vstack(
(
rng_F[0].multivariate_normal(
np.zeros(n_informative), cov, n_samples // 2, method=mvg_sampling_method
),
(1 - w_vec)
* rng_F[1].multivariate_normal(
np.zeros(n_informative), cov, n_samples // 2, method=mvg_sampling_method
)
+ w_vec * G.reshape(n_samples // 2, n_informative),
)
)
if n_dim > n_informative:
# create new generator instance to ensure reproducibility with multiple runs with the same seed
rng_noise = np.random.default_rng(seed=seed)
X = np.hstack(
(
X,
np.hstack(
[
rng_children.normal(loc=0, scale=1, size=(X.shape[0], 1))
for rng_children in rng_noise.spawn(n_dim - n_informative)
]
),
)
)
y = np.concatenate((np.zeros(n_samples // 2), np.ones(n_samples // 2)))
if return_params:
return [X, y, *list(zip(*norm_params)), G, w_vec]
return X, y
def make_trunk_mixture_classification(
n_samples,
n_dim=4096,
n_informative=256,
    mu_0: float = 0,
    mu_1: float = 1,
    rho: float = 0,
band_type: str = "ma",
return_params: bool = False,
mix: float = 0.5,
scaling_factor: float = 1.0,
seed=None,
):
"""Generate trunk mixture binary classification dataset.
    The first class is generated from a multivariate Gaussian with a mean vector of
    zeros. The second class is generated from a mixture of Gaussians with mean vectors
    specified by ``mu_0`` and ``mu_1``. The mixture is specified by the ``mix`` parameter,
    which is the probability of the first Gaussian in the mixture.
    For the :math:`i`-th informative dimension, the means are scaled by :math:`1 / \sqrt{i}`;
    the covariance is the identity matrix by default. With ``mu_0 = -mu_1``, the mean vectors
    of the two mixture components are negatives of each other.
    As the dimensionality increases, the distributions become closer and closer.
Full details for the trunk simulation can be found in :footcite:`trunk1982`.
Instead of the identity covariance matrix, one can implement a banded covariance matrix
that follows :footcite:`Bickel_2008`.
Parameters
----------
n_samples : int
        Number of samples to generate. Must be an even number, else the total number of samples generated will be ``n_samples - 1``.
    n_dim : int, optional
        The dimensionality of the dataset, by default 4096.
    n_informative : int, optional
        The number of informative dimensions. The remaining ``n_dim - n_informative``
        dimensions are filled with Gaussian noise. Default is 256.
    mu_0 : float, optional
        The mean of the first Gaussian in the mixture. By default 0. The mean of the
        distribution will decrease by a factor of ``sqrt(i)`` for each dimension ``i``.
    mu_1 : float, optional
        The mean of the second Gaussian in the mixture. By default 1. The mean of the
        distribution will decrease by a factor of ``sqrt(i)`` for each dimension ``i``.
    rho : float, optional
        The covariance value of the bands. By default 0, indicating that an identity matrix is used.
    band_type : str
        The band type to use. For details, see Examples 1 and 2 in :footcite:`Bickel_2008`.
        Either 'ma' or 'ar'.
    return_params : bool, optional
        Whether or not to return the distribution parameters of the classes' normal distributions.
        By default False.
    mix : float, optional
        The probability of the first Gaussian in the mixture that generates the second class
        (the ``trunk-mix`` simulation). By default 0.5.
scaling_factor : float, optional
The scaling factor for the covariance matrix. By default 1.
seed : int, optional
Random seed, by default None.
Returns
-------
X : np.ndarray of shape (n_samples, n_dim), dtype=np.float64
Trunk dataset as a dense array.
y : np.ndarray of shape (n_samples,), dtype=np.intp
Labels of the dataset.
    means : list of ArrayLike of shape (n_informative,), dtype=np.float64
        The mean vector for each class starting with class 0.
        Returned if ``return_params`` is True.
    covs : list of ArrayLike of shape (n_informative, n_informative), dtype=np.float64
        The covariance for each class. Returned if ``return_params`` is True.
    X_mixture : np.ndarray of shape (n_samples // 2, n_informative), dtype=np.float64
        The samples drawn from the mixture of Gaussians for the second class.
        Returned if ``return_params`` is True.
Notes
-----
**Trunk**: The trunk simulation decreases the signal-to-noise ratio as the dimensionality
increases. This is implemented by decreasing the mean of the distribution by a factor of
``sqrt(i)`` for each dimension ``i``. Thus for instance if the means of distribution one
and two are 1 and -1 respectively, the means for the first dimension will be 1 and -1,
for the second dimension will be 1/sqrt(2) and -1/sqrt(2), and so on.
    **Trunk Mix**: The trunk mix simulation generates two classes of data with the same covariance
    matrix. The first class (label 0) is generated from a multivariate Gaussian with a mean vector
    of zeros and the second class is generated from a mixture of Gaussians with mean vectors
    specified by ``mu_0`` and ``mu_1``. The mixture is specified by the ``mix`` parameter, which
    is the probability of the first Gaussian in the mixture.
Covariance: The covariance matrix among different dimensions is controlled by the ``rho`` parameter
and the ``band_type`` parameter. The ``band_type`` parameter controls the type of band to use, while
the ``rho`` parameter controls the specific scaling factor for the covariance matrix while going
from one dimension to the next.
References
----------
.. footbibliography::
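
    Examples
    --------
    A minimal usage sketch (the shapes are deterministic; the values depend on the seed):

    >>> X, y = make_trunk_mixture_classification(
    ...     n_samples=100, n_dim=10, n_informative=10, mu_0=-1, mu_1=1, mix=0.5, seed=0
    ... )
    >>> X.shape, y.shape
    ((100, 10), (100,))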
"""
    if n_dim < n_informative:
        warnings.warn(
            f"Number of informative dimensions, {n_informative}, cannot exceed the number "
            f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
            RuntimeWarning,
        )
        n_informative = n_dim
    if mix < 0 or mix > 1:  # type: ignore
        raise ValueError("Mix must be between 0 and 1.")
rng = np.random.default_rng(seed=seed)
mu_1_vec = np.array([mu_1 / np.sqrt(i) for i in range(1, n_informative + 1)])
mu_0_vec = np.array([mu_0 / np.sqrt(i) for i in range(1, n_informative + 1)])
if rho != 0:
if band_type == "ma":
cov = _moving_avg_cov(n_informative, rho)
elif band_type == "ar":
cov = _autoregressive_cov(n_informative, rho)
else:
raise ValueError(f'Band type {band_type} must be one of "ma", or "ar".')
else:
cov = np.identity(n_informative)
    # Note: when the variance is 1, trunk-mix does not look bimodal at low dimensions.
    # A scaling_factor of (2 / 3) ** 2 is consistent with the Marron-Wand bimodal simulation.
    cov = scaling_factor * cov
    # sampling via Cholesky factorization is faster than the default SVD for
    # large covariance matrices
    if n_informative > 1000:
        method = "cholesky"
    else:
        method = "svd"
mixture_idx = rng.choice(2, n_samples // 2, replace=True, shuffle=True, p=[mix, 1 - mix]) # type: ignore
norm_params = [[mu_0_vec, cov], [mu_1_vec, cov]]
dim_sample = len(norm_params[0][0]) # Dimensionality of the samples
X_mixture = np.empty((n_samples // 2, dim_sample)) # Pre-allocate array for samples
    for idx, (i, rng_child) in enumerate(zip(mixture_idx, rng.spawn(n_samples // 2))):
        # avoid shadowing the shared ``cov`` defined above; both components use it
        mean, component_cov = norm_params[i]
        X_mixture[idx, :] = rng_child.multivariate_normal(mean, component_cov, size=1, method=method)
# create new generator instance to ensure reproducibility with multiple runs with the same seed
rng_F = np.random.default_rng(seed=seed)
X = np.vstack(
(
rng_F.multivariate_normal(np.zeros(n_informative), cov, n_samples // 2, method=method),
X_mixture.reshape(n_samples // 2, n_informative),
)
)
if n_dim > n_informative:
# create new generator instance to ensure reproducibility with multiple runs with the same seed
rng_noise = np.random.default_rng(seed=seed)
X = np.hstack(
(
X,
np.hstack(
[
rng_children.normal(loc=0, scale=1, size=(X.shape[0], 1))
for rng_children in rng_noise.spawn(n_dim - n_informative)
]
),
)
)
y = np.concatenate((np.zeros(n_samples // 2), np.ones(n_samples // 2)))
if return_params:
return [X, y, *list(zip(*norm_params)), X_mixture]
return X, y
def make_trunk_classification(
n_samples,
n_dim=4096,
n_informative=256,
    mu_0: float = 0,
    mu_1: float = 1,
    rho: float = 0,
band_type: str = "ma",
return_params: bool = False,
scaling_factor: float = 1.0,
seed=None,
):
"""Generate trunk binary classification dataset.
    For the :math:`i`-th informative dimension, the mean of each class is scaled by
    :math:`1 / \sqrt{i}`; the covariance is the identity matrix by default.
    With ``mu_0 = -mu_1``, the second distribution's mean vector is the negative of the first.
    As the dimensionality increases, the two distributions become closer and closer.
    Full details for the trunk simulation can be found in :footcite:`trunk1982`.
Instead of the identity covariance matrix, one can implement a banded covariance matrix
that follows :footcite:`Bickel_2008`.
Parameters
----------
n_samples : int
        Number of samples to generate. Must be an even number, else the total number of samples generated will be ``n_samples - 1``.
    n_dim : int, optional
        The dimensionality of the dataset, by default 4096.
    n_informative : int, optional
        The number of informative dimensions. The remaining ``n_dim - n_informative``
        dimensions are filled with Gaussian noise. Default is 256.
    mu_0 : float, optional
        The mean of the first distribution. By default 0. The mean of the distribution will
        decrease by a factor of ``sqrt(i)`` for each dimension ``i``.
    mu_1 : float, optional
        The mean of the second distribution. By default 1. The mean of the distribution will
        decrease by a factor of ``sqrt(i)`` for each dimension ``i``.
    rho : float, optional
        The covariance value of the bands. By default 0, indicating that an identity matrix is used.
    band_type : str
        The band type to use. For details, see Examples 1 and 2 in :footcite:`Bickel_2008`.
        Either 'ma' or 'ar'.
    return_params : bool, optional
        Whether or not to return the distribution parameters of the classes' normal distributions.
        By default False.
scaling_factor : float, optional
The scaling factor for the covariance matrix. By default 1.
seed : int, optional
Random seed, by default None.
Returns
-------
X : np.ndarray of shape (n_samples, n_dim), dtype=np.float64
Trunk dataset as a dense array.
y : np.ndarray of shape (n_samples,), dtype=np.intp
Labels of the dataset.
    means : list of ArrayLike of shape (n_informative,), dtype=np.float64
        The mean vector for each class starting with class 0.
        Returned if ``return_params`` is True.
    covs : list of ArrayLike of shape (n_informative, n_informative), dtype=np.float64
        The covariance for each class. Returned if ``return_params`` is True.
Notes
-----
**Trunk**: The trunk simulation decreases the signal-to-noise ratio as the dimensionality
increases. This is implemented by decreasing the mean of the distribution by a factor of
``sqrt(i)`` for each dimension ``i``. Thus for instance if the means of distribution one
and two are 1 and -1 respectively, the means for the first dimension will be 1 and -1,
for the second dimension will be 1/sqrt(2) and -1/sqrt(2), and so on.
**Trunk Overlap**: The trunk overlap simulation generates two classes of data with the same
covariance matrix and mean vector of zeros.
Covariance: The covariance matrix among different dimensions is controlled by the ``rho`` parameter
and the ``band_type`` parameter. The ``band_type`` parameter controls the type of band to use, while
the ``rho`` parameter controls the specific scaling factor for the covariance matrix while going
from one dimension to the next.
References
----------
.. footbibliography::
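
    Examples
    --------
    A minimal usage sketch (the shapes are deterministic; the values depend on the seed):

    >>> X, y = make_trunk_classification(n_samples=100, n_dim=10, n_informative=10, seed=0)
    >>> X.shape, y.shape
    ((100, 10), (100,))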
"""
    if n_dim < n_informative:
        warnings.warn(
            f"Number of informative dimensions, {n_informative}, cannot exceed the number "
            f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
            RuntimeWarning,
        )
        n_informative = n_dim
rng = np.random.default_rng(seed=seed)
mu_1_vec = np.array([mu_1 / np.sqrt(i) for i in range(1, n_informative + 1)])
mu_0_vec = np.array([mu_0 / np.sqrt(i) for i in range(1, n_informative + 1)])
if rho != 0:
if band_type == "ma":
cov = _moving_avg_cov(n_informative, rho)
elif band_type == "ar":
cov = _autoregressive_cov(n_informative, rho)
else:
raise ValueError(f'Band type {band_type} must be one of "ma", or "ar".')
else:
cov = np.identity(n_informative)
# allow arbitrary uniform scaling of the covariance matrix
cov = scaling_factor * cov
    # sampling via Cholesky factorization is faster than the default SVD for
    # large covariance matrices
    if n_informative > 1000:
        method = "cholesky"
    else:
        method = "svd"
X = np.vstack(
[
rng_children.multivariate_normal(mu_vec, cov, n_samples // 2, method=method)
for rng_children, mu_vec in zip(rng.spawn(2), [mu_0_vec, mu_1_vec])
]
)
if n_dim > n_informative:
X = np.hstack(
(
X,
np.hstack(
[
rng_children.normal(loc=0, scale=1, size=(X.shape[0], 1))
for rng_children in rng.spawn(n_dim - n_informative)
]
),
)
)
y = np.concatenate((np.zeros(n_samples // 2), np.ones(n_samples // 2)))
if return_params:
return [X, y, [mu_0_vec, mu_1_vec], [cov, cov]]
return X, y
class MarronWandSims:
    def __init__(self, n_dim=1, cov=1):
        self.n_dim = n_dim
        self.cov = cov

    def __call__(self, simulation):
        sims = self._my_method_generator()
        if simulation in sims.keys():
            return sims[simulation]()
        else:
            raise ValueError(f"simulation is not one of these: {list(sims.keys())}")

    def _my_method_generator(self):
        # expose only the public simulation methods, not instance attributes
        # (``n_dim``, ``cov``) or private helpers
        return {
            method: getattr(self, method)
            for method in dir(self)
            if not method.startswith("_") and callable(getattr(self, method))
        }
def gaussian(self):
return [[np.zeros(self.n_dim), self.cov]]
def skewed_unimodal(self):
return [
[np.zeros(self.n_dim), self.cov],
[np.full(self.n_dim, 1 / 2), self.cov * (2 / 3) ** 2],
[np.full(self.n_dim, 13 / 12), self.cov * (5 / 9) ** 2],
]
def strongly_skewed(self):
return [
[np.full(self.n_dim, 3 * ((2 / 3) ** l_mix - 1)), self.cov * (2 / 3) ** (2 * l_mix)]
for l_mix in range(8)
]
def kurtotic_unimodal(self):
return [[np.zeros(self.n_dim), self.cov], [np.zeros(self.n_dim), self.cov * (1 / 10) ** 2]]
def outlier(self):
return [[np.zeros(self.n_dim), self.cov], [np.zeros(self.n_dim), self.cov * (1 / 10) ** 2]]
def bimodal(self):
return [
[-np.ones(self.n_dim), self.cov * (2 / 3) ** 2],
[np.ones(self.n_dim), self.cov * (2 / 3) ** 2],
]
def separated_bimodal(self):
return [
[-np.full(self.n_dim, 3 / 2), self.cov * (1 / 2) ** 2],
[np.full(self.n_dim, 3 / 2), self.cov * (1 / 2) ** 2],
]
def skewed_bimodal(self):
return [
[np.zeros(self.n_dim), self.cov],
[np.full(self.n_dim, 3 / 2), self.cov * (1 / 3) ** 2],
]
def trimodal(self):
return [
[np.full(self.n_dim, -6 / 5), self.cov * (3 / 5) ** 2],
[np.full(self.n_dim, 6 / 5), self.cov * (3 / 5) ** 2],
[np.zeros(self.n_dim), self.cov * (1 / 4) ** 2],
]
def claw(self):
return [
[np.zeros(self.n_dim), self.cov],
*[
[np.full(self.n_dim, (l_mix / 2) - 1), self.cov * (1 / 10) ** 2]
for l_mix in range(5)
],
]
def double_claw(self):
return [
[-np.ones(self.n_dim), self.cov * (2 / 3) ** 2],
[np.ones(self.n_dim), self.cov * (2 / 3) ** 2],
*[
[np.full(self.n_dim, (l_mix - 3) / 2), self.cov * (1 / 100) ** 2]
for l_mix in range(7)
],
]
def asymmetric_claw(self):
return [
[np.zeros(self.n_dim), self.cov],
*[
[np.full(self.n_dim, l_mix + 1 / 2), self.cov * (1 / ((2**l_mix) * 10)) ** 2]
for l_mix in range(-2, 3)
],
]
def asymmetric_double_claw(self):
return [
*[[np.full(self.n_dim, 2 * l_mix - 1), self.cov * (2 / 3) ** 2] for l_mix in range(2)],
*[
[-np.full(self.n_dim, l_mix / 2), self.cov * (1 / 100) ** 2]
for l_mix in range(1, 4)
],
*[[np.full(self.n_dim, l_mix / 2), self.cov * (7 / 100) ** 2] for l_mix in range(1, 4)],
]
def smooth_comb(self):
return [
[
np.full(self.n_dim, (65 - 96 * ((1 / 2) ** l_mix)) / 21),
self.cov * (32 / 63) ** 2 / (2 ** (2 * l_mix)),
]
for l_mix in range(6)
]
def discrete_comb(self):
return [
*[
[np.full(self.n_dim, (12 * l_mix - 15) / 7), self.cov * (2 / 7) ** 2]
for l_mix in range(3)
],
*[
[np.full(self.n_dim, (2 * l_mix) / 7), self.cov * (1 / 21) ** 2]
for l_mix in range(8, 11)
],
]
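
# Example (illustrative): MarronWandSims(n_dim=2, cov=np.eye(2))("bimodal") returns the
# [mean, covariance] pairs [[-np.ones(2), np.eye(2) * (2 / 3) ** 2],
# [np.ones(2), np.eye(2) * (2 / 3) ** 2]], one pair per weight in
# MARRON_WAND_SIMS["bimodal"].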
def _moving_avg_cov(n_dim, rho):
    # Create a meshgrid of 1-based indices
    i, j = np.meshgrid(np.arange(1, n_dim + 1), np.arange(1, n_dim + 1), indexing="ij")

    # Banded covariance: rho^|i - j| on the main diagonal and first off-diagonals
    cov_matrix = rho ** np.abs(i - j)

    # Zero out everything beyond the first off-diagonal (moving-average banding)
    cov_matrix[np.abs(i - j) > 1] = 0
    return cov_matrix
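
# For example (illustrative), _moving_avg_cov(3, 0.5) gives
#   [[1.0, 0.5, 0.0],
#    [0.5, 1.0, 0.5],
#    [0.0, 0.5, 1.0]]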
def _autoregressive_cov(n_dim, rho):
    # Create a meshgrid of 1-based indices
    i, j = np.meshgrid(np.arange(1, n_dim + 1), np.arange(1, n_dim + 1), indexing="ij")

    # AR(1)-style covariance: rho^|i - j| for every pair of dimensions
    cov_matrix = rho ** np.abs(i - j)
    return cov_matrix
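
# For example (illustrative), _autoregressive_cov(3, 0.5) gives
#   [[1.0, 0.5, 0.25],
#    [0.5, 1.0, 0.5],
#    [0.25, 0.5, 1.0]]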
def _compute_mi_bounds(means, covs, class_probs):
    # compute bounds using https://arxiv.org/pdf/2101.11670.pdf
    prior_y = np.array(class_probs)
    H_Y = entropy(prior_y, base=np.exp(1))

    # number of mixtures
    N = len(class_probs)
    cov = covs[0]
    H_YX_lb = 0.0
    H_YX_ub = 0.0
    for idx in range(N):
        num = 0.0
        denom = 0.0
        mean_i = means[idx]

        for jdx in range(N):
            mean_j = means[jdx]
            c_alpha, kl_div = _compute_c_alpha_and_kl(mean_i, mean_j, cov, alpha=0.5)
            num += class_probs[jdx] * np.exp(-c_alpha)

        for kdx in range(N):
            mean_k = means[kdx]
            c_alpha, kl_div = _compute_c_alpha_and_kl(mean_i, mean_k, cov, alpha=0.5)
            # weight each term by its own mixture probability
            denom += class_probs[kdx] * np.exp(-kl_div)

        H_YX_lb += class_probs[idx] * np.log(num / denom)
        H_YX_ub += class_probs[idx] * np.log(denom / num)

    I_lb = H_Y - H_YX_lb
    I_ub = H_Y - H_YX_ub
    return I_lb, I_ub
def _compute_c_alpha_and_kl(mean_i, mean_j, cov, alpha=0.5):
    # For Gaussians sharing a covariance matrix, both the Chernoff exponent and the
    # KL divergence reduce to functions of the Mahalanobis distance
    # lambda_ij = (m_i - m_j)^T cov^{-1} (m_i - m_j).
    mean_ij = mean_i - mean_j
    lambda_ij = mean_ij.T @ np.linalg.inv(cov) @ mean_ij
    return alpha * (1.0 - alpha) * lambda_ij / 2.0, lambda_ij / 2
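
# Worked example (illustrative): for unit-variance 1-D Gaussians with means 0 and 2,
# lambda_ij = 4, so the Chernoff term is 0.5 * 0.5 * 4 / 2 = 0.5 and the KL divergence is 2:
#   _compute_c_alpha_and_kl(np.array([0.0]), np.array([2.0]), np.eye(1))  # -> (0.5, 2.0)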