Source code for sktree.datasets.hyppo

import numpy as np


def make_quadratic_classification(n_samples: int, n_features: int, noise=False, seed=None):
    """Simulate classification data from a quadratic model.

    This is a form of the simulation used in :footcite:`panda2018learning`.

    Two groups of ``n_samples`` rows are stacked on top of each other:

    - class 0: i.i.d. standard normal draws;
    - class 1: the same draws scaled per feature by
      ``exp(-0.0325 * (i + 24))`` (``i`` is the 0-based feature index) and
      then squared element-wise, optionally with standard normal noise added.

    Parameters
    ----------
    n_samples : int
        The number of samples to generate per class.
    n_features : int
        The number of dimensions in the dataset.
    noise : bool, optional
        Whether or not to add noise, by default False. The value multiplies
        a standard normal noise matrix that is added to the class-1 rows.
    seed : int, optional
        Random seed, by default None.

    Returns
    -------
    x : array-like, shape (2 * n_samples, n_features)
        Data array. The first ``n_samples`` rows belong to class 0 and the
        last ``n_samples`` rows belong to class 1.
    v : array-like, shape (2 * n_samples, 1)
        Target column vector of 0's (first half) and 1's (second half).

    References
    ----------
    .. footbibliography::
    """
    rng = np.random.default_rng(seed)
    shape = (n_samples, n_features)

    # Class 0: plain standard normal samples.
    x = rng.standard_normal(size=shape)

    # Per-feature weights that decay exponentially with the feature index.
    coeffs = np.exp(-0.0325 * (np.arange(n_features) + 24))

    # The noise matrix is drawn unconditionally (right after ``x``), so the
    # random stream is consumed identically whether or not noise is enabled.
    eps = rng.standard_normal(size=shape)

    # Class 1: quadratic transform of the weighted class-0 samples.
    y = (x * coeffs) ** 2 + noise * eps

    # generate the classification labels as a column vector (0 for x, 1 for y)
    v = np.vstack([np.zeros((n_samples, 1)), np.ones((n_samples, 1))])
    x = np.vstack((x, y))
    return x, v