def compute_forest_similarity_matrix(forest, X):
    """Compute the similarity matrix of samples in X using a trained forest.

    As an intermediate calculation, the forest applies the dataset and gets
    the leaves for each sample. Then, the similarity matrix is computed by
    counting the number of times each pair of samples ends up in the same
    leaf, normalized by the number of trees.

    Parameters
    ----------
    forest : BaseForest or BaseDecisionTree
        The fitted forest or single fitted tree. Only its ``apply`` method
        is used: a 2-D result is read as ``(n_samples, n_estimators)`` and a
        1-D result as the leaves of a single tree.
    X : array-like of shape (n_samples, n_features)
        The input data.

    Returns
    -------
    aff_matrix : ndarray of shape (n_samples, n_samples)
        The estimated similarity (affinity) matrix. Entry ``(i, j)`` is the
        fraction of trees in which samples ``i`` and ``j`` fall in the same
        leaf.
    """
    X_leaves = np.asarray(forest.apply(X))

    # A single tree returns one leaf index per sample; promote it to a
    # one-column matrix so trees and forests share the same code path.
    if X_leaves.ndim == 1:
        X_leaves = X_leaves[:, np.newaxis]

    # Use the number of columns actually returned by ``apply`` rather than
    # the ``n_estimators`` hyper-parameter, which may not match the number of
    # fitted trees.
    n_samples = X_leaves.shape[0]
    n_est = X_leaves.shape[1]

    # Accumulate one tree at a time to keep peak memory at O(n_samples ** 2)
    # instead of O(n_samples ** 2 * n_estimators).
    aff_matrix = np.zeros((n_samples, n_samples))
    for tree_leaves in X_leaves.T:
        aff_matrix += np.equal.outer(tree_leaves, tree_leaves)

    # normalize by the number of trees
    aff_matrix /= n_est
    return aff_matrix
def _compute_distance_matrix(aff_matrix):
    """Turn a similarity (affinity) matrix into a dissimilarity matrix.

    Intended to be applied to the output of `compute_similarity_matrix`;
    every entry ``s`` is mapped to ``1.0 - s``.
    """
    return 1.0 - aff_matrix


# ported from https://github.com/neurodata/hyppo/blob/main/hyppo/independence/_utils.py
class SimMatrixMixin:
    """Mixin that adds similarity-matrix computation to tree/forest models.

    The host object is handed to `compute_forest_similarity_matrix` as the
    fitted model.
    """

    def compute_similarity_matrix(self, X):
        """Return the pairwise similarity of the samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        sim_matrix : array-like of shape (n_samples, n_samples)
            The similarity matrix among the samples.
        """
        sim_matrix = compute_forest_similarity_matrix(self, X)
        return sim_matrix