from sklearn.utils.estimator_checks import estimator_checks_generator
from sklearn.base import BaseEstimator
import numpy as np
from sklearn.utils import check_X_y, check_array, check_random_state
def check_clustpy_estimator(estimator_obj: BaseEstimator, checks_to_ignore: tuple | list = ("check_complex_data",)):
    """
    Run the estimator checks from sklearn, skipping every check whose name is listed in checks_to_ignore.
    By default only the check for complex data is skipped.
    For more information, check: https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/utils/estimator_checks.py

    Each executed check that fails is reported on stdout and the original exception is re-raised.
    Each skipped check is reported on stdout as well.

    Parameters
    ----------
    estimator_obj : BaseEstimator
        Initialization of the tested BaseEstimator
    checks_to_ignore : tuple | list
        List containing the names of checks to ignore (default: ("check_complex_data",))

    Raises
    ------
    Exception
        Whatever exception the first failing check raises
    """
    # A bare string would turn the membership test below into a substring search,
    # so a single name is wrapped into a tuple.
    if isinstance(checks_to_ignore, str):
        checks_to_ignore = (checks_to_ignore,)
    all_checks = estimator_checks_generator(estimator_obj)
    for estimator, check in all_checks:
        # The name is read from the wrapped function (check.func); fall back to the
        # callable itself in case a check is not wrapped.
        check_name = getattr(check, "func", check).__name__
        if check_name in checks_to_ignore:
            print("Skip check:", check_name)
            continue
        try:
            check(estimator)
        except Exception:
            print("Check", check_name, "failed.")
            # Bare raise keeps the original traceback intact
            raise
def check_parameters(X: np.ndarray, *, y: np.ndarray | None = None, random_state: np.random.RandomState | int | None = None,
                     allow_nd: bool = False, allow_size_1: bool = False,
                     estimator_obj: BaseEstimator | None = None) -> tuple[np.ndarray, np.ndarray | None, np.random.RandomState]:
    """
    Check if parameters for X, y and random_state are defined in accordance with the sklearn standard.

    X is validated with check_array (or check_X_y if labels are given); sparse input is rejected.
    If the labels in y are exactly 1, ..., n_classes they are shifted to 0, ..., n_classes - 1 and a warning is printed.
    The caller's label array is left unchanged; the shifted labels are returned as a new array.

    Parameters
    ----------
    X : np.ndarray
        the given data set
    y : np.ndarray
        the labels (can usually be ignored) (default: None)
    random_state : np.random.RandomState | int
        the random state (default: None)
    allow_nd : bool
        allow n-dimensional arrays instead of only allowing 2d arrays (default: False)
    allow_size_1 : bool
        allow a dataset with a single sample (default: False)
    estimator_obj : BaseEstimator
        Initialization of the tested BaseEstimator (default: None)

    Returns
    -------
    tuple : (np.ndarray, np.ndarray, np.random.RandomState)
        the checked data set,
        the checked labels (None if no labels were given),
        the checked random_state

    Raises
    ------
    AssertionError
        If y contains labels other than 0, ..., n_classes - 1 (after the optional shift)
    ValueError
        If X is a 1d array, if X contains a single sample while allow_size_1 is False,
        or if the number of features does not match estimator_obj.n_features_in_
    """
    ensure_2d = not allow_nd
    if y is None:
        X = check_array(X, accept_sparse=False, allow_nd=allow_nd, ensure_2d=ensure_2d)
    else:
        X, y = check_X_y(X, y, accept_sparse=False, allow_nd=allow_nd, ensure_2d=ensure_2d)
        class_labels = np.unique(y)
        if np.min(class_labels) == 1 and np.max(class_labels) == len(class_labels):
            # Build new arrays instead of subtracting in place: the validated y may still
            # share memory with the array passed in by the caller.
            y = y - 1
            class_labels = class_labels - 1
            print("WARNING: labels in y were within [1, {0}], changed to be within [0, {1}] instead".format(len(class_labels), len(class_labels) - 1))
        if not np.array_equal(class_labels, np.arange(len(class_labels))):
            # Raised explicitly (instead of using an assert statement) so the validation is
            # not stripped when Python runs with -O; the exception type stays AssertionError.
            raise AssertionError("y is not defined as expected. Should only contain labels within [0, n_classes - 1]. Labels in y: {0}".format(class_labels))
    # With allow_nd=True the 2d requirement is disabled above, so 1d input must be rejected here
    if X.ndim == 1:
        raise ValueError("Data can not be a 1d array.")
    if not allow_size_1 and X.shape[0] == 1:
        raise ValueError("Model cannot be fitted if n_samples = 1. X shape = {0}".format(X.shape))
    # NOTE(review): reads estimator_obj.n_features_in_ unconditionally, so this presumably expects
    # an already fitted estimator - confirm against callers.
    if estimator_obj is not None and estimator_obj.n_features_in_ != X.shape[1]:
        raise ValueError("X has {0} features, but {1} is expecting {2} features as input.".format(X.shape[1], estimator_obj.__class__.__name__, estimator_obj.n_features_in_))
    random_state = check_random_state(random_state)
    return X, y, random_state