Source code for clustpy.metrics.clustering_metrics

import numpy as np
from scipy.optimize import linear_sum_assignment
from clustpy.metrics.confusion_matrix import ConfusionMatrix
from scipy.special import comb
from sklearn.metrics import normalized_mutual_info_score as nmi


def _check_number_of_points(labels_true: np.ndarray, labels_pred: np.ndarray) -> bool:
    """
    Check if the length of the ground truth labels and the prediction labels match.
    If they do not match throw an exception.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Returns
    -------
    boolean : bool
        True if execution was successful
    """
    if labels_pred.shape[0] != labels_true.shape[0]:
        raise Exception(
            "Number of objects of the prediction and ground truth are not equal.\nNumber of prediction objects: " + str(
                labels_pred.shape[0]) + "\nNumber of ground truth objects: " + str(labels_true.shape[0]))
    return True


[docs]def variation_of_information(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """
    Calculate the variation of information between the ground truth labels and the predicted labels.
    Returns a minimum value of 0.0 which corresponds to a perfect match.
    Implemented as defined in https://en.wikipedia.org/wiki/Variation_of_information

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Returns
    -------
    vi : float
        The variation of information

    References
    -------
    Meilă, Marina. "Comparing clusterings by the variation of information."
    Learning theory and kernel machines. Springer, Berlin, Heidelberg, 2003. 173-187.
    """
    _check_number_of_points(labels_true, labels_pred)
    n = len(labels_true)
    cluster_ids_true = np.unique(labels_true)
    cluster_ids_pred = np.unique(labels_pred)
    result = 0.0
    for id_true in cluster_ids_true:
        points_in_cluster_gt = np.argwhere(labels_true == id_true)[:, 0]
        p = len(points_in_cluster_gt) / n
        for id_pred in cluster_ids_pred:
            points_in_cluster_pred = np.argwhere(labels_pred == id_pred)[:, 0]
            q = len(points_in_cluster_pred) / n
            r = len([point for point in points_in_cluster_gt if point in points_in_cluster_pred]) / n
            if r != 0:
                result += r * (np.log(r / p) + np.log(r / q))
    vi = -1 * result
    return vi


[docs]def unsupervised_clustering_accuracy(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """
    Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
    clustering accuracy.
    Returns a value between 1.0 (perfect match) and 0.0 (arbitrary result).
    Since the id of a cluster is not fixed in a clustering setting, the clustering accuracy evaluates each possible
    combination with the ground truth labels.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Returns
    -------
    acc : float
        The accuracy between the two input label sets.

    References
    -------
    Yang, Yi, et al. "Image clustering using local discriminant models and global integration."
    IEEE Transactions on Image Processing 19.10 (2010): 2761-2773.
    """
    _check_number_of_points(labels_true, labels_pred)
    max_label = int(max(labels_pred.max(), labels_true.max()) + 1)
    match_matrix = np.zeros((max_label, max_label), dtype=np.int64)
    for i in range(labels_true.shape[0]):
        match_matrix[int(labels_true[i]), int(labels_pred[i])] -= 1
    indices = linear_sum_assignment(match_matrix)
    acc = -np.sum(match_matrix[indices]) / labels_pred.size
    return acc


[docs]def information_theoretic_external_cluster_validity_measure(labels_true: np.ndarray, labels_pred: np.ndarray,
                                                            scale: bool = True) -> float:
    """
    Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
    Information-Theoretic External Cluster-Validity Measure. Often simply called DOM.
    A lower value indicates a better clustering result.
    If the result is scaled, this method will return a value between 1.0 (perfect match) and 0.0 (arbitrary result).
    An advantage of this metric is that it also works with a differing number of clusters.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm
    scale : bool
        Scale the result to (0, 1], where 1 indicates a perfect match and 0 indicates an arbitrary result (default: True)

    Returns
    -------
    dom : float
        The validity between the two input label sets.

    References
    -------
    Byron E. Dom. 2002. "An information-theoretic external cluster-validity measure."
    In Proceedings of the Eighteenth conference on Uncertainty in artificial intelligence (UAI'02).
    """
    _check_number_of_points(labels_true, labels_pred)
    # Build confusion matrix
    cm = ConfusionMatrix(labels_true, labels_pred)
    n_points = labels_true.shape[0]
    n_classes = cm.confusion_matrix.shape[0]
    # Get number of objects per predicted label
    hks = np.sum(cm.confusion_matrix, axis=0)
    # Calculate Q_0
    cm_tmp = cm.confusion_matrix.copy()  # Needed if some cells are 0 so log can be calculated
    cm_tmp[cm_tmp == 0] = 1  # will later be multiplied by 0, so this does not change the final result
    empirical_conditional_entropy = cm.confusion_matrix / n_points * np.log(cm_tmp / hks)
    empirical_conditional_entropy = - np.sum(
        empirical_conditional_entropy)  # [~np.isnan(empirical_conditional_entropy)])
    sum_binom_coefficient = np.sum([np.log(comb(hk + n_classes - 1, n_classes - 1)) for hk in hks])
    Q_0 = empirical_conditional_entropy + sum_binom_coefficient / n_points
    if scale:
        # --- Scale Q_0 to (0, 1] ---
        # Get number of objects per ground truth label
        hcs = np.sum(cm.confusion_matrix, axis=1)
        # Calculate Q_2
        min_Q_0 = np.sum([np.log(comb(hc + n_classes - 1, n_classes - 1)) for hc in hcs]) / n_points
        entropy_H_C = -np.sum([hc / n_points * np.log(hc / n_points) for hc in hcs])
        max_Q_0 = entropy_H_C + np.log(n_classes)
        assert Q_0 >= min_Q_0 and Q_0 <= max_Q_0, "Q_0 must be between min_Q_0 and max_Q_0"
        # Scale Q_0 to receive final value
        Q_0 = (max_Q_0 - Q_0) / (max_Q_0 - min_Q_0)
    return Q_0


[docs]def fair_normalized_mutual_information(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """
    Evaluate the quality of predicted labels by comparing to the ground truth labels using the
    fair normalized mutual information score. Often simply called FNMI.
    A value of 1 indicates a perfect clustering result, a value of 0 indicates a totally random result.
    The FNMI punishes results where the number of predicted clusters diverges from the ground truth number of clusters.
    Therefore, it uses the normalized mutual information from sklearn and scales the value by using the predicted and ground truth number of clusters.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Returns
    -------
    fnmi : float
        The score between the two input label sets.

    References
    -------
    Amelio, Alessia, and Clara Pizzuti. "Is normalized mutual information a fair measure for comparing community detection methods?."
    Proceedings of the 2015 IEEE/ACM international conference on advances in social networks analysis and mining 2015. 2015.
    """
    _check_number_of_points(labels_true, labels_pred)
    # Get the normalized mutual information
    my_nmi = nmi(labels_true, labels_pred)
    # Get number of clusters
    n_clusters_true = len(np.unique(labels_true))
    n_clusters_pred = len(np.unique(labels_pred))
    # Calculate FNMI
    factor = np.exp(-abs(n_clusters_true - n_clusters_pred) / n_clusters_true)
    fnmi = factor * my_nmi
    return fnmi


[docs]def purity(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """
    Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
    clustering purity.
    Returns a value between 1.0 (perfect match) and 0.0 (arbitrary result).
    Note that the purity is usually very high when the number of predicted clusters is much larger than the ground truth number of clusters.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Returns
    -------
    purity : float
        The purity between the two input label sets.

    References
    -------
    Manning, Christopher D. An introduction to information retrieval. 2009.
    """
    _check_number_of_points(labels_true, labels_pred)
    conf_matrix = ConfusionMatrix(labels_true, labels_pred).confusion_matrix
    best_matches = np.max(conf_matrix, axis=0)
    purity = np.sum(best_matches) / labels_true.shape[0]
    return purity