Source code for clustpy.metrics.clustering_metrics
import numpy as np
from scipy.optimize import linear_sum_assignment
from clustpy.metrics.confusion_matrix import ConfusionMatrix
from scipy.special import comb
from sklearn.metrics import normalized_mutual_info_score as nmi
def _check_number_of_points(labels_true: np.ndarray, labels_pred: np.ndarray) -> bool:
"""
Check if the length of the ground truth labels and the prediction labels match.
If they do not match throw an exception.
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
Returns
-------
boolean : bool
True if execution was successful
"""
if labels_pred.shape[0] != labels_true.shape[0]:
raise Exception(
"Number of objects of the prediction and ground truth are not equal.\nNumber of prediction objects: " + str(
labels_pred.shape[0]) + "\nNumber of ground truth objects: " + str(labels_true.shape[0]))
return True
[docs]def variation_of_information(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
"""
Calculate the variation of information between the ground truth labels and the predicted labels.
Returns a minimum value of 0.0 which corresponds to a perfect match.
Implemented as defined in https://en.wikipedia.org/wiki/Variation_of_information
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
Returns
-------
vi : float
The variation of information
References
-------
Meilă, Marina. "Comparing clusterings by the variation of information."
Learning theory and kernel machines. Springer, Berlin, Heidelberg, 2003. 173-187.
"""
_check_number_of_points(labels_true, labels_pred)
n = len(labels_true)
cluster_ids_true = np.unique(labels_true)
cluster_ids_pred = np.unique(labels_pred)
result = 0.0
for id_true in cluster_ids_true:
points_in_cluster_gt = np.argwhere(labels_true == id_true)[:, 0]
p = len(points_in_cluster_gt) / n
for id_pred in cluster_ids_pred:
points_in_cluster_pred = np.argwhere(labels_pred == id_pred)[:, 0]
q = len(points_in_cluster_pred) / n
r = len([point for point in points_in_cluster_gt if point in points_in_cluster_pred]) / n
if r != 0:
result += r * (np.log(r / p) + np.log(r / q))
vi = -1 * result
return vi
[docs]def unsupervised_clustering_accuracy(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
"""
Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
clustering accuracy.
Returns a value between 1.0 (perfect match) and 0.0 (arbitrary result).
Since the id of a cluster is not fixed in a clustering setting, the clustering accuracy evaluates each possible
combination with the ground truth labels.
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
Returns
-------
acc : float
The accuracy between the two input label sets.
References
-------
Yang, Yi, et al. "Image clustering using local discriminant models and global integration."
IEEE Transactions on Image Processing 19.10 (2010): 2761-2773.
"""
_check_number_of_points(labels_true, labels_pred)
max_label = int(max(labels_pred.max(), labels_true.max()) + 1)
match_matrix = np.zeros((max_label, max_label), dtype=np.int64)
for i in range(labels_true.shape[0]):
match_matrix[int(labels_true[i]), int(labels_pred[i])] -= 1
indices = linear_sum_assignment(match_matrix)
acc = -np.sum(match_matrix[indices]) / labels_pred.size
return acc
[docs]def information_theoretic_external_cluster_validity_measure(labels_true: np.ndarray, labels_pred: np.ndarray,
scale: bool = True) -> float:
"""
Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
Information-Theoretic External Cluster-Validity Measure. Often simply called DOM.
A lower value indicates a better clustering result.
If the result is scaled, this method will return a value between 1.0 (perfect match) and 0.0 (arbitrary result).
An advantage of this metric is that it also works with a differing number of clusters.
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
scale : bool
Scale the result to (0, 1], where 1 indicates a perfect match and 0 indicates an arbitrary result (default: True)
Returns
-------
dom : float
The validity between the two input label sets.
References
-------
Byron E. Dom. 2002. "An information-theoretic external cluster-validity measure."
In Proceedings of the Eighteenth conference on Uncertainty in artificial intelligence (UAI'02).
"""
_check_number_of_points(labels_true, labels_pred)
# Build confusion matrix
cm = ConfusionMatrix(labels_true, labels_pred)
n_points = labels_true.shape[0]
n_classes = cm.confusion_matrix.shape[0]
# Get number of objects per predicted label
hks = np.sum(cm.confusion_matrix, axis=0)
# Calculate Q_0
cm_tmp = cm.confusion_matrix.copy() # Needed if some cells are 0 so log can be calculated
cm_tmp[cm_tmp == 0] = 1 # will later be multiplied by 0, so this does not change the final result
empirical_conditional_entropy = cm.confusion_matrix / n_points * np.log(cm_tmp / hks)
empirical_conditional_entropy = - np.sum(
empirical_conditional_entropy) # [~np.isnan(empirical_conditional_entropy)])
sum_binom_coefficient = np.sum([np.log(comb(hk + n_classes - 1, n_classes - 1)) for hk in hks])
Q_0 = empirical_conditional_entropy + sum_binom_coefficient / n_points
if scale:
# --- Scale Q_0 to (0, 1] ---
# Get number of objects per ground truth label
hcs = np.sum(cm.confusion_matrix, axis=1)
# Calculate Q_2
min_Q_0 = np.sum([np.log(comb(hc + n_classes - 1, n_classes - 1)) for hc in hcs]) / n_points
entropy_H_C = -np.sum([hc / n_points * np.log(hc / n_points) for hc in hcs])
max_Q_0 = entropy_H_C + np.log(n_classes)
assert Q_0 >= min_Q_0 and Q_0 <= max_Q_0, "Q_0 must be between min_Q_0 and max_Q_0"
# Scale Q_0 to receive final value
Q_0 = (max_Q_0 - Q_0) / (max_Q_0 - min_Q_0)
return Q_0
[docs]def fair_normalized_mutual_information(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
"""
Evaluate the quality of predicted labels by comparing to the ground truth labels using the
fair normalized mutual information score. Often simply called FNMI.
A value of 1 indicates a perfect clustering result, a value of 0 indicates a totally random result.
The FNMI punishes results where the number of predicted clusters diverges from the ground truth number of clusters.
Therefore, it uses the normalized mutual information from sklearn and scales the value by using the predicted and ground truth number of clusters.
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
Returns
-------
fnmi : float
The score between the two input label sets.
References
-------
Amelio, Alessia, and Clara Pizzuti. "Is normalized mutual information a fair measure for comparing community detection methods?."
Proceedings of the 2015 IEEE/ACM international conference on advances in social networks analysis and mining 2015. 2015.
"""
_check_number_of_points(labels_true, labels_pred)
# Get the normalized mutual information
my_nmi = nmi(labels_true, labels_pred)
# Get number of clusters
n_clusters_true = len(np.unique(labels_true))
n_clusters_pred = len(np.unique(labels_pred))
# Calculate FNMI
factor = np.exp(-abs(n_clusters_true - n_clusters_pred) / n_clusters_true)
fnmi = factor * my_nmi
return fnmi
[docs]def purity(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
"""
Evaluate the quality of predicted labels by comparing it to the ground truth labels using the
clustering purity.
Returns a value between 1.0 (perfect match) and 0.0 (arbitrary result).
Note that the purity is usually very high when the number of predicted clusters is much larger than the ground truth number of clusters.
Parameters
----------
labels_true : np.ndarray
The ground truth labels of the data set
labels_pred : np.ndarray
The labels as predicted by a clustering algorithm
Returns
-------
purity : float
The purity between the two input label sets.
References
-------
Manning, Christopher D. An introduction to information retrieval. 2009.
"""
_check_number_of_points(labels_true, labels_pred)
conf_matrix = ConfusionMatrix(labels_true, labels_pred).confusion_matrix
best_matches = np.max(conf_matrix, axis=0)
purity = np.sum(best_matches) / labels_true.shape[0]
return purity