import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import linear_sum_assignment
def _rearrange(confusion_matrix: np.ndarray) -> np.ndarray:
"""
Rearrange the confusion matrix in such a way that the sum of the diagonal is maximized.
Thereby, the best matching combination of labels will be shown.
Uses the Hungarian Method to identify the best match.
If parameter inplace is set to True, this method will change the original confusion matrix.
Else the rearranged matrix will only be returned.
Parameters
----------
confusion_matrix : np.ndarray
The original confusion matrix
Returns
-------
rearranged_confusion_matrix : np.ndarray
The rearranged confusion matrix.
If number of ground truth labels is larger than the number of predicted labels, the resulting confusion matrix will be quadradic with multiple 0 columns.
"""
# Change order using the Hungarian Method
max_number_labels = max(confusion_matrix.shape)
rearranged_confusion_matrix = np.zeros((max_number_labels, max_number_labels), dtype=confusion_matrix.dtype)
# Linear sum assignment tries to minimize the diagonal sum -> use negative confusion_matrix
rearranged_confusion_matrix[:confusion_matrix.shape[0], :confusion_matrix.shape[1]] = -confusion_matrix
indices = linear_sum_assignment(rearranged_confusion_matrix)
# Revert values back to positive range, change order of the columns
rearranged_confusion_matrix = -rearranged_confusion_matrix[:, indices[1]]
rearranged_confusion_matrix = rearranged_confusion_matrix[:confusion_matrix.shape[0], :]
# If there are more columns than rows sort remaining columns by highest value
if confusion_matrix.shape[1] > confusion_matrix.shape[0]:
missing_columns = np.arange(confusion_matrix.shape[0], confusion_matrix.shape[1])
missing_columns_order = np.argsort(np.max(rearranged_confusion_matrix[:, missing_columns], axis=0))[::-1]
rearranged_confusion_matrix[:, missing_columns] = rearranged_confusion_matrix[:, missing_columns[missing_columns_order]]
return rearranged_confusion_matrix
def _plot_confusion_matrix(confusion_matrix: np.ndarray, show_text: bool, figsize: tuple, cmap: str, textcolor: str,
                           vmin: float, vmax: float) -> None:
    """
    Draw the confusion matrix as a colored grid and show the figure.

    Parameters
    ----------
    confusion_matrix : np.ndarray
        The confusion matrix to plot
    show_text : bool
        If True, the value of each cell is written into the cell
    figsize : tuple
        Size of the figure. Handed to plt.subplots as figsize, which matplotlib reads as (width, height)
    cmap : str
        Colormap used to color the cells
    textcolor : str
        Color of the cell text. Only relevant if show_text is True
    vmin : float
        Lower end of the value range that is mapped onto the colormap.
        If None, it will be set as the minimum value within the confusion matrix
    vmax : float
        Upper end of the value range that is mapped onto the colormap.
        If None, it will be set as the maximum value within the confusion matrix
    """
    _, axis = plt.subplots(figsize=figsize)
    # Color-coded cells
    axis.imshow(confusion_matrix, cmap=cmap, vmin=vmin, vmax=vmax)
    if show_text:
        n_rows = confusion_matrix.shape[0]
        n_cols = confusion_matrix.shape[1]
        # Walk over all cells row by row; text position is given as (x, y) = (column, row)
        for row, col in np.ndindex(n_rows, n_cols):
            axis.text(col, row, confusion_matrix[row, col],
                      ha="center", va="center", color=textcolor)
    plt.show()
class ConfusionMatrix:
    """
    Create a Confusion Matrix of predicted and ground truth labels.
    Each row corresponds to a ground truth label and each column to a predicted label.
    The number in each cell (i, j) indicates how many objects with ground truth label i have been predicted label j.

    Parameters
    ----------
    labels_true : np.ndarray
        The ground truth labels of the data set
    labels_pred : np.ndarray
        The labels as predicted by a clustering algorithm

    Attributes
    ----------
    true_clusters : np.ndarray
        The sorted unique ground truth labels; entry i belongs to row i of the initial confusion matrix
    pred_clusters : np.ndarray
        The sorted unique predicted labels; entry j belongs to column j of the initial confusion matrix
    confusion_matrix : np.ndarray
        The confusion matrix
    """

    def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray):
        # Accept any array-like input (e.g. plain lists) as well as numpy arrays
        labels_true = np.asarray(labels_true)
        labels_pred = np.asarray(labels_pred)
        assert labels_true.shape[0] == labels_pred.shape[0], "Number of true and predicted labels must match"
        # The inverse arrays give, for every object, the row / column index of its label
        self.true_clusters, row_ids = np.unique(labels_true, return_inverse=True)
        self.pred_clusters, col_ids = np.unique(labels_pred, return_inverse=True)
        conf_matrix = np.zeros((self.true_clusters.shape[0], self.pred_clusters.shape[0]), dtype=int)
        # Count every (true label, predicted label) pair; np.add.at accumulates repeated index pairs correctly
        np.add.at(conf_matrix, (np.ravel(row_ids), np.ravel(col_ids)), 1)
        self.confusion_matrix = conf_matrix

    def __str__(self):
        """
        Print the confusion matrix.

        Returns
        -------
        str_confusion_matrix : str
            The confusion matrix as a string
        """
        return str(self.confusion_matrix)

    def rearrange(self, inplace: bool = True) -> np.ndarray:
        """
        Rearrange the confusion matrix in such a way that the sum of the diagonal is maximized.
        Thereby, the best matching combination of labels will be shown.
        Uses the Hungarian Method to identify the best match.
        If parameter inplace is set to True, this method will change the original confusion matrix.
        Else the rearranged matrix will only be returned.
        The attributes true_clusters and pred_clusters are not reordered by this method.

        Parameters
        ----------
        inplace : bool
            Should the new confusion matrix overwrite the original one (default: True)

        Returns
        -------
        rearranged_confusion_matrix : np.ndarray
            The rearranged confusion matrix.
            If number of ground truth labels is larger than the number of predicted labels, the resulting
            confusion matrix will be quadratic with multiple 0 columns.
        """
        rearranged_confusion_matrix = _rearrange(self.confusion_matrix)
        if inplace:
            self.confusion_matrix = rearranged_confusion_matrix
        return rearranged_confusion_matrix

    def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "YlGn", textcolor: str = "black",
             vmin: int = 0, vmax: int = None) -> None:
        """
        Plot the confusion matrix.

        Parameters
        ----------
        show_text : bool
            Show the value in each cell as text (default: True)
        figsize : tuple
            Size of the figure, passed on unchanged to the plotting helper (default: (10, 10))
        cmap : str
            Colormap used for the plot (default: "YlGn")
        textcolor : str
            Color of the text. Only relevant if show_text is True (default: "black")
        vmin : int
            Minimum possible value within a cell of the confusion matrix.
            If None, it will be set as the minimum value within the confusion matrix.
            Used to choose the color from the colormap (default: 0)
        vmax : int
            Maximum possible value within a cell of the confusion matrix.
            If None, it will be set as the maximum value within the confusion matrix.
            Used to choose the color from the colormap (default: None)
        """
        _plot_confusion_matrix(self.confusion_matrix, show_text, figsize, cmap, textcolor, vmin, vmax)