Source code for pyActLearn.performance.event

""" Event-based Performance Metrics

This file implements event-based performance metrics for activity recognition.

Reference:

    - Minnen, David, Tracy Westeyn, Thad Starner, J. Ward, and Paul Lukowicz. Performance metrics and evaluation
      issues for continuous activity recognition. Performance Metrics for Intelligent Systems 4 (2006).
    - Ward, J. A., Lukowicz, P. & Gellersen, H. W. Performance metrics for activity recognition. ACM Trans. Intell.
      Syst. Technol. 2, 6:1-6:23 (2011).
"""
import logging
import numpy as np

logger = logging.getLogger(__name__)


def per_class_event_scoring(num_classes, truth, prediction, truth_scoring, prediction_scoring):
    """Tally event-based scoring labels per class, one count per sample.

    Instead of doing an EAD as proposed in the two referenced papers, the false negatives and the false positives
    are summarised separately.

    **Recall** is defined as TP/(TP + FN). In other words, how often does it predict yes when it is actually yes?
    The errors in the false negatives, namely Deletion, Fragmenting, and Underfill, add up to the FN. A Deletion
    means a total miss of an activity. Underfill represents an error on the begin or end boundary of the event.
    Fragmenting represents a glitch in the prediction.

    **Precision** is defined as TP/(TP + FP). In other words, how often is it a yes when it is predicted yes?
    The errors in the false positives, namely Insertion, Merge and Overfill, add up to the FP. In the task of ADL
    recognition, insertion may be caused by human error in labeling. Overfill represents a disagreement of the
    begin/end boundary of an activity, but the merge is a glitch in the prediction.

    For every sample ``i`` the function increments field ``truth_scoring[i]`` of row ``truth[i]`` in the first
    summary and field ``prediction_scoring[i]`` of row ``prediction[i]`` in the second summary.

    Args:
        num_classes (:obj:`int`): Total number of target classes
        truth (:obj:`numpy.ndarray`): Ground truth array, shape (num_samples, ). Values are used as row indices
            into the first summary.
        prediction (:obj:`numpy.ndarray`): Prediction array, shape (num_samples, ). Values are used as row indices
            into the second summary.
        truth_scoring (:obj:`numpy.ndarray`): Event scoring with respect to ground truth labels (i.e. false negatives
            are further divided into Deletion, Fragmenting, and Underfill). Each entry must be one of the field
            names ``'C'``, ``'D'``, ``'F'``, ``'U'``, ``'u'``.
        prediction_scoring (:obj:`numpy.ndarray`): Event scoring with respect to prediction labels (i.e. false positives
            are further divided into Insertion, Merging and Overfill). Each entry must be one of the field names
            ``'C'``, ``'I'``, ``'M'``, ``'O'``, ``'o'``.

    Returns:
        :obj:`tuple` of :obj:`numpy.ndarray`:
            Tuple of two structured arrays, each of shape (num_classes, ). The first has integer fields
            ``C, D, F, U, u`` (indexed by ground truth class); the second has integer fields ``C, I, M, O, o``
            (indexed by predicted class).
    """
    # The builtin ``int`` is what the removed ``np.int`` alias pointed to; fields are declared as plain scalars
    # rather than with the deprecated ``(name, type, 1)`` form.
    recall_array = np.zeros(
        (num_classes,),
        dtype=np.dtype([(field, int) for field in ('C', 'D', 'F', 'U', 'u')])
    )
    fpr_array = np.zeros(
        (num_classes,),
        dtype=np.dtype([(field, int) for field in ('C', 'I', 'M', 'O', 'o')])
    )
    for i in range(truth_scoring.shape[0]):
        # Indexing a structured array with an integer yields a record that writes through to the array.
        recall_array[truth[i]][truth_scoring[i]] += 1
        fpr_array[prediction[i]][prediction_scoring[i]] += 1
    return recall_array, fpr_array


def per_class_segment_scoring(num_classes, truth, prediction, truth_scoring, prediction_scoring):
    """Create per-class event scoring to identify the contribution of event-based errors to the traditional recall
    and false-positive rate. The count is based on each event segment instead of each sensor event.

    A ground truth segment is a maximal run of identical values in ``truth``. Each segment adds at most one to the
    ``'C'`` field (if any of its samples is scored ``'C'``) and at most one to the ``'D'`` field (if any of its
    samples is scored ``'D'``) of its class. On the prediction side, each maximal run of identical
    ``(prediction, prediction_scoring)`` pairs adds one to the matching field of the predicted class. The last
    segment/run of the input is counted like every other one. Underfill, Fragmenting, Overfill and Merge counts
    are reset to zero before returning.

    Args:
        num_classes (:obj:`int`): Total number of target classes
        truth (:obj:`numpy.ndarray`): Ground truth array, shape (num_samples, )
        prediction (:obj:`numpy.ndarray`): Prediction array, shape (num_samples, )
        truth_scoring (:obj:`numpy.ndarray`): Event scoring with respect to ground truth labels (i.e. false negatives
            are further divided into Deletion, Fragmenting, and Underfill). The information in this array is used to
            fill **Recall** measurement.
        prediction_scoring (:obj:`numpy.ndarray`): Event scoring with respect to prediction labels (i.e. false positives
            are further divided into Insertion, Merging and Overfill). The information in this array is used to fill
            **Precision** measurement.

    Returns:
        :obj:`tuple` of :obj:`numpy.ndarray`:
            Tuple of two structured arrays, each of shape (num_classes, ). The first has integer fields
            ``C, D, F, U, u`` of which only ``C`` and ``D`` can be non-zero; the second has integer fields
            ``C, I, M, O, o`` of which only ``C`` and ``I`` can be non-zero. Both are all-zero for empty input.
    """
    # The builtin ``int`` is what the removed ``np.int`` alias pointed to.
    recall_array = np.zeros(
        (num_classes,),
        dtype=np.dtype([(field, int) for field in ('C', 'D', 'F', 'U', 'u')])
    )
    fpr_array = np.zeros(
        (num_classes,),
        dtype=np.dtype([(field, int) for field in ('C', 'I', 'M', 'O', 'o')])
    )
    num_samples = truth_scoring.shape[0]
    if num_samples == 0:
        # Nothing to score; avoids indexing element 0 of empty arrays below.
        return recall_array, fpr_array

    # Debug bookkeeping: number of ground truth segments, overall and per class.
    total_segs = 0
    seg_logs = np.zeros((num_classes,))

    prev_prediction = prediction[0]
    prev_prediction_scoring = prediction_scoring[0]
    prev_truth = truth[0]
    prev_truth_scoring = truth_scoring[0]
    # Flags (0 or 1) telling whether the current truth segment contains a 'C' run / a 'D' run.
    seg_correct = 0
    seg_delete = 0
    for i in range(num_samples):
        cur_prediction = prediction[i]
        cur_prediction_scoring = prediction_scoring[i]
        cur_truth = truth[i]
        cur_truth_scoring = truth_scoring[i]
        # A run of identical truth scoring just ended: record what it was.
        if cur_truth_scoring != prev_truth_scoring or cur_truth != prev_truth:
            if prev_truth_scoring == 'C':
                seg_correct = 1
            elif prev_truth_scoring == 'D':
                seg_delete = 1
            else:
                recall_array[prev_truth][prev_truth_scoring] += 1
        # A run of identical (prediction, scoring) just ended: count it once.
        if cur_prediction != prev_prediction or cur_prediction_scoring != prev_prediction_scoring:
            fpr_array[prev_prediction][prev_prediction_scoring] += 1
        # A ground truth segment just ended: commit its correct/delete flags.
        if cur_truth != prev_truth:
            total_segs += 1
            seg_logs[prev_truth] += 1
            # Log segments that are neither correct nor deleted, or both at once.
            # NOTE(review): class 7 is excluded from this diagnostic; presumably it is the background class of
            # the original author's dataset - confirm before relying on these messages.
            if seg_correct == seg_delete and prev_truth != 7:
                lo = max(0, i - 10)  # clamp so the window does not wrap around for i < 10
                hi = i + 10
                logger.debug('i: %d', i)
                logger.debug('truth      : %s', truth[lo:hi])
                logger.debug('predi      : %s', prediction[lo:hi])
                logger.debug('truth_score: %s', truth_scoring[lo:hi])
            recall_array[prev_truth]['C'] += seg_correct
            recall_array[prev_truth]['D'] += seg_delete
            seg_correct = 0
            seg_delete = 0
        prev_prediction = cur_prediction
        prev_prediction_scoring = cur_prediction_scoring
        prev_truth = cur_truth
        prev_truth_scoring = cur_truth_scoring
    # Final update: the loop only flushes a run when the *next* sample differs, so the last truth run, the last
    # truth segment and the last prediction run still have to be committed here.
    if prev_truth_scoring == 'C':
        seg_correct = 1
    elif prev_truth_scoring == 'D':
        seg_delete = 1
    # (A trailing 'F'/'U'/'u' run is not tallied because those fields are cleared just below.)
    recall_array[prev_truth]['C'] += seg_correct
    recall_array[prev_truth]['D'] += seg_delete
    fpr_array[prev_prediction][prev_prediction_scoring] += 1
    total_segs += 1
    seg_logs[prev_truth] += 1
    # Clear Underfill, Overfill, Fragment and Merge
    for field in ('U', 'u', 'F'):
        recall_array[field] = 0
    for field in ('O', 'o', 'M'):
        fpr_array[field] = 0
    logger.debug('Total Seg: %d', total_segs)
    logger.debug('seg_logs: %s', seg_logs)
    return recall_array, fpr_array


def score_segment(truth, prediction, bg_label=-1):
    r""" Score Segments

    According to [Minnen2006]_ and [Ward2011]_, a segment is defined as the largest part of an event on which
    the comparison between the ground truth and the output of recognition system can be made in an unambiguous
    way. However, in this piece of code, we remove the limit where the segment is the largest part of an event.
    As long as there is a match between prediction and ground truth, it is recognized as a segment.

    There are four possible outcomes to be scored: TP, TN, FP and FN. In event-based performance scoring, the FP and
    FN are further divided to the following cases:

    - Insertion (I): A FP that corresponds exactly to an inserted return.
    - Merge (M): A FP that occurs between two TP segments within a merge return.
    - Overfill (O): A FP that occurs at the start or end of a partially matched return.
    - Deletion (D): A FN that corresponds exactly to a deleted event.
    - Fragmenting (F): A FN that occurs between two TP segments within a fragmented event.
    - Underfill (U): A FN that occurs at the start or end of a detected event.

    The ground truth is cut into maximal runs of identical labels, and each run is scored in order, from left to
    right, by ``_score_specified_segment``, which writes the one-character codes into the two result arrays.

    Args:
        truth (:obj:`numpy.ndarray`): Ground truth, shape (num_samples, )
        prediction (:obj:`numpy.ndarray`): Prediction, same shape as ``truth``
        bg_label (:obj:`int`): Background label; forwarded unchanged to ``_score_specified_segment``

    Returns:
        :obj:`tuple` of :obj:`numpy.ndarray`:
            ``(truth_score, prediction_score)``, two arrays of one-character strings with one entry per sample.
            The first holds the scoring with respect to ground truth, the second with respect to prediction.
            Both are empty when ``truth`` is empty.
    """
    # Sanity Check
    assert(truth.shape == prediction.shape)
    # Prepare Scoring. 'U1' is the dtype the removed ``np.unicode`` alias produced here: one unicode character
    # per element. Every element is overwritten while scoring, so no initialisation is needed.
    truth_score = np.empty(
        (truth.shape[0],),
        dtype='U1'
    )
    prediction_score = np.empty(
        (truth.shape[0],),
        dtype='U1'
    )
    # Walk the ground truth one segment (run of identical labels) at a time.
    seg_start = 0
    seg_stop = 0
    while seg_stop < truth.size:
        seg_stop = _next_segment(truth, seg_start)
        # Score samples [seg_start, seg_stop) in place.
        _score_specified_segment(truth, prediction, seg_start, seg_stop, truth_score, prediction_score, bg_label)
        seg_start = seg_stop
    return truth_score, prediction_score


def _next_segment(truth, start_index):
    """ Locate where the run of identical labels beginning at ``start_index`` ends

    Args:
        truth (:obj:`numpy.ndarray`): Ground truth
        start_index (:obj:`int`): start index of current segment

    Returns:
        :obj:`int`: exclusive end index of current segment, i.e. the index of the first later sample whose label
        differs from ``truth[start_index]``, or the array size when the run extends to the end
    """
    total = truth.size
    for candidate in range(start_index + 1, total):
        if truth[candidate] != truth[start_index]:
            # First sample that no longer belongs to the run.
            return candidate
    # Run reaches the end of the array (or there was nothing after start_index to inspect).
    return max(start_index + 1, total)


def _score_specified_segment(truth, prediction, start, stop, truth_score, prediction_score, bg_label=-1):
    """Score a given ground truth segment in place

    The samples ``[start, stop)`` must all carry the same ground truth label. Scoring codes are written into
    ``truth_score`` and ``prediction_score``:

    - ``'C'``: prediction equals the segment label (both arrays).
    - ``'D'`` / ``'I'``: default for a wrong sample (Deletion in ``truth_score``, Insertion in
      ``prediction_score``).
    - ``'U'`` / ``'u'``: wrong samples at the start / end of a partially detected segment (``truth_score``).
    - ``'F'`` / ``'M'``: wrong samples between two correct stretches of the segment (Fragmenting in
      ``truth_score``, Merge in ``prediction_score``).
    - ``'O'``: samples before ``start`` that are already predicted as the segment label (``prediction_score``).
    - ``'o'``: samples from ``stop`` onwards that are still predicted as the segment label
      (``prediction_score``).

    Trailing overfill (``'o'``) lands on samples that belong to later segments. When segments are scored from
    left to right (as ``score_segment`` does), those samples receive their default ``'I'`` afterwards, which
    would erase the mark. The default pass therefore re-applies ``'o'`` to the leading samples of this segment
    when they continue a prediction run that started correctly inside an earlier, non-background event.

    Args:
        truth (:obj:`numpy.ndarray`): Ground truth, shape (num_samples, )
        prediction (:obj:`numpy.ndarray`): Prediction, shape (num_samples, )
        start (:obj:`int`): First index of the segment
        stop (:obj:`int`): Exclusive end index of the segment
        truth_score (:obj:`numpy.ndarray`): Output array of one-character strings, modified in place
        prediction_score (:obj:`numpy.ndarray`): Output array of one-character strings, modified in place
        bg_label (:obj:`int`): Background label; background segments only get the default ``C``/``D``/``I``
            scoring and never produce overfill marks
    """
    seg_label = truth[start]
    # First pass: mark correct samples and give every wrong sample its default code, while collecting the
    # stretches [begin, end) of consecutive correct samples.
    num_correct_items = 0
    correct_seg_list = []
    correct_seg_start = start
    correct_seg_stop = start
    for i in range(start, stop):
        if prediction[i] == seg_label:
            truth_score[i] = 'C'
            prediction_score[i] = 'C'
            num_correct_items += 1
            correct_seg_stop = i + 1
        else:
            # For the truth class, it is a false negative - default to Deletion (D)
            truth_score[i] = 'D'
            # For the prediction class, it is a false positive - default to Insertion (I)
            prediction_score[i] = 'I'
            # A correct stretch (if any is pending) ends right before this sample.
            if correct_seg_stop > correct_seg_start:
                correct_seg_list.append((correct_seg_start, correct_seg_stop))
            correct_seg_start = i + 1
    if truth_score[stop - 1] == 'C':
        # The segment ends inside a correct stretch that has not been recorded yet.
        correct_seg_list.append((correct_seg_start, correct_seg_stop))
    # Re-apply trailing overfill coming from an earlier event: walk the prediction run backwards from the sample
    # before this segment; if it reaches a sample whose ground truth equals the predicted label, the run started
    # as a correct detection and the wrong samples at the start of this segment are its overfill.
    spill_label = prediction[start]
    if start > 0 and spill_label != seg_label and spill_label != bg_label:
        j = start - 1
        while j >= 0 and prediction[j] == spill_label and truth[j] != spill_label:
            j -= 1
        if j >= 0 and prediction[j] == spill_label:
            i = start
            while i < stop and prediction[i] == spill_label:
                prediction_score[i] = 'o'
                i += 1
    # If the prediction got the segment completely wrong, scoring finished.
    if num_correct_items == 0 or seg_label == bg_label:
        return
    # Otherwise, go through the second time and identify the cause of the error
    # Overfill (O) is part of false positive (prediction label)
    if prediction[start] == seg_label:  # Check Overfill at the beginning
        i = start - 1
        while i >= 0:
            if prediction[i] == seg_label:
                prediction_score[i] = 'O'
                i -= 1
            else:
                break
    if prediction[stop - 1] == seg_label:  # Check Overfill at the end
        i = stop
        while i < truth.size:
            if prediction[i] == seg_label:
                prediction_score[i] = 'o'
                i += 1
            else:
                break
    # Underfill (U) is part of false negative (related to truth)
    if prediction[start] != seg_label:
        i = start
        while i < stop:
            if prediction[i] == seg_label:
                break
            else:
                truth_score[i] = 'U'
                i += 1
    if prediction[stop - 1] != seg_label:
        i = stop - 1
        while i >= start:
            if prediction[i] == seg_label:
                break
            else:
                truth_score[i] = 'u'
                i -= 1
    # Merge and Fragment occur between two TP segments
    # Handle Fragment and Merge
    if len(correct_seg_list) > 1:
        for i in range(len(correct_seg_list) - 1):
            tmp_start = correct_seg_list[i][1]
            tmp_stop = correct_seg_list[i+1][0]
            for j in range(tmp_start, tmp_stop):
                # For the truth class, it is a false negative - so it is Fragment (F)
                truth_score[j] = 'F'
                # For the prediction class, it is a false positive - so it is Merge (M)
                prediction_score[j] = 'M'