# post_processing.py

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import json
import multiprocessing as mp
from tqdm import tqdm
from collections import defaultdict
import pickle as pkl

from utils import iou_with_anchors


def load_json(file):
    """Open ``file`` for reading and return its decoded JSON content."""
    with open(file) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def save_json(file, obj):
    """Serialize ``obj`` as JSON into ``file``, replacing any existing content."""
    with open(file, mode='w') as out_handle:
        json.dump(obj, fp=out_handle)


def save_pickle(file, obj):
    """Pickle ``obj`` into ``file`` (opened in binary write mode)."""
    with open(file, mode='wb') as out_handle:
        pickler = pkl.Pickler(out_handle)
        pickler.dump(obj)


def get_durations(annotations):
    """Map each video name to its ``master_duration``.

    Annotation keys are split on ``'-'`` and the first part is used as the
    video name, so several keys may belong to the same video. All entries of
    one video must report the same ``master_duration``.

    Args:
        annotations: mapping from annotation id to a dict that contains a
            ``'master_duration'`` entry.

    Returns:
        dict mapping video name to its master duration.

    Raises:
        ValueError: if two entries of the same video disagree on the
            master duration.
    """
    durations = {}
    for video_id, annot in annotations.items():
        video_name = video_id.split('-')[0]
        master_duration = annot['master_duration']
        # The lookup must use the video name (the key actually stored), not
        # the full annotation id; otherwise the consistency check never runs.
        known_duration = durations.setdefault(video_name, master_duration)
        if known_duration != master_duration:
            raise ValueError(
                'Inconsistent master_duration for video %r: %r vs %r (entry %r)'
                % (video_name, known_duration, master_duration, video_id))
    return durations


def getDatasetDict(annot_file, split):
    """Return the annotation entries of ``annot_file`` that belong to ``split``.

    The file is read with ``load_json``; its ``'database'`` mapping is
    filtered on each entry's ``'subset'`` field.
    """
    database = load_json(annot_file)['database']

    selected = {}
    for vdo_id, anno in database.items():
        if anno['subset'] == split:
            selected[vdo_id] = anno
    return selected


def standardize_results_anet(video_dict):
    """Wrap per-video results in the ActivityNet-style submission layout.

    Returns a dict with the fixed ``'version'`` and ``'external_data'``
    headers and a ``'results'`` mapping holding every entry of ``video_dict``.
    """
    external_data = {
        'used': 'true',
        'details': 'Backbone 3D Network are trained on Kinetics training set.'
    }
    per_video = {}
    for video_id, results in video_dict.items():
        per_video[video_id] = results

    return {
        'version': 'ACTIVITY_NET_1.3',
        'external_data': external_data,
        'results': per_video,
    }


def standardize_results_thumos(video_dict):
    """Convert per-video proposal lists into NumPy arrays.

    Each proposal dict becomes one ``[segment[0], segment[1], score]`` row;
    the rows of a video are stacked with ``np.array``.
    """
    packed = {}
    for video_id, results in video_dict.items():
        rows = []
        for proposal in results:
            segment = proposal['segment']
            rows.append([segment[0], segment[1], proposal['score']])
        packed[video_id] = np.array(rows)
    return packed


class PostProcessor(object):
    """Post-process per-video proposal files into one dataset-level result.

    For every video a ``<name>.feather`` table with ``xmin``, ``xmax`` and
    ``score`` columns is read from ``output_dir``, filtered with hard or soft
    NMS, truncated to ``max_proposals`` entries and stored in a
    manager-backed dict shared with the worker processes. Calling the
    instance runs all videos in parallel, standardizes the collected results
    for the configured dataset and saves them to ``result_path``.
    """

    # Directory that holds the per-video ``<name>.feather`` proposal files.
    # Kept as a class attribute so it can be overridden per instance.
    output_dir = './results/outputs/'

    def __init__(self, cfg, split):
        """Read the post-processing settings and the annotations of ``split``.

        Args:
            cfg: configuration object exposing ``DATA.RESULT_PATH``,
                ``DATA.ANNOTATION_FILE``, ``DATASET`` and the
                ``BMN.POST_PROCESS`` options read below.
            split: value matched against each annotation's ``'subset'``.
        """
        post_cfg = cfg.BMN.POST_PROCESS

        self.result_path = cfg.DATA.RESULT_PATH

        self.snms_alpha = post_cfg.SOFT_NMS_ALPHA
        self.snms_t1 = post_cfg.SOFT_NMS_LOW_THRESHOLD
        self.snms_t2 = post_cfg.SOFT_NMS_HIGH_THRESHOLD
        self.nms_thresh = post_cfg.HARD_NMS_THRESHOLD

        self.max_proposals = post_cfg.MAX_PROPOSALS
        self.n_threads = post_cfg.NUM_THREADS

        self.video_dict = getDatasetDict(cfg.DATA.ANNOTATION_FILE, split)
        self.video_list = list(self.video_dict.keys())
        # Manager-backed dict so worker processes can publish their results.
        self.result_dict = mp.Manager().dict()
        self.dataset = cfg.DATASET
        self.nms_func = self.hard_nms if post_cfg.USE_HARD_NMS else self.soft_nms

        if self.dataset == 'anet':
            self.standardize_results = standardize_results_anet
            self.get_duration = self.get_duration_anet
            self.save_result = save_json
        elif self.dataset == 'thumos':
            self.standardize_results = standardize_results_thumos
            self.durations = get_durations(self.video_dict)
            self.get_duration = self.get_duration_thumos
            self.save_result = save_pickle

            # Group annotation ids by the part before the first '-' and order
            # each group by the integer after the last '-'.
            self.video_groups = defaultdict(list)
            for video_name in self.video_list:
                self.video_groups[video_name.split('-')[0]].append(video_name)
            for group_name in self.video_groups:
                self.video_groups[group_name] = sorted(
                    self.video_groups[group_name],
                    key=lambda name: int(name.split('-')[-1]))

    def get_duration_anet(self, video_name):
        """Return the ``'duration'`` annotation of ``video_name``."""
        return self.video_dict[video_name]['duration']

    def get_duration_thumos(self, video_name):
        """Return the master duration recorded for ``video_name``."""
        return self.durations[video_name]

    def hard_nms(self, df):
        """Greedy hard NMS over the proposals in ``df``.

        Proposals are visited by decreasing score; every remaining proposal
        whose IoU with the selected one exceeds ``self.nms_thresh`` is
        discarded.

        Args:
            df: DataFrame with ``xmin``, ``xmax`` and ``score`` columns.

        Returns:
            DataFrame with ``score``, ``xmin`` and ``xmax`` columns holding
            the kept proposals in selection order.
        """
        starts = np.array(df.xmin.values)
        ends = np.array(df.xmax.values)
        scores = np.array(df.score.values)
        lengths = ends - starts

        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            best = order[0]
            rest = order[1:]
            keep.append(best)

            # For disjoint segments the difference is negative, which yields
            # a negative IoU; such proposals are never suppressed as long as
            # the threshold is non-negative.
            intersection = (np.minimum(ends[best], ends[rest])
                            - np.maximum(starts[best], starts[rest]))
            union = (lengths[best] + lengths[rest] - intersection).astype(float)
            iou = intersection / union

            survivors = np.where(iou <= self.nms_thresh)[0]
            # ``survivors`` indexes ``rest``, i.e. ``order`` shifted by one.
            order = order[survivors + 1]

        keep = np.asarray(keep, dtype=int)
        return pd.DataFrame({
            'score': scores[keep],
            'xmin': starts[keep],
            'xmax': ends[keep],
        })

    def soft_nms(self, df):
        """Soft NMS with Gaussian score decay.

        The highest-scoring remaining proposal is selected repeatedly. Every
        other remaining proposal whose IoU with it exceeds
        ``snms_t1 + (snms_t2 - snms_t1) * width`` (``width`` being the length
        of the selected proposal) has its score multiplied by
        ``exp(-iou ** 2 / snms_alpha)``.

        Selection stops once more than ``self.max_proposals`` proposals were
        selected or no proposal is left.

        Args:
            df: DataFrame with ``xmin``, ``xmax`` and ``score`` columns.

        Returns:
            DataFrame with ``score``, ``xmin`` and ``xmax`` columns holding
            the selected proposals in selection order.
        """
        df = df.sort_values(by="score", ascending=False)
        tstart = list(df.xmin.values[:])
        tend = list(df.xmax.values[:])
        tscore = list(df.score.values[:])

        rstart, rend, rscore = [], [], []

        # Loop while any proposal is left: requiring more than one would
        # silently drop the final proposal (two proposals in, one out).
        while tscore and len(rscore) <= self.max_proposals:
            max_index = tscore.index(max(tscore))
            tmp_iou_list = iou_with_anchors(
                np.array(tstart),
                np.array(tend), tstart[max_index], tend[max_index])

            # Both values depend only on the selected proposal, so compute
            # them once instead of once per candidate.
            tmp_width = tend[max_index] - tstart[max_index]
            decay_threshold = self.snms_t1 + (self.snms_t2 - self.snms_t1) * tmp_width

            for idx in range(len(tscore)):
                if idx == max_index:
                    continue
                tmp_iou = tmp_iou_list[idx]
                if tmp_iou > decay_threshold:
                    tscore[idx] = tscore[idx] * np.exp(-np.square(tmp_iou) / self.snms_alpha)

            rstart.append(tstart.pop(max_index))
            rend.append(tend.pop(max_index))
            rscore.append(tscore.pop(max_index))

        newDf = pd.DataFrame()
        newDf['score'] = rscore
        newDf['xmin'] = rstart
        newDf['xmax'] = rend
        return newDf

    def _format_proposals(self, df, video_duration):
        """Convert the first ``max_proposals`` rows of ``df`` to result dicts.

        ``xmin`` is clipped below at 0, ``xmax`` above at 1, and both are
        scaled by ``video_duration``.
        NOTE(review): this presumably means xmin/xmax are fractions of the
        video duration; confirm against the code that writes the files.

        All values are converted to built-in floats because NumPy scalars
        such as ``np.float32`` cannot be serialized by ``json.dump``.
        """
        limit = max(0, min(self.max_proposals, len(df)))
        scores = df.score.values[:limit]
        starts = df.xmin.values[:limit]
        ends = df.xmax.values[:limit]
        return [
            {
                "score": float(score),
                "segment": [
                    max(0.0, float(start)) * video_duration,
                    min(1.0, float(end)) * video_duration,
                ],
            }
            for score, start, end in zip(scores, starts, ends)
        ]

    def video_post_process(self, video_list):
        """Run NMS on every video in ``video_list`` and publish the results.

        For each name the proposal table ``<output_dir><name>.feather`` is
        read, filtered with ``self.nms_func`` (only when it holds more than
        one row), sorted by decreasing score and stored in
        ``self.result_dict[name]`` as a list of
        ``{"score": ..., "segment": [start, end]}`` dicts.
        """
        for video_name in video_list:
            df = pd.read_feather(self.output_dir + video_name + ".feather")

            if len(df) > 1:
                df = self.nms_func(df)

            df = df.sort_values(by="score", ascending=False)
            video_duration = self.get_duration(video_name)
            self.result_dict[video_name] = self._format_proposals(df, video_duration)

    @staticmethod
    def _wait_for_idle_slot(workers, poll_interval=0.01):
        """Return the index of a finished process in ``workers``.

        Blocks with short ``join`` timeouts instead of spinning so the parent
        process does not burn a CPU core while the workers are busy.
        """
        while True:
            for slot, worker in enumerate(workers):
                if not worker.is_alive():
                    return slot
            workers[0].join(timeout=poll_interval)

    def _run_thumos(self, n_workers):
        """Merge the files of each video group and process one group per task."""
        group_sizes = {}
        for group_name, video_sequence in self.video_groups.items():
            merged = pd.concat(
                [pd.read_feather(self.output_dir + video_name + '.feather')
                 for video_name in video_sequence],
                ignore_index=True)
            merged.to_feather(self.output_dir + group_name + '.feather')
            group_sizes[group_name] = len(merged)

        # Groups with the most proposals are scheduled first.
        video_list = sorted(
            self.video_groups.keys(),
            key=lambda name: group_sizes[name], reverse=True)

        workers = []
        for video_name in tqdm(video_list):
            worker = mp.Process(target=self.video_post_process, args=([video_name],))
            if len(workers) < n_workers:
                workers.append(worker)
            else:
                slot = self._wait_for_idle_slot(workers)
                workers[slot].join()
                workers[slot] = worker
            worker.start()
        for worker in workers:
            worker.join()

    def _run_anet(self, n_workers):
        """Split the video list into ``n_workers`` contiguous chunks and process them."""
        video_list = self.video_list
        bounds = np.linspace(0, len(video_list), n_workers + 1)

        workers = []
        for i in range(n_workers):
            chunk = video_list[int(bounds[i]):int(bounds[i + 1])]
            worker = mp.Process(target=self.video_post_process, args=(chunk,))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()

    def __call__(self):
        """Post-process all videos in parallel and save the standardized result.

        After the call ``self.result_dict`` holds the standardized result
        that was written to ``self.result_path``.

        Raises:
            ValueError: if ``self.dataset`` is neither ``'thumos'`` nor
                ``'anet'``.
        """
        # A non-positive worker count would otherwise wait forever (thumos)
        # or start no worker at all (anet).
        n_workers = max(1, self.n_threads)

        if self.dataset == 'thumos':
            self._run_thumos(n_workers)
        elif self.dataset == 'anet':
            self._run_anet(n_workers)
        else:
            raise ValueError('Unsupported dataset: %r' % (self.dataset,))

        self.result_dict = self.standardize_results(dict(self.result_dict))
        self.save_result(self.result_path, self.result_dict)