import numpy as np

from relations import Relations


def pretty_print_eval_map(result_map):
    s, _ = pretty_print_eval_map_rec(result_map, '')
    return s


def pretty_print_eval_map_rec(result_map, spaces):
    """Recursively renders a nested dict of results. A leaf dict (str/int
    values) is rendered on a single line; nested dicts are indented one
    extra space per level."""
    assert type(result_map) is dict
    first_value = list(result_map.values())[0]
    if type(first_value) is str or type(first_value) is int:
        # Leaf dict: render all key-value pairs on one line.
        return '; '.join(str(k) + ': ' + str(v) for k, v in result_map.items()) + '\n', True
    s = ''
    for k, v in result_map.items():
        pretty_v, is_leaf = pretty_print_eval_map_rec(v, spaces + ' ')
        if is_leaf:
            s += spaces + str(k) + ': ' + pretty_v
        else:
            s += spaces + str(k) + ': \n' + pretty_v
    return s, False
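# Usage sketch (an illustration added here, not part of the original module):
# pretty_print_eval_map expects a nested dict whose leaves map metric names to
# str/int values, such as the `results` dict built by eval_classification
# below. The task name, alpha and metric values are made-up examples.
#
#   results = {'link_prediction': {'alpha=0.5': {
#       'VALID': {'precision': '81.0', 'recall': '77.2', 'f1': '79.1'},
#       'TEST': {'precision': '80.3', 'recall': '76.8', 'f1': '78.5'},
#   }}}
#   print(pretty_print_eval_map(results))
#   # link_prediction:
#   #  alpha=0.5:
#   #   VALID: precision: 81.0; recall: 77.2; f1: 79.1
#   #   TEST: precision: 80.3; recall: 76.8; f1: 78.5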
def eval_classification(logger, task, valid_pos_path, valid_neg_path, test_pos_path, test_neg_path,
                        vocab, score_fn, alphas_to_validate):
    """Evaluates edge classification based on a scoring function.

    :param score_fn: function with signature
        score_fn(alpha, parent_indices, other_indices, rel_reversed) that
        scores each edge (u, v) for v in other_indices. A lower score means a
        higher chance that the edge exists. One example of such a function is
        the score of is-a(u, v) from the "Poincare Embeddings" paper.
    """
    if valid_pos_path == test_pos_path:
        assert valid_neg_path == test_neg_path
        assert task == 'reconstruction'
    if task != 'reconstruction':
        assert valid_neg_path != test_neg_path

    results = {task: {}}
    res = results[task]

    valid_eval_obj = _EvalObj(logger, valid_pos_path, valid_neg_path, vocab)
    test_eval_obj = _EvalObj(logger, test_pos_path, test_neg_path, vocab)

    ###### Perform validation: pick the alpha whose best threshold gives the highest validation F1.
    best_alpha = None
    best_valid_f1 = -1
    best_optimal_th = None
    for alpha in alphas_to_validate:
        logger.info('now validating alpha = ' + str(alpha))
        optimal_th, optimal_valid_f1 = \
            valid_eval_obj.find_best_classification_thresh_F1(score_fn=score_fn, alpha=alpha)  ##### Expensive
        if optimal_valid_f1 > best_valid_f1:
            best_valid_f1 = optimal_valid_f1
            best_alpha = alpha
            best_optimal_th = optimal_th
    ###### Done validation

    # Re-evaluate the winning (alpha, threshold) pair on both splits.
    best_key = 'alpha=' + str(best_alpha)
    res[best_key] = {}
    res[best_key]['VALID'] = \
        valid_eval_obj.evaluate_classification(score_fn=score_fn, alpha=best_alpha, threshold=best_optimal_th)
    res[best_key]['TEST'] = \
        test_eval_obj.evaluate_classification(score_fn=score_fn, alpha=best_alpha, threshold=best_optimal_th)

    best_test_f1 = res[best_key]['TEST']['f1']
    best_valid_f1 = res[best_key]['VALID']['f1']
    return results, best_alpha, float(best_optimal_th), float(best_test_f1), float(best_valid_f1)


class _EvalObj(object):
    """Evaluates reconstruction / link prediction on a given network for a given embedding."""

    def __init__(self, logger, positive_rel_filepath, negative_rel_filepath, vocab):
        self.logger = logger

        # True edges, stored as two parallel lists of vocabulary indices.
        self.pos_relations_parents = []
        self.pos_relations_children = []
        rels = Relations(positive_rel_filepath, reverse=False)
        for node_parent, node_child in rels:
            assert node_parent != node_child
            node_parent_idx = vocab[node_parent].index
            node_child_idx = vocab[node_child].index
            self.pos_relations_parents.append(node_parent_idx)
            self.pos_relations_children.append(node_child_idx)

        # Non-edges (negatives), stored the same way.
        self.neg_relations_parents = []
        self.neg_relations_children = []
        rels = Relations(negative_rel_filepath, reverse=False)
        for node_parent, node_child in rels:
            assert node_parent != node_child
            node_parent_idx = vocab[node_parent].index
            node_child_idx = vocab[node_child].index
            self.neg_relations_parents.append(node_parent_idx)
            self.neg_relations_children.append(node_child_idx)

        logger.info('eval datasets file pos = ' + positive_rel_filepath +
                    ' neg = ' + negative_rel_filepath +
                    '; eval num rels pos = ' + str(len(self.pos_relations_parents)) +
                    ' neg = ' + str(len(self.neg_relations_parents)))

    def evaluate_classification(self, score_fn, alpha, threshold):
        """Evaluates precision, recall and F1 for link prediction.

        Parameters
        ----------
        score_fn : function with signature
            score_fn(alpha, parent_indices, other_indices, rel_reversed).
            Scores each edge (u, v) for v in other_indices: the lower the
            score, the higher the chance the edge exists; the higher the
            score, the lower the chance.
        threshold : float
            Threshold for the scores. Whatever scores at or below it is
            classified as an edge; whatever scores above it is not.
        """
        pos_scores = score_fn(alpha, self.pos_relations_parents, self.pos_relations_children, False)
        tp = (pos_scores <= threshold).sum()
        fn = (pos_scores > threshold).sum()

        neg_scores = score_fn(alpha, self.neg_relations_parents, self.neg_relations_children, False)
        fp = (neg_scores <= threshold).sum()

        precision = 100 * tp / (tp + fp + 1e-6)
        recall = 100 * tp / (tp + fn + 1e-6)
        f1 = 2 * precision * recall / (precision + recall + 1e-6)
        return {'precision': ('%.1f' % precision),
                'recall': ('%.1f' % recall),
                'f1': ('%.1f' % f1)}

    def find_best_classification_thresh_F1(self, score_fn, alpha):
        """Finds the classification threshold with the best F1, as in the
        "Order Embeddings" paper.

        Parameters
        ----------
        score_fn : function with signature
            score_fn(alpha, parent_indices, other_indices, rel_reversed).
            Scores each edge (u, v) for v in other_indices: the lower the
            score, the higher the chance the edge exists; the higher the
            score, the lower the chance.
        """
        # One (score, label) pair for each edge or non-edge in our dataset.
        all_labels_and_scores = []
        num_grd_trth_pos = len(self.pos_relations_parents)

        pos_scores = score_fn(alpha, self.pos_relations_parents, self.pos_relations_children, False)
        neg_scores = score_fn(alpha, self.neg_relations_parents, self.neg_relations_children, False)
        all_labels_and_scores.extend(zip(pos_scores, np.ones(len(pos_scores))))
        all_labels_and_scores.extend(zip(neg_scores, np.zeros(len(neg_scores))))

        # Sort by score. In case of equal scores, put the negatives (0-labels)
        # first, so that F1 at a tied score is never overestimated.
        all_labels_and_scores = sorted(all_labels_and_scores)  #### Expensive, O(n * log n)

        # Sweep the candidate threshold over the sorted scores; after the i-th
        # pair, (tp, fp) are exactly the counts obtained with threshold = score.
        tp = 0.0
        fp = 0.0
        best_th = 0.0
        best_f1 = -1
        for score, label in all_labels_and_scores:
            tp += label
            fp += (1.0 - label)
            precision = 100 * tp / (tp + fp + 1e-6)
            recall = 100 * tp / num_grd_trth_pos
            f1 = 2 * precision * recall / (precision + recall + 1e-6)
            if f1 > best_f1:
                best_f1 = f1
                best_th = score
        return best_th, best_f1
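
# Minimal self-contained smoke test (an addition, not part of the original
# module). It bypasses the Relations files by filling an _EvalObj's index
# lists by hand, and uses a toy score_fn where the score of (u, v) is the
# absolute difference of two hypothetical 1-d "embeddings". All names and
# values here are illustrative assumptions, not the real pipeline.
if __name__ == '__main__':
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('eval_classification_demo')

    # Six 1-d "embeddings"; nearby points are meant to be linked.
    embeddings = np.array([0.0, 0.1, 0.2, 1.0, 1.1, 2.0])

    def toy_score_fn(alpha, parent_indices, child_indices, rel_reversed):
        # Lower score <=> more likely an edge. alpha and rel_reversed are
        # unused in this toy.
        return np.abs(embeddings[parent_indices] - embeddings[child_indices])

    demo = object.__new__(_EvalObj)  # skip __init__, which reads relation files from disk
    demo.logger = logger
    demo.pos_relations_parents = np.array([0, 1, 3])   # true edges: (0,1), (1,2), (3,4)
    demo.pos_relations_children = np.array([1, 2, 4])
    demo.neg_relations_parents = np.array([0, 1, 2])   # non-edges: (0,5), (1,3), (2,4)
    demo.neg_relations_children = np.array([5, 3, 4])

    # All positive pairs score 0.1 and all negatives score >= 0.9, so the
    # sweep should find a threshold of 0.1 with F1 close to 100.
    th, f1 = demo.find_best_classification_thresh_F1(toy_score_fn, alpha=None)
    logger.info('best threshold = %.2f, best F1 = %.1f' % (th, f1))
    print(pretty_print_eval_map(
        {'toy': demo.evaluate_classification(toy_score_fn, alpha=None, threshold=th)}))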