from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
from tqdm import tqdm
import numpy as np
import json
from transformers import AutoTokenizer

def caption_evaluate(predictions, targets, tokenizer, text_trunc_length):
    """Compute corpus BLEU-2/4, METEOR, and ROUGE-1/2/L for generated captions."""
    meteor_scores = []
    references = []
    hypotheses = []
    for gt, out in tqdm(zip(targets, predictions)):
        gt_tokens = tokenizer.tokenize(gt, truncation=True, max_length=text_trunc_length,
                                       padding='max_length')
        # Strip BERT-style special tokens so they do not distort the scores.
        gt_tokens = list(filter(('[PAD]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[CLS]').__ne__, gt_tokens))
        gt_tokens = list(filter(('[SEP]').__ne__, gt_tokens))
        out_tokens = tokenizer.tokenize(out, truncation=True, max_length=text_trunc_length,
                                        padding='max_length')
        out_tokens = list(filter(('[PAD]').__ne__, out_tokens))
        out_tokens = list(filter(('[CLS]').__ne__, out_tokens))
        out_tokens = list(filter(('[SEP]').__ne__, out_tokens))
        references.append([gt_tokens])
        hypotheses.append(out_tokens)
        # METEOR is computed per pair and averaged after the loop.
        mscore = meteor_score([gt_tokens], out_tokens)
        meteor_scores.append(mscore)
    bleu2 = corpus_bleu(references, hypotheses, weights=(.5, .5))
    bleu4 = corpus_bleu(references, hypotheses, weights=(.25, .25, .25, .25))
    bleu2 *= 100
    bleu4 *= 100
    print('BLEU-2 score:', bleu2)
    print('BLEU-4 score:', bleu4)
    _meteor_score = np.mean(meteor_scores)
    _meteor_score *= 100
    print('Average Meteor score:', _meteor_score)
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
    rouge_scores = []
    for gt, out in tqdm(zip(targets, predictions)):
        # RougeScorer.score expects (target, prediction); only the symmetric
        # F-measure is read below, so precision/recall order does not matter.
        rs = scorer.score(gt, out)
        rouge_scores.append(rs)
    print('ROUGE score:')
    rouge_1 = np.mean([rs['rouge1'].fmeasure for rs in rouge_scores]) * 100
    rouge_2 = np.mean([rs['rouge2'].fmeasure for rs in rouge_scores]) * 100
    rouge_l = np.mean([rs['rougeL'].fmeasure for rs in rouge_scores]) * 100
    print('rouge1:', rouge_1)
    print('rouge2:', rouge_2)
    print('rougeL:', rouge_l)
    return bleu2, bleu4, rouge_1, rouge_2, rouge_l, _meteor_score
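
# Example usage (a minimal sketch, not part of the original pipeline; the
# checkpoint name and the toy caption strings are illustrative assumptions):
#
#   tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
#   predictions = ['a cat sitting on a mat']
#   targets = ['a cat sits on the mat']
#   caption_evaluate(predictions, targets, tokenizer, text_trunc_length=512)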

class AttrDict(dict):
    """Dictionary whose keys are also accessible as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self
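
# Example (illustrative): attribute access mirrors key access.
#
#   args = AttrDict({'batch_size': 32})
#   assert args.batch_size == args['batch_size'] == 32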

def get_tokens_as_list(tokenizer, word_list):
    """Converts a sequence of words into a flat list of token ids.

    Source: https://huggingface.co/docs/transformers/internal/generation_utils
    """
    tokens_list = []
    for word in word_list:
        tokenized_word = tokenizer([word], add_special_tokens=False).input_ids[0]
        tokens_list.extend(tokenized_word)
    return tokens_list

def get_not_allowed_tokens_ids(tokenizer_name, allowed_words_file='model/allowed_words.json'):
    tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(tokenizer_name, add_prefix_space=True)
    with open(allowed_words_file, 'r') as f:
        allowed_words = json.load(f)
    allowed_words = list(allowed_words.values())
    # Use a set for O(1) membership tests while scanning the full vocabulary.
    allowed_tokens_ids = set(get_tokens_as_list(tokenizer_with_prefix_space, allowed_words))
    full_token_space = range(tokenizer_with_prefix_space.vocab_size)
    # Each banned id goes in its own one-element list, the format expected by
    # the `bad_words_ids` argument of Hugging Face generate().
    not_allowed_tokens_ids = [[token_id] for token_id in full_token_space if token_id not in allowed_tokens_ids]
    return not_allowed_tokens_ids
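
# Example usage (a sketch under assumptions: 'gpt2' as the tokenizer/model name
# and the prompt text are illustrative, as is the default allowed_words.json
# path). The nested-list return value plugs into the `bad_words_ids` argument
# of Hugging Face `generate`, banning every token id outside the allowed set:
#
#   from transformers import AutoModelForCausalLM
#   model = AutoModelForCausalLM.from_pretrained('gpt2')
#   tokenizer = AutoTokenizer.from_pretrained('gpt2')
#   input_ids = tokenizer('The compound is', return_tensors='pt').input_ids
#   bad_words_ids = get_not_allowed_tokens_ids('gpt2')
#   output_ids = model.generate(input_ids, bad_words_ids=bad_words_ids)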