File size: 1,047 Bytes
834d42f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import logging

# Module-level logger registered under the fixed name 'stancedatasets'
# (not __name__); the code visible in this file does not log through it.
logger = logging.getLogger('stancedatasets')


def format_masked_lm_prompt(stance_df, tokenizer, prompt_type, lang='en', masked_lm_tokens=1):
    """Build (text, prompt) pairs for masked-LM stance prompting.

    Each prompt is a fixed sentence wrapped around the row's target and
    ending in a run of mask tokens followed by a full stop.

    Args:
        stance_df: Object indexable by ``'text'`` and ``'target'``.
            Presumably a pandas DataFrame with string columns, since the
            ``'target'`` column is concatenated with plain strings
            element-wise -- confirm against callers.
        tokenizer: Any object exposing a ``mask_token`` string attribute.
        prompt_type: Template selector. Only 1-4 are defined, and only for
            ``lang='pl'``: 1 and 2 use the plain opening, 3 and 4 the
            opening prefixed with "Więc"; 1 and 3 put a colon before the
            mask slot, 2 and 4 do not.
        lang: Language code of the template set. Only ``'pl'`` has
            templates here, so the default ``'en'`` always ends in the
            ``ValueError`` below.
        masked_lm_tokens: How many times the mask token is repeated (the
            copies are joined without any separator).

    Returns:
        A 2-tuple ``(texts, prompts)`` of lists: the values of
        ``stance_df['text']`` and the prompt built for each
        ``stance_df['target']`` value.

    Raises:
        ValueError: If no template matches the ``lang`` / ``prompt_type``
            combination.
    """
    # The mask run is built up front, before any language/type dispatch.
    mask_token = tokenizer.mask_token
    mask_run = "".join([mask_token] * masked_lm_tokens)

    # (prompt_type, words before the target, words between target and masks)
    polish_variants = (
        (1, 'Moja postawa w kierunku ', ' jest: '),
        (2, 'Moja postawa w kierunku ', ' jest '),
        (3, 'Więc moja postawa w kierunku ', ' jest: '),
        (4, 'Więc moja postawa w kierunku ', ' jest '),
    )
    variants = polish_variants if lang == 'pl' else ()

    # Linear scan with == keeps the comparison semantics of an if/elif chain.
    for variant_id, opening, linker in variants:
        if prompt_type == variant_id:
            texts = list(stance_df['text'])
            closing = linker + mask_run + '.'
            prompts = list(opening + stance_df['target'] + closing)
            return texts, prompts

    raise ValueError(f'unknown prompt_type: {prompt_type} for language {lang}')