File size: 947 Bytes
fbdcb75
78047da
fbdcb75
 
78047da
151d72b
d016c3b
 
 
1d29ee7
d016c3b
 
 
 
 
 
 
 
1d29ee7
d016c3b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from transformers import BertTokenizer, BertForSequenceClassification

# Checkpoint shared by the tokenizer and the model so both always load the
# same vocabulary and weights.
_MODEL_ID = 'juridics/bertimbaulaw-base-portuguese-sts-scale'

# Loaded once at import time and reused by every call in this module.
tokenizer = BertTokenizer.from_pretrained(_MODEL_ID)
model = BertForSequenceClassification.from_pretrained(_MODEL_ID)

def generate_answers(query, max_new_tokens=100):
    """Generate text from ``query`` using the module-level tokenizer and model.

    The query is tokenized without padding, passed to ``model.generate`` with
    nucleus sampling enabled, and the first returned sequence is decoded back
    into a string.

    Args:
        query: Prompt text to tokenize and feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated on top
            of the prompt. Defaults to 100, the margin previously added to
            the prompt length.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.

    NOTE(review): the module loads ``BertForSequenceClassification``, which
    going by its name carries a classification head rather than a
    language-modeling head, so ``generate`` is expected to be rejected for
    it. Confirm, and load a generative model class/checkpoint at module
    level before relying on this function.
    """
    # Keep prompt + generated tokens within the model's position limit; fall
    # back to the 512 used previously when the config does not expose one.
    position_limit = getattr(model.config, 'max_position_embeddings', 512)
    prompt_budget = max(1, position_limit - max_new_tokens)

    # No padding: a single query needs none, and padding to 512 made the
    # requested total length 612 tokens regardless of the actual query.
    encoded = tokenizer(
        query,
        return_tensors='pt',
        truncation=True,
        max_length=prompt_budget,
    )

    generated_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_new_tokens=max_new_tokens,  # bound the continuation, not the total
        do_sample=True,  # temperature/top_p only take effect when sampling
        temperature=0.7,  # tune how adventurous the sampling is
        top_p=0.9,  # nucleus sampling
        no_repeat_ngram_size=2,  # avoid needless repetition
    )

    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)