5to9 committed
Commit c322f22 · 1 Parent(s): c20285d

0.48 set eos_token

Files changed (1):
  1. app.py +7 -2
app.py CHANGED
@@ -110,8 +110,10 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
     if "Pharia" in model_info[0]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
         tokenized = tokenizer_a(formatted_conversation, return_tensors="pt").to(device)
-        logging.debug(tokenized)
+        logging.debug(tokenized) #attention_mask
         input_ids_a = tokenized.input_ids
+        logging.debug(f'tokenizer_a.eos_token was: {tokenizer_a.eos_token}')
+        tokenizer_a.eos_token = "<|endoftext|>"
     else:
         input_ids_a = tokenizer_a.apply_chat_template(
             new_messages_a,
@@ -122,9 +124,11 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
 
     if "Pharia" in model_info[1]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
-        tokenized = tokenizer_a(formatted_conversation, return_tensors="pt").to(device)
+        tokenized = tokenizer_b(formatted_conversation, return_tensors="pt").to(device)
         logging.debug(tokenized)
         input_ids_b = tokenized.input_ids
+        logging.debug(f'tokenizer_b.eos_token was: {tokenizer_b.eos_token}')
+        tokenizer_b.eos_token = "<|endoftext|>"
     else:
         input_ids_b = tokenizer_b.apply_chat_template(
             new_messages_b,
@@ -132,6 +136,7 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
         dtype=torch.float16,
         return_tensors="pt"
     ).to(device)
+
 
     generation_kwargs_a = dict(
         input_ids=input_ids_a,
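
For context on the eos_token override: generate() stops once it emits the token whose id matches the tokenizer's eos_token_id, which transformers resolves from tokenizer.eos_token, so pointing eos_token at what is presumably the Pharia template's end-of-text marker lets generation terminate there. A minimal sketch of the same pattern, assuming a stock Hugging Face tokenizer/model pair; the "gpt2" checkpoint and prompt are placeholders, not taken from this Space:

# Sketch: override eos_token so generation stops at a template's
# end-of-text marker (placeholder checkpoint, illustration only).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

tokenizer.eos_token = "<|endoftext|>"  # same override as in the commit
input_ids = tokenizer("Hello", return_tensors="pt").input_ids.to(device)

output = model.generate(
    input_ids,
    max_new_tokens=32,
    eos_token_id=tokenizer.eos_token_id,  # resolved from the overridden eos_token
    pad_token_id=tokenizer.eos_token_id,  # avoid a missing-pad warning
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Note that in the diff the override runs after input_ids are already built, so it can only influence the subsequent generate() call (e.g. if its eos_token_id is taken from the tokenizer there), not the tokenization above it.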