0.48 set eos_token
app.py CHANGED
@@ -110,8 +110,10 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
     if "Pharia" in model_info[0]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
         tokenized = tokenizer_a(formatted_conversation, return_tensors="pt").to(device)
-        logging.debug(tokenized)
+        logging.debug(tokenized) #attention_mask
         input_ids_a = tokenized.input_ids
+        logging.debug(f'tokenizer_a.eos_token was: {tokenizer_a.eos_token}')
+        tokenizer_a.eos_token = "<|endoftext|>"
     else:
         input_ids_a = tokenizer_a.apply_chat_template(
             new_messages_a,
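The substantive change in this hunk is forcing the EOS token to `<|endoftext|>` for Pharia models. A minimal sketch of what that override does, assuming a standard transformers AutoTokenizer (the checkpoint id below is illustrative, not taken from this Space):

```python
# Minimal sketch: overriding a tokenizer's EOS token. Assumes a standard
# transformers AutoTokenizer; the checkpoint id is illustrative only.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some-org/some-pharia-checkpoint")

print(tokenizer.eos_token)             # whatever the checkpoint shipped with
tokenizer.eos_token = "<|endoftext|>"  # the string should already be in the vocab
print(tokenizer.eos_token_id)          # id is re-resolved from the new string

# Note: generate() stops on the model's generation_config.eos_token_id by
# default, so the override only takes effect if the new id is passed along:
# model.generate(input_ids, eos_token_id=tokenizer.eos_token_id, ...)
```

Since the string assignment is only mapped to an id lazily, it is worth checking that `tokenizer.eos_token_id` resolves to a real vocabulary id after the override.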
@@ -122,9 +124,11 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
 
     if "Pharia" in model_info[1]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
-        tokenized =
+        tokenized = tokenizer_b(formatted_conversation, return_tensors="pt").to(device)
         logging.debug(tokenized)
         input_ids_b = tokenized.input_ids
+        logging.debug(f'tokenizer_b.eos_token was: {tokenizer_b.eos_token}')
+        tokenizer_b.eos_token = "<|endoftext|>"
     else:
         input_ids_b = tokenizer_b.apply_chat_template(
             new_messages_b,
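This hunk restores the `tokenized = ...` assignment (now pointing at `tokenizer_b`) and applies the same EOS override to model B. Note that the branch still formats `new_messages_a` rather than `new_messages_b`, which looks like a copy-paste leftover. The `apply_pharia_template` helper is defined elsewhere in app.py and its body is not part of this diff, so the sketch below is purely hypothetical about its shape; the role markers are placeholders, not Pharia's actual special tokens:

```python
# Hypothetical sketch of an apply_pharia_template-style helper. The real
# Pharia prompt format is not shown in this diff; the markers below are
# placeholders chosen only to illustrate the function's contract.
def apply_pharia_template(messages: list[dict], add_generation_prompt: bool = False) -> str:
    """Flatten [{'role': ..., 'content': ...}, ...] into one prompt string."""
    prompt = ""
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}<|endoftext|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"  # leave the assistant turn open for generation
    return prompt
```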
@@ -132,6 +136,7 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
         dtype=torch.float16,
         return_tensors="pt"
     ).to(device)
+
 
     generation_kwargs_a = dict(
         input_ids=input_ids_a,
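The trailing hunk only adds a blank line ahead of the generation setup, but it shows where `generation_kwargs_a` is built. Below is a sketch of how such kwargs are commonly consumed in streaming arena Spaces, assuming transformers' `TextIteratorStreamer` and a background `generate()` thread; `model_a` and `streamer_a` are assumed names, while only `input_ids_a` and `tokenizer_a` appear in the diff:

```python
# Sketch of a common streaming pattern around generation_kwargs_a. Assumes
# transformers' TextIteratorStreamer; model_a and streamer_a are assumed names.
from threading import Thread
from transformers import TextIteratorStreamer

streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True, skip_special_tokens=True)
generation_kwargs_a = dict(
    input_ids=input_ids_a,
    streamer=streamer_a,
    max_new_tokens=max_new_tokens,          # from the (truncated) function signature
    eos_token_id=tokenizer_a.eos_token_id,  # picks up the <|endoftext|> override
)
Thread(target=model_a.generate, kwargs=generation_kwargs_a).start()

partial = ""
for new_text in streamer_a:  # yields decoded text as tokens arrive
    partial += new_text      # e.g. update chatbot_a with the growing reply
```

Passing `eos_token_id` explicitly is what makes the tokenizer-level override from the earlier hunks actually stop generation.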