5to9 committed
Commit c322f22 · 1 Parent(s): c20285d

0.48 set eos_token

Files changed (1):
  1. app.py +7 -2
app.py CHANGED
@@ -110,8 +110,10 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
     if "Pharia" in model_info[0]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
         tokenized = tokenizer_a(formatted_conversation, return_tensors="pt").to(device)
-        logging.debug(tokenized)
+        logging.debug(tokenized) #attention_mask
         input_ids_a = tokenized.input_ids
+        logging.debug(f'tokenizer_a.eos_token was: {tokenizer_a.eos_token}')
+        tokenizer_a.eos_token = "<|endoftext|>"
     else:
         input_ids_a = tokenizer_a.apply_chat_template(
             new_messages_a,
@@ -122,9 +124,11 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
 
     if "Pharia" in model_info[1]['id']:
         formatted_conversation = apply_pharia_template(messages=new_messages_a, add_generation_prompt=True)
-        tokenized = tokenizer_a(formatted_conversation, return_tensors="pt").to(device)
+        tokenized = tokenizer_b(formatted_conversation, return_tensors="pt").to(device)
         logging.debug(tokenized)
         input_ids_b = tokenized.input_ids
+        logging.debug(f'tokenizer_b.eos_token was: {tokenizer_b.eos_token}')
+        tokenizer_b.eos_token = "<|endoftext|>"
     else:
         input_ids_b = tokenizer_b.apply_chat_template(
             new_messages_b,
@@ -132,6 +136,7 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
         dtype=torch.float16,
         return_tensors="pt"
     ).to(device)
+
 
     generation_kwargs_a = dict(
         input_ids=input_ids_a,
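
For context on the eos_token override: generate() stops once it emits the token whose id matches the tokenizer's eos_token_id, which transformers resolves from tokenizer.eos_token, so pointing eos_token at what is presumably the Pharia template's end-of-text marker lets generation terminate there. A minimal sketch of the same pattern, assuming a stock Hugging Face tokenizer/model pair; the "gpt2" checkpoint and prompt are placeholders, not taken from this Space:

# Sketch: override eos_token so generation stops at a template's
# end-of-text marker (placeholder checkpoint, illustration only).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

tokenizer.eos_token = "<|endoftext|>"  # same override as in the commit
input_ids = tokenizer("Hello", return_tensors="pt").input_ids.to(device)

output = model.generate(
    input_ids,
    max_new_tokens=32,
    eos_token_id=tokenizer.eos_token_id,  # resolved from the overridden eos_token
    pad_token_id=tokenizer.eos_token_id,  # avoid a missing-pad warning
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Note that in the diff the override runs after input_ids are already built, so it can only influence the subsequent generate() call (e.g. if its eos_token_id is taken from the tokenizer there), not the tokenization above it.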