Amossofer committed
Commit e45fce6 · 1 Parent(s): 8756636
Files changed (1)
  1. app.py +16 -28
app.py CHANGED
@@ -11,40 +11,28 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 
-def blend_generate(sysA, sysB, wA, wB, user_message, max_new_tokens, temperature, top_p):
-    promptA = f"<|system|>{sysA}\n<|user|>{user_message}\n<|assistant|>"
-    promptB = f"<|system|>{sysB}\n<|user|>{user_message}\n<|assistant|>"
-
-    idsA = tokenizer(promptA, return_tensors="pt").input_ids.to(model.device)
-    idsB = tokenizer(promptB, return_tensors="pt").input_ids.to(model.device)
-
-    outA, outB = idsA.clone(), idsB.clone()
-    response = ""
-
-    for _ in range(max_new_tokens):
-        with torch.no_grad():
-            logitsA = model(input_ids=outA).logits[:, -1, :]
-            logitsB = model(input_ids=outB).logits[:, -1, :]
-
-        blended = wA * logitsA + wB * logitsB
-        blended = blended / temperature
-
-        probs = F.softmax(blended, dim=-1)
-        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
-        cum = torch.cumsum(sorted_probs, dim=-1)
-        sorted_probs[cum > top_p] = 0
-        sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
-
-        token = sorted_idx[:, torch.multinomial(sorted_probs, 1)].squeeze()
-        outA = torch.cat([outA, token.unsqueeze(0).unsqueeze(0)], dim=1)
-        outB = torch.cat([outB, token.unsqueeze(0).unsqueeze(0)], dim=1)
-
-        token_str = tokenizer.decode(token)
-        response += token_str
-        yield response
-
-        if token.item() == tokenizer.eos_token_id:
-            break
+def blend_generate(prompt, wa, wb):
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+    with torch.no_grad():
+        output_a = model_a(input_ids)
+        output_b = model_b(input_ids)
+
+    logits_a = output_a.logits[:, -1, :]
+    logits_b = output_b.logits[:, -1, :]
+
+    # Weighted sum of raw logits (before softmax)
+    blended_logits = wa * logits_a + wb * logits_b
+
+    # Apply softmax safely to get a valid probability distribution
+    probs = torch.softmax(blended_logits, dim=-1)
+
+    # Sample a token from the valid probability distribution
+    token = torch.multinomial(probs, 1)
+    next_token_id = token.item()
+    next_token = tokenizer.decode([next_token_id])
+
+    return next_token
 
 with gr.Blocks() as demo:
     gr.Markdown("## Blended Prompt Chat (TinyLlama)")
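Usage note: the new blend_generate returns a single decoded token sampled from the weighted mixture of the two models' next-token distributions, so producing a whole reply requires calling it in a loop and re-feeding the text generated so far. The sketch below illustrates such a loop; it assumes blend_generate, tokenizer, model_a, model_b, and device are defined as the new app.py code expects, while blend_chat, its default weights, and the eos check are illustrative assumptions rather than code from this commit.

```python
# Illustrative sketch only: loop the one-step blend_generate to build a reply.
# Assumes blend_generate, tokenizer, model_a, model_b, device exist as in
# app.py; blend_chat itself is a hypothetical helper, not part of the commit.
def blend_chat(user_message, wa=0.5, wb=0.5, max_new_tokens=128):
    # Same chat-template style the earlier blend_generate used
    prompt = f"<|system|>You are a helpful assistant.\n<|user|>{user_message}\n<|assistant|>"
    response = ""
    for _ in range(max_new_tokens):
        # Each call re-tokenizes the whole string: simple for a demo, but not
        # token-exact and quadratic in sequence length.
        next_token = blend_generate(prompt + response, wa, wb)
        # blend_generate returns a decoded string, so compare against the
        # eos token string rather than the token id.
        if next_token == tokenizer.eos_token:
            break
        response += next_token
    return response
```

When wa + wb = 1 the blend is a convex combination, so the blended logits stay on roughly the same scale as either model's own logits before the softmax; unnormalized weights also rescale the logits and therefore sharpen or flatten the sampling distribution.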