Chris4K committed
Commit a0dfe96
1 Parent(s): 2bca17a

Update app.py

Files changed (1):
  1. app.py +80 -1
app.py CHANGED
@@ -243,6 +243,84 @@ app = gr.Interface(
     inputs=["text", "checkbox", gr.Slider(0, 100)],
     outputs=["text", "number"],
 )
+
+####
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextIteratorStreamer
+
+model_id = "philschmid/instruct-igel-001"
+model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+prompt_template = f"### Anweisung:\n{{input}}\n\n### Antwort:"
+
+def generate(instruction, temperature=1.0, max_new_tokens=256, top_p=0.9, length_penalty=1.0):
+    formatted_instruction = prompt_template.format(input=instruction)
+
+    # make sure temperature, top_p and length_penalty are floats
+    temperature = float(temperature)
+    top_p = float(top_p)
+    length_penalty = float(length_penalty)
+
+    # COMMENT IN FOR NON-STREAMING
+    # generation_config = GenerationConfig(
+    #     do_sample=True,
+    #     top_p=top_p,
+    #     top_k=0,
+    #     temperature=temperature,
+    #     max_new_tokens=max_new_tokens,
+    #     early_stopping=True,
+    #     length_penalty=length_penalty,
+    #     eos_token_id=tokenizer.eos_token_id,
+    #     pad_token_id=tokenizer.pad_token_id,
+    # )
+    #
+    # input_ids = tokenizer(
+    #     formatted_instruction, return_tensors="pt", truncation=True, max_length=2048
+    # ).input_ids.cuda()
+    #
+    # with torch.inference_mode(), torch.autocast("cuda"):
+    #     outputs = model.generate(input_ids=input_ids, generation_config=generation_config)[0]
+    #
+    # output = tokenizer.decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
+    # return output.split("### Antwort:\n")[1]
+
+    # STREAMING, BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
+    streamer = TextIteratorStreamer(tokenizer)
+    model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
+    # move inputs to the GPU (`device` must be defined elsewhere in app.py)
+    model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
+
+    generate_kwargs = dict(
+        top_p=top_p,
+        top_k=0,
+        temperature=temperature,
+        do_sample=True,
+        max_new_tokens=max_new_tokens,
+        early_stopping=True,
+        length_penalty=length_penalty,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    t = Thread(target=model.generate, kwargs={**dict(model_inputs, streamer=streamer), **generate_kwargs})
+    t.start()
+
+    output = ""
+    hidden_output = ""
+    for new_text in streamer:
+        # skip streaming until new text is available (the streamer echoes the prompt first)
+        if len(hidden_output) <= len(formatted_instruction):
+            hidden_output += new_text
+            continue
+        # strip the eos token from the visible output
+        if tokenizer.eos_token in new_text:
+            new_text = new_text.replace(tokenizer.eos_token, "")
+        output += new_text
+        yield output
+    # if HF_TOKEN:
+    #     save_inputs_and_outputs(formatted_instruction, output, generate_kwargs)
+    return output
+
 #app.launch()
 ####################
 
@@ -270,11 +348,12 @@ def topic_sale_inform (text):
 #conversation = Conversation("Welcome")
 
 def callChains(current_message):
+    final_answer = generate(current_message, 1.0, 256, 0.9, 1.0)
     sentiment_analysis_result = pipeline_predict_sentiment(current_message)
     topic_sale_inform_result = topic_sale_inform(current_message)
     #conversation.append_response("The Big lebowski.")
     #conversation.add_user_input("Is it good?")
-    final_answer = func(current_message)
+    #final_answer = func(current_message)
     return final_answer, sentiment_analysis_result, topic_sale_inform_result
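One caveat with the second hunk: because the new `generate` contains `yield`, calling it returns a generator object, so `final_answer` here is a generator rather than a finished string unless the caller drains it. A hedged sketch of one way the caller could do that, assuming the Gradio "text" output expects a plain string (`pipeline_predict_sentiment` and `topic_sale_inform` are the functions already defined in app.py):

```python
# Hypothetical consumer: exhaust the generator to obtain the final answer.
# (Alternatively, callChains could itself `yield` partial tuples so the
# Gradio output streams incrementally.)
def callChains(current_message):
    final_answer = ""
    for partial in generate(current_message, 1.0, 256, 0.9, 1.0):
        final_answer = partial  # each item is the cumulative output so far
    sentiment_analysis_result = pipeline_predict_sentiment(current_message)
    topic_sale_inform_result = topic_sale_inform(current_message)
    return final_answer, sentiment_analysis_result, topic_sale_inform_result
```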