Spaces:

Mykes
/

rumed-phi3-mini

Sleeping

App Files Files Community

Mykes committed on Jul 13, 2024

Commit

43906f9

verified ·

1 Parent(s): 9334c42

Upload app_stream_working.py

Browse files

Files changed (1) hide show

app_stream_working.py +55 -0

app_stream_working.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import streamlit as st
+from llama_cpp import Llama
+# llm = Llama.from_pretrained(
+#     repo_id="Mykes/med_gemma7b_gguf",
+#     filename="*Q4_K_M.gguf",
+#     verbose=False,
+#     n_ctx=512,
+#     n_batch=512,
+#     n_threads=4
+# )
+@st.cache_resource
+def load_model():
+    return Llama.from_pretrained(
+        # repo_id="Mykes/med_gemma7b_gguf",
+        # filename="*Q4_K_M.gguf",
+        repo_id="Mykes/med_phi3-mini-4k-GGUF",
+        filename="*Q4_K_M.gguf",
+        verbose=False,
+        n_ctx=256,
+        n_batch=256,
+        n_threads=4
+    )
+llm = load_model()
+# basic_prompt = "Below is the context which is your conversation history and the last user question. Write a response according the context and question. ### Context: user: Ответь мне на вопрос о моем здоровье. assistant: Конечно! Какой у Вас вопрос? ### Question: {question} ### Response:"
+basic_prompt = "Q: {question}\nA:"
+input_text = st.text_input('text')
+model_input = basic_prompt.format(question=input_text)
+if input_text:
+    # Create an empty placeholder for the output
+    output_placeholder = st.empty()
+    # Initialize an empty string to store the generated text
+    generated_text = ""
+    # Stream the output
+    for token in llm(
+        model_input,
+        # max_tokens=32,
+        max_tokens=None,
+        stop=["<end_of_turn>"],
+        echo=True,
+        stream=True  # Enable streaming
+    ):
+        # Append the new token to the generated text
+        generated_text += token['choices'][0]['text']
+        # Update the placeholder with the current generated text
+        output_placeholder.write(generated_text)
+    # After the generation is complete, you can do any final processing if needed
+    st.write("Generation complete!")