Commit eefe540
Parent(s): b5d9a88

update in code.

Files changed:
- app.py +28 -2
- models/Phi-3-mini-4k-instruct-q4.gguf +1 -0
app.py CHANGED

@@ -1,7 +1,11 @@
 import gradio as gr
 import os
 from loguru import logger
-
+from langchain_community.llms import LlamaCpp
+from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
+from langchain_core.prompts import PromptTemplate
+import spaces
+import json
 # Create a directory for logs if it doesn't exist
 if not os.path.exists('logs'):
     os.makedirs('logs')

@@ -12,9 +16,31 @@ log_file = 'logs/file_{time}.log'
 # Configure the logger to write to the log file
 logger.add(log_file, rotation="500 MB")
 
+template = """Question: {question}
+
+Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+prompt = PromptTemplate.from_template(template)
+# Callbacks support token-wise streaming
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+# n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
+# n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+
+# Make sure the model path is correct for your system!
+llm = LlamaCpp(
+    model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
+    callback_manager=callback_manager,
+    verbose=True,  # Verbose is required to pass to the callback manager
+)
+llm_chain = prompt | llm
+
+@spaces.GPU(duration=120)
 def greet(name):
     logger.info("This is an info message")
-
+    question = name
+    # print(llm_chain.invoke({"question": question}))
+    return llm_chain.invoke({"question": question})
 
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()
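For reference, the chain added above can be exercised outside of Gradio. This is a minimal sketch using the commit's own values; the `n_gpu_layers`/`n_batch` arguments come from the commented-out lines in the diff and are an assumption about how they would be wired in, and the test question is purely illustrative:

```python
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

# Same prompt template as in the commit
template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""
prompt = PromptTemplate.from_template(template)

llm = LlamaCpp(
    model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
    n_gpu_layers=-1,  # assumption: offload all layers, per the commented-out line
    n_batch=512,      # assumption: per the commented-out line; keep between 1 and n_ctx
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,  # required for the callback manager to receive token events
)
llm_chain = prompt | llm  # LCEL pipe: format the prompt, then run the model

print(llm_chain.invoke({"question": "What is 2 + 2?"}))  # illustrative input
```

The `@spaces.GPU(duration=120)` decorator in the diff is the ZeroGPU pattern on Hugging Face Spaces: it requests a GPU for the decorated function for up to 120 seconds per call, which is why inference runs inside `greet` rather than at module load.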
models/Phi-3-mini-4k-instruct-q4.gguf ADDED

@@ -0,0 +1 @@
+../../.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct-gguf/blobs/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef
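The added file is not the model weights themselves but a relative symlink into the builder's local Hugging Face cache. A sketch of how the same layout could be reproduced with `huggingface_hub` (the `repo_id` is read off the cache path above; the exact `filename` in that repo is an assumption):

```python
import os
from huggingface_hub import hf_hub_download

# Download (or reuse) the GGUF in the local HF cache; returns the cached blob path
cached_path = hf_hub_download(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="Phi-3-mini-4k-instruct-q4.gguf",  # assumed filename in that repo
)

# Recreate the models/ symlink recorded in this commit
os.makedirs("models", exist_ok=True)
link = os.path.join("models", "Phi-3-mini-4k-instruct-q4.gguf")
if not os.path.lexists(link):
    os.symlink(cached_path, link)
```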