import gradio as gr
import os
from loguru import logger
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate
import spaces
import json
# Create a directory for logs if it doesn't exist
if not os.path.exists('logs'):
    os.makedirs('logs')

# Define the log file path; loguru expands {time} to a timestamp
log_file = 'logs/file_{time}.log'

# Configure the logger to write to the log file, rotating it at 500 MB
logger.add(log_file, rotation="500 MB")
template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate.from_template(template)
# Callbacks support token-wise streaming; StreamingStdOutCallbackHandler prints tokens to stdout (the Space logs)
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# n_gpu_layers = -1  # The number of layers to put on the GPU; the rest stay on the CPU. Use -1 to move all layers to the GPU.
# n_batch = 512  # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
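# A minimal sketch (not part of the original code) of downloading the GGUF file at
# startup instead of shipping it in the repo; the repo_id and filename below are
# assumptions:
#
#     from huggingface_hub import hf_hub_download
#     model_path = hf_hub_download(
#         repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",  # assumed source repo
#         filename="Phi-3-mini-4k-instruct-q4.gguf",        # assumed filename
#         local_dir="/home/user/app/models",
#     )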
llm = LlamaCpp(
    model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)
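# A minimal sketch, assuming the Space runs on GPU hardware: the n_gpu_layers /
# n_batch values commented out above could be passed straight to LlamaCpp to
# offload inference to the GPU, e.g.:
#
#     llm = LlamaCpp(
#         model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
#         n_gpu_layers=-1,  # offload all layers to the GPU
#         n_batch=512,      # keep between 1 and n_ctx, within available VRAM
#         callback_manager=callback_manager,
#         verbose=True,
#     )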
# Compose the prompt and the model into a chain with the LCEL pipe operator
llm_chain = prompt | llm
def greet(name):
    question = name
    logger.info(f"Received question: {question}")
    # Run the chain and return the model's full answer
    return llm_chain.invoke({"question": question})
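# A minimal sketch of how the imported `spaces` package is typically used on
# ZeroGPU hardware (an assumption; the decorator is not applied above):
# decorate the GPU-bound function so a GPU is allocated for each call.
#
#     @spaces.GPU
#     def greet(name):
#         question = name
#         return llm_chain.invoke({"question": question})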
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()