# Paste artifacts from the original snippet ("Spaces:" header and two
# "Runtime error" status lines) — kept as a comment so the module parses.
| from typing import Union | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from llama_cpp import Llama | |
class InferenceRequest(BaseModel):
    """JSON request body for the inference endpoint.

    Fields are declared exactly as in the original model; the handler
    performs its own coercion/fallback for ``max_tokens``.
    """

    # Prompt text to complete; None when the client omits it.
    input: Union[str, None] = None
    # Requested completion budget. Defaults to 0 — the handler converts it
    # with int() and falls back to 256 when conversion fails (e.g. None).
    max_tokens: Union[int, None] = 0
# FastAPI application instance; route handlers below attach to it.
app = FastAPI()

# Load the GGUF model ONCE at import time so every request reuses the same
# in-memory weights. n_ctx=4096 sets the context window; verbose=False
# silences llama.cpp's stderr logging. NOTE: this blocks module import until
# the model file is read — the path must exist relative to the working dir.
llm = Llama(model_path="./models/mistral-7b-openorca.Q4_K_S.gguf",
            verbose=False, n_ctx=4096)
# NOTE(review): the pasted source shows no route decorator, so this handler
# was never registered with the app (a plausible cause of the pasted
# "Runtime error"). "/" is the conventional root path — confirm against the
# original application.
@app.get("/")
async def root():
    """Health-check endpoint returning a static greeting."""
    return {"message": "Hello World"}
# NOTE(review): no route decorator appears in the pasted source; without one
# this handler is unreachable. The path name is an assumption — confirm.
@app.post("/inference")
async def inference(request: InferenceRequest):
    """Run a completion on the shared model and return llama.cpp's raw result.

    Falls back to 256 tokens when ``max_tokens`` cannot be converted to int,
    and returns ``{}`` on any inference failure — preserving the original
    best-effort contract, but without bare ``except:`` clauses that would
    also swallow SystemExit/KeyboardInterrupt and hide real bugs.
    """
    input_text = request.input

    # Coerce the client-supplied budget; catch only the conversion failures
    # (int(None) -> TypeError, int("abc") -> ValueError), not everything.
    try:
        max_tokens = int(request.max_tokens)
    except (TypeError, ValueError):
        max_tokens = 256

    # A missing prompt previously reached llm() and was rescued by the
    # blanket except; handle it explicitly with the same {} response.
    if input_text is None:
        return {}

    try:
        result = llm(input_text, temperature=0.2,
                     top_k=5, max_tokens=max_tokens, stop=["<|im_end|>"])
        return result
    except Exception:
        # Best-effort: the original returned {} on failure; keep that
        # behavior while letting process-control exceptions propagate.
        return {}