# Spaces: Runtime error — Hugging Face Spaces status banner captured when this
# script was exported from the Space page; kept here as a comment for reference.
import os
import json
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
# Fetch the 2-bit quantized GGML weights from the Hugging Face Hub.
# The file is cached locally, so repeated launches skip the download.
hub_model_path = hf_hub_download(
    filename='h2ogpt-4096-llama2-13b.ggmlv3.q2_K.bin',
    repo_id='TheBloke/h2ogpt-4096-llama2-13B-GGML',
)
# Load the model through the llama.cpp bindings. A fixed seed keeps sampling
# reproducible; mlock pins the weights in physical memory to avoid swapping.
model = Llama(
    model_path=hub_model_path,
    n_ctx=220,        # Maximum context size. TODO: Increase this later.
    n_batch=64,       # Prompt tokens evaluated per batch.
    seed=77,          # Deterministic sampling across runs.
    use_mlock=True,   # Force the system to keep the model in RAM.
)
def generate(prompt):
    """Run the LLM on *prompt* and return the full completion payload as JSON.

    Generation is capped at 64 new tokens and halts at a 'Q:' or newline
    stop sequence; ``echo=True`` makes the model include the prompt text in
    the returned completion. The entire llama.cpp response dict (text plus
    usage metadata) is serialized for display.
    """
    completion = model(prompt, max_tokens=64, stop=['Q:', '\n'], echo=True)
    return json.dumps(completion, indent=4)
# Expose generate() through a minimal text-in / text-out Gradio web UI.
iface = gr.Interface(
    fn=generate,
    inputs='text',
    outputs='text',
)
iface.launch()