# Hugging Face Space: herMaster/poweinfer-inference
# Status: Sleeping
# File size: 1,877 bytes

import subprocess
import os
import urllib.request
import gradio as gr


def clone_power_infer():
    """Clone the PowerInfer repository into ``./PowerInfer`` unless it is already there.

    The clone is skipped when ``PowerInfer/.git`` already exists: ``git clone``
    refuses to write into a non-empty directory, so re-running the script in
    the same working tree would otherwise fail on every start.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status.
    """
    repo_url = "https://github.com/SJTU-IPADS/PowerInfer.git"
    # git names the checkout after the last URL component without ".git".
    checkout_dir = "PowerInfer"

    if os.path.isdir(os.path.join(checkout_dir, ".git")):
        return

    # check=True surfaces a failed clone here instead of letting later steps
    # fail with a less obvious error.
    subprocess.run(["git", "clone", repo_url], check=True)

def install_requirements():
    """Install the packages listed in ``requirements.txt`` of the current directory.

    pip is invoked as ``<running interpreter> -m pip`` so the packages are
    installed for the interpreter executing this script, not for whichever
    ``pip`` executable happens to come first on ``PATH``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import sys  # local import: only this function needs it

    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    subprocess.run(pip_command, check=True)

def cmake_builds():
    """Configure and compile the project in the current directory with CMake.

    Runs ``cmake -S . -B build`` followed by
    ``cmake --build build --config Release``. The build step is attempted only
    when the configure step succeeded.

    Raises:
        subprocess.CalledProcessError: If either CMake invocation exits with a
            non-zero status.
    """
    # Configure: generate the build system into ./build.
    subprocess.run(["cmake", "-S", ".", "-B", "build"], check=True)

    # Compile the generated build tree, requesting the Release configuration.
    subprocess.run(["cmake", "--build", "build", "--config", "Release"], check=True)

# One-time setup executed at import time: fetch the sources, switch into the
# checkout, install its Python requirements and compile the binaries. Every
# relative path used below is therefore relative to ./PowerInfer.
clone_power_infer()
os.chdir("PowerInfer")
install_requirements()
cmake_builds()

# exist_ok=True keeps a re-run in the same working tree from failing with
# FileExistsError when the directory was created by an earlier run.
os.makedirs("ReluLLaMA-7B-PowerInfer-GGUF", exist_ok=True)


# URL to download the file from
url = "https://huggingface.co/PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF/resolve/main/llama-7b-relu.powerinfer.gguf"

# Destination path to save the downloaded file
destination_path = "ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf"

# Download the file only when it is not already present. The data is written to
# a temporary ".part" file and renamed afterwards, so an interrupted download
# never leaves a truncated file at destination_path.
if not os.path.exists(destination_path):
    _partial_path = destination_path + ".part"
    urllib.request.urlretrieve(url, _partial_path)
    os.replace(_partial_path, destination_path)

def chat(question):
    """Run the PowerInfer ``main`` binary on *question* and return its output.

    Args:
        question: Prompt text handed to the binary through ``-p``.

    Returns:
        The process's standard output when it exits with status 0. Otherwise a
        string starting with ``"Error: "`` followed by the process's standard
        error, or by the OS error message when the binary could not be started.
    """
    command = [
        "./build/bin/main",
        "-m", "./ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf",
        "-n", "128",
        "-t", "8",
        "-p", question,
    ]

    try:
        # errors="replace" keeps a byte sequence that is not valid UTF-8 (for
        # example a multi-byte character cut off at the output's end) from
        # raising UnicodeDecodeError.
        completed = subprocess.run(
            command,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        # Raised when the executable is missing or cannot be executed.
        return f"Error: {exc}"

    if completed.returncode != 0:
        return f"Error: {completed.stderr}"
    return completed.stdout

# Multi-line text areas for the prompt and for the generated reply.
question_box = gr.Textbox(lines=10, placeholder="Enter your question here 👉")
answer_box = gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀")

# Single-function UI: chat() is the callback, question_box is its input
# component and answer_box is its output component.
screen = gr.Interface(
    fn=chat,
    inputs=question_box,
    outputs=answer_box,
    title="Inference with Powerinfer 👩🏻‍💻📓✍🏻💡",
    description="This app aims to facilitate the inference of LLMs using Powerinfer💡",
    theme="soft",
    # examples=["Hello", "what is the speed of human nerve impulses?"],
)

# Start the web app.
screen.launch()