Commit 433a189
Parent(s): f98f8ce

Fix build issues and optimize Dockerfile

Files changed:
- Dockerfile        +5 -10
- README.md         +4 -3
- app.py            +8 -18
- packages.txt      +1 -1
- requirements.txt  +5 -5
Dockerfile
CHANGED

@@ -1,8 +1,8 @@
-FROM python:3.…
+FROM python:3.12
 
 WORKDIR /code
 
-# …
+# Install system dependencies
 COPY packages.txt /root/packages.txt
 RUN apt-get update && \
     xargs -r -a /root/packages.txt apt-get install -y && \
@@ -12,20 +12,15 @@ RUN apt-get update && \
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-# …
-RUN pip install --no-cache-dir llama-cpp-python
-
-# Set Hugging Face cache directory to a writable location
+# Pre-download the model
 ENV HF_HOME=/code/.cache/huggingface
 RUN mkdir -p /code/.cache/huggingface && \
-    …
+    pip install huggingface_hub && \
+    python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='muhammadnoman76/cortex_q4', filename='unsloth.Q4_K_M.gguf', local_dir='/code', local_dir_use_symlinks=False)"
 
 # Copy application code
 COPY . .
 
-# Ensure correct permissions for the working directory
-RUN chmod -R 777 /code
-
 # Expose port
 EXPOSE 7860
 
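For readability, the `python -c` one-liner added to the Dockerfile is equivalent to this short script (a sketch; the `repo_id`, `filename`, and `local_dir` values are the ones shown in the diff above):

```python
# Sketch of the build-time model pre-download added in this commit.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="muhammadnoman76/cortex_q4",
    filename="unsloth.Q4_K_M.gguf",
    local_dir="/code",             # the image's WORKDIR, so app.py finds ./unsloth.Q4_K_M.gguf
    local_dir_use_symlinks=False,  # copy the real file instead of a cache symlink
)
```

Downloading at build time means the weights are already at `/code/unsloth.Q4_K_M.gguf` when the container starts, which is why the `hf_hub_download` call and its import could be dropped from app.py below.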
README.md
CHANGED

@@ -10,10 +10,11 @@ license: afl-3.0
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
-
 # LLM Streaming API
 
 This Space provides a FastAPI application that streams responses from the Cortex LLM model.
 
-- …
-- …
+- Send GET requests to `/stream?task=<your_task>` to receive a streamed response from the model.
+- Example: `/stream?task=make an agent which send mail by searching top 5 website from google`
+
+**Note**: The `/ui` endpoint is not implemented in the current version.
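As a quick client-side check of the endpoint documented above, something like the following works (a minimal sketch; it assumes the `requests` package on the caller's side, and the Space URL is a placeholder):

```python
import requests

# Hypothetical public URL of the Space; substitute the real one.
BASE_URL = "https://<user>-<space>.hf.space"

params = {"task": "make an agent which send mail by searching top 5 website from google"}

# stream=True keeps the connection open and prints text as the model generates it.
with requests.get(f"{BASE_URL}/stream", params=params, stream=True, timeout=300) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)
```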
app.py
CHANGED

@@ -1,6 +1,5 @@
 from fastapi import FastAPI
 from fastapi.responses import StreamingResponse
-from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import asyncio
 from fastapi.middleware.cors import CORSMiddleware
@@ -15,14 +14,13 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# …
-…
-…
-model_path …
-…
-…
-…
-    local_dir_use_symlinks=False
+# Load model from local file
+model_path = "./unsloth.Q4_K_M.gguf"
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,
+    n_batch=512,
+    verbose=False
 )
 
 alpaca_prompt = """
@@ -51,14 +49,6 @@ Important notes:
 ### Response:
 """
 
-# Load model from local file in the copied folder
-llm = Llama(
-    model_path= r'.//unsloth.Q4_K_M.gguf',
-    n_ctx=2048,
-    n_batch=512,
-    verbose=False
-)
-
 async def stream_llm_response(task_description: str):
     prompt = alpaca_prompt.format(task_description)
     stream = llm(
@@ -77,4 +67,4 @@ async def stream_response(task: str = "make an agent which send mail by searchin
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=…
+    uvicorn.run(app, host="0.0.0.0", port=7860)
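The diff only shows the edges of the streaming path (the first lines of the generator and the `uvicorn.run` call). Below is a minimal sketch of how the visible pieces typically fit together with llama-cpp-python's streaming API; the shortened prompt template, the `max_tokens` value, and the body of the `/stream` route are assumptions, not the file's actual unchanged middle:

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from llama_cpp import Llama
import asyncio

app = FastAPI()

# Model is loaded once at import time from the GGUF file baked into the image.
llm = Llama(model_path="./unsloth.Q4_K_M.gguf", n_ctx=2048, n_batch=512, verbose=False)

# Simplified stand-in for the real alpaca_prompt template defined in app.py.
alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"

async def stream_llm_response(task_description: str):
    prompt = alpaca_prompt.format(task_description)
    # With stream=True, llama-cpp-python yields completion chunks incrementally.
    stream = llm(prompt, max_tokens=1024, stream=True)
    for chunk in stream:
        yield chunk["choices"][0]["text"]
        await asyncio.sleep(0)  # yield control so the event loop can flush output

@app.get("/stream")
async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
    return StreamingResponse(stream_llm_response(task), media_type="text/plain")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
```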
packages.txt
CHANGED

@@ -2,4 +2,4 @@ build-essential
 cmake
 git
 libopenblas-dev
-libomp-dev
+libomp-dev
requirements.txt
CHANGED

@@ -1,5 +1,5 @@
-fastapi
-uvicorn
-pydantic
-llama-cpp-python
-huggingface_hub
+fastapi==0.115.12
+uvicorn==0.34.2
+pydantic==2.11.4
+llama-cpp-python==0.3.8
+huggingface_hub==0.30.2