muhammadnoman76 committed
Commit 433a189 · 1 Parent(s): f98f8ce

Fix build issues and optimize Dockerfile

Files changed (5):
  1. Dockerfile +5 -10
  2. README.md +4 -3
  3. app.py +8 -18
  4. packages.txt +1 -1
  5. requirements.txt +5 -5
Dockerfile CHANGED
@@ -1,8 +1,8 @@
-FROM python:3.10-slim
+FROM python:3.12
 
 WORKDIR /code
 
-# Copy packages.txt and install system dependencies
+# Install system dependencies
 COPY packages.txt /root/packages.txt
 RUN apt-get update && \
     xargs -r -a /root/packages.txt apt-get install -y && \
@@ -12,20 +12,15 @@ RUN apt-get update && \
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-# Install llama-cpp-python separately to handle potential issues
-RUN pip install --no-cache-dir llama-cpp-python
-
-# Set Hugging Face cache directory to a writable location
+# Pre-download the model
 ENV HF_HOME=/code/.cache/huggingface
 RUN mkdir -p /code/.cache/huggingface && \
-    chmod -R 777 /code/.cache
+    pip install huggingface_hub && \
+    python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='muhammadnoman76/cortex_q4', filename='unsloth.Q4_K_M.gguf', local_dir='/code', local_dir_use_symlinks=False)"
 
 # Copy application code
 COPY . .
 
-# Ensure correct permissions for the working directory
-RUN chmod -R 777 /code
-
 # Expose port
 EXPOSE 7860
 
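The new RUN layer bakes the GGUF weights into the image at build time, so the app no longer downloads them on startup. For readability, the `python -c` one-liner is equivalent to this short script (a sketch of the same call; the image itself keeps the one-liner form):

    # Same call as the `python -c` one-liner in the RUN layer above:
    # fetch the quantized GGUF file into /code during the image build.
    from huggingface_hub import hf_hub_download

    model_file = hf_hub_download(
        repo_id="muhammadnoman76/cortex_q4",
        filename="unsloth.Q4_K_M.gguf",
        local_dir="/code",
        local_dir_use_symlinks=False,  # copy the file instead of symlinking into the HF cache
    )
    print(model_file)  # expected: /code/unsloth.Q4_K_M.gguf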
README.md CHANGED
@@ -10,10 +10,11 @@ license: afl-3.0
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
-
 # LLM Streaming API
 
 This Space provides a FastAPI application that streams responses from the Cortex LLM model.
 
-- Visit `/ui` for a simple interface to test the model
-- Send POST requests to `/generate` with JSON body containing `task_description`, `max_tokens` (optional), and `temperature` (optional)
+- Send GET requests to `/stream?task=<your_task>` to receive a streamed response from the model.
+- Example: `/stream?task=make an agent which send mail by searching top 5 website from google`
+
+**Note**: The `/ui` endpoint is not implemented in the current version.
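A minimal client for the `/stream` endpoint described above might look like the sketch below; the base URL is a placeholder and the client-side `requests` dependency is an assumption, neither is part of this commit.

    # Hypothetical client for GET /stream?task=<your_task>.
    # BASE_URL is a placeholder; point it at the running Space.
    import requests

    BASE_URL = "http://localhost:7860"
    params = {"task": "make an agent which send mail by searching top 5 website from google"}

    with requests.get(f"{BASE_URL}/stream", params=params, stream=True) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)  # tokens arrive incrementally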
app.py CHANGED
@@ -1,6 +1,5 @@
 from fastapi import FastAPI
 from fastapi.responses import StreamingResponse
-from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import asyncio
 from fastapi.middleware.cors import CORSMiddleware
@@ -15,14 +14,13 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Download the GGUF file
-model_id = "muhammadnoman76/cortex_q4"
-gguf_filename = "unsloth.Q4_K_M.gguf"  # Replace with the correct filename
-model_path = hf_hub_download(
-    repo_id=model_id,
-    filename=gguf_filename,
-    local_dir=".",
-    local_dir_use_symlinks=False
+# Load model from local file
+model_path = "./unsloth.Q4_K_M.gguf"
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,
+    n_batch=512,
+    verbose=False
 )
 
 alpaca_prompt = """
@@ -51,14 +49,6 @@ Important notes:
 ### Response:
 """
 
-# Load model from local file in the copied folder
-llm = Llama(
-    model_path= r'.//unsloth.Q4_K_M.gguf',
-    n_ctx=2048,
-    n_batch=512,
-    verbose=False
-)
-
 async def stream_llm_response(task_description: str):
     prompt = alpaca_prompt.format(task_description)
     stream = llm(
@@ -77,4 +67,4 @@ async def stream_response(task: str = "make an agent which send mail by searchin
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
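The hunks above cut off the body of `stream_llm_response` and its route. For context, a minimal sketch of the usual llama-cpp-python streaming pattern behind such an endpoint follows; the generation parameters, the `/stream` route decorator, and the `text/plain` media type are assumptions based on the README, not lines from this commit.

    # Sketch only; not the Space's exact code.
    async def stream_llm_response(task_description: str):
        prompt = alpaca_prompt.format(task_description)
        # With stream=True, llama-cpp-python yields completion chunks as they are generated.
        stream = llm(prompt, max_tokens=2048, stream=True)
        for chunk in stream:
            yield chunk["choices"][0]["text"]
            await asyncio.sleep(0)  # give the event loop a chance to flush output

    @app.get("/stream")
    async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
        return StreamingResponse(stream_llm_response(task), media_type="text/plain")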
packages.txt CHANGED
@@ -2,4 +2,4 @@ build-essential
 cmake
 git
 libopenblas-dev
-libomp-dev
+libomp-dev
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-fastapi>=0.115.12
-uvicorn>=0.34.2
-pydantic>=2.11.4
-llama-cpp-python>=0.3.8
-huggingface_hub>=0.25.0
+fastapi==0.115.12
+uvicorn==0.34.2
+pydantic==2.11.4
+llama-cpp-python==0.3.8
+huggingface_hub==0.30.2