usag1e committed on
Commit 92c4a10 · Parent: 4bb97af

Integrate meta-llama model with FastAPI

Files changed (3):
  1. Dockerfile +8 -3
  2. app.py +21 -2
  3. requirements.txt +2 -0
Dockerfile CHANGED
@@ -1,6 +1,3 @@
-# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
-# you will also find guides on how best to write your Dockerfile
-
 FROM python:3.9
 
 RUN useradd -m -u 1000 user
@@ -9,8 +6,16 @@ ENV PATH="/home/user/.local/bin:$PATH"
 
 WORKDIR /app
 
+# Copy dependencies and install them
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
+# Pre-load the model to save time during runtime
+RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
+    AutoTokenizer.from_pretrained('meta-llama/Llama-3.1-8B-Instruct'); \
+    AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.1-8B-Instruct')"
+
 COPY --chown=user . /app
+
+# Run the app using uvicorn
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,3 +1,22 @@
-import gradio as gr
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-gr.load("models/meta-llama/Llama-3.1-8B-Instruct").launch()
+# Load the model and tokenizer
+model_name = "meta-llama/Llama-3.1-8B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Initialize FastAPI
+app = FastAPI()
+
+# Define the request model
+class Prompt(BaseModel):
+    text: str
+
+@app.post("/generate")
+def generate_text(prompt: Prompt):
+    inputs = tokenizer(prompt.text, return_tensors="pt")
+    outputs = model.generate(**inputs, max_length=100)
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return {"generated_text": generated_text}
requirements.txt CHANGED
@@ -1,2 +1,4 @@
 fastapi
 uvicorn[standard]
+transformers
+torch
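
Both new entries are unpinned, so every build pulls whatever transformers and torch releases are current at build time. A pinned variant (the version numbers below are illustrative, not taken from this commit) would make image builds reproducible:

fastapi==0.115.0
uvicorn[standard]==0.30.6
transformers==4.44.2
torch==2.4.1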