harry85 committed on
Commit
e3f4a50
•
1 Parent(s): 2a6e87a

Upload 4 files

Files changed (4)
  1. Dockerfile +27 -0
  2. README.md +4 -4
  3. app.py +57 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the official Python 3.9 image
+ FROM python:3.9
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies listed in requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory and put its local bin on PATH
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ # Start the FastAPI app on port 7860, the default port for Hugging Face Spaces
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
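
After building and running the image locally (for example, docker build -t qwen-chat . followed by docker run -p 7860:7860 qwen-chat, where the qwen-chat tag is an illustrative name, not part of this commit), a minimal Python sketch can confirm the container is serving:

# Minimal smoke test for the running container; assumes the illustrative
# docker run mapping above and the /health route defined in app.py below.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:7860/health") as resp:
    print(json.load(resp))  # expected: {"message": "working..."}
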
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: QWEN Chat QA 0.5B
- emoji: 💻
- colorFrom: red
- colorTo: green
+ title: Text Generation
+ emoji: 🌍
+ colorFrom: green
+ colorTo: yellow
  sdk: docker
  pinned: false
  license: mit
app.py ADDED
@@ -0,0 +1,57 @@
+ # Install the necessary packages:
+ # pip install accelerate transformers fastapi pydantic torch
+
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from pydantic import BaseModel
+ from fastapi import FastAPI
+
+ # Initialize the FastAPI app; the interactive docs are served at the root path
+ app = FastAPI(docs_url="/")
+
+ # Load the model and tokenizer once at startup
+ device = "cuda" if torch.cuda.is_available() else "cpu"  # fall back to CPU when no GPU is present
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "Qwen/Qwen1.5-0.5B-Chat",
+     torch_dtype="auto",
+     device_map="auto"
+ )
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
+
+ # Define the request model
+ class RequestModel(BaseModel):
+     input: str
+
+ # Define a health-check endpoint. It is registered at /health rather than /
+ # because docs_url="/" already claims the root path, which would shadow it.
+ @app.get("/health")
+ def greet_json():
+     return {"message": "working..."}
+
+ # Define the text generation endpoint
+ @app.post("/prompt")
+ def get_response(request: RequestModel):
+     prompt = request.input
+     messages = [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ]
+     # Render the chat messages into a single prompt string
+     text = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+     model_inputs = tokenizer([text], return_tensors="pt").to(device)
+
+     generated_ids = model.generate(
+         model_inputs.input_ids,
+         max_new_tokens=512
+     )
+     # Drop the prompt tokens so only the newly generated completion remains
+     generated_ids = [
+         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+     ]
+
+     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     return {"generated_text": response}
+
+ # To run the FastAPI app locally, use: uvicorn app:app --reload
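
Since requests==2.27.* is already pinned in requirements.txt, a short client sketch for the /prompt endpoint might look like this (localhost:7860 is an assumption based on the local docker run mapping shown earlier; adjust for a deployed Space URL):

# Hypothetical client for the /prompt endpoint; the host and port are
# assumptions, not part of this commit.
import requests

payload = {"input": "Give me a one-sentence description of FastAPI."}
resp = requests.post("http://localhost:7860/prompt", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["generated_text"])
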
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi==0.74.*
+ requests==2.27.*
+ uvicorn[standard]==0.17.*
+ sentencepiece==0.1.*
+ torch==1.11.*
+ transformers==4.*
+ accelerate
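
Note that the Qwen1.5 model card asks for transformers>=4.37.0, so the transformers==4.* wildcard must resolve at least that high at build time, and the much older torch==1.11.* pin may conflict with it. A small sketch, intended to be run inside the built image, to check what actually got installed:

# Sanity check printing the versions the loose pins above resolved to.
from importlib.metadata import version

for pkg in ("fastapi", "requests", "uvicorn", "sentencepiece",
            "torch", "transformers", "accelerate"):
    print(f"{pkg}: {version(pkg)}")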