bogeumkim committed
Commit ddb508a · verified · 1 Parent(s): 9f6487b

Delete modal

Files changed (3)
  1. modal/config.py +0 -20
  2. modal/engine.py +0 -103
  3. modal/requirements.txt +0 -0
modal/config.py DELETED
@@ -1,20 +0,0 @@
- MODEL_DIR = "/model"
- BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"
-
-
- # Name the stub (it should be all lower case)
- STUB_NAME = f"{BASE_MODEL.lower()}-deployement"
-
- ### Server-level default configs
- # Keep warm: the warm pool size, i.e. the minimum number of containers kept running for the serverless function (Modal scales up more containers from there based on demand)
-
- KEEP_WARM = 1
-
- # Number of concurrent requests a single container should handle
- NUM_CONCURRENT_REQUESTS = 10
-
- # Timeout (in seconds) after which the server is shut down
- TIMEOUT = 600
-
- # Number of GPUs to use
- GPU_COUNT = 1
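For context, here is a minimal sketch (not part of this commit) of how these constants would typically be wired into a Modal function under the legacy `Stub` API that this repo targets. Note that `KEEP_WARM` is defined above but never imported by `engine.py` below, so the `ping` function and its wiring here are illustrative assumptions only:

```python
# Hypothetical sketch, not from this repo: consuming config.py's constants
# with the legacy Modal Stub API (modal < 1.0). `ping` is an invented example.
from modal import Stub

from config import KEEP_WARM, NUM_CONCURRENT_REQUESTS, STUB_NAME, TIMEOUT

stub = Stub(name=STUB_NAME)

@stub.function(
    keep_warm=KEEP_WARM,  # minimum number of containers kept running
    allow_concurrent_inputs=NUM_CONCURRENT_REQUESTS,  # per-container concurrency
    timeout=TIMEOUT,  # seconds before the server is shut down
)
def ping() -> str:
    return "ok"
```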
modal/engine.py DELETED
@@ -1,103 +0,0 @@
- import os
- import asyncio
-
- from queue import Empty
- from typing import List, Union
-
- # Modal's required imports
- from modal import Image, Stub, gpu, method, enter, exit
-
- # Import the constants defined in config.py
- from config import (
-     MODEL_DIR,
-     BASE_MODEL,
-     STUB_NAME,
-     NUM_CONCURRENT_REQUESTS,
-     TIMEOUT,
-     GPU_COUNT,
- )
-
- # Define the GPU config
- if BASE_MODEL == "mistralai/Mistral-7B-Instruct-v0.1":
-     GPU_CONFIG = gpu.A100(count=GPU_COUNT, memory=80)
- else:
-     GPU_CONFIG = gpu.Any(count=GPU_COUNT)
-
- stub = Stub(name=STUB_NAME)
-
- def download_model_to_folder():
-     from transformers.utils import move_cache
-     from huggingface_hub import snapshot_download
-
-     os.makedirs(MODEL_DIR, exist_ok=True)
-
-     # Download the model weights into MODEL_DIR
-     snapshot_download(
-         BASE_MODEL,
-         local_dir=MODEL_DIR,
-         ignore_patterns=["*.pt"],  # using safetensors
-     )
-
-     move_cache()
-
- HF_DOCKER_IMAGE = (
-     Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
-     .pip_install_from_requirements("./requirements.txt")
-     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
-     .run_function(download_model_to_folder)
- )
-
- @stub.cls(
-     gpu=GPU_CONFIG,
-     timeout=TIMEOUT,
-     container_idle_timeout=TIMEOUT,
-     allow_concurrent_inputs=NUM_CONCURRENT_REQUESTS,
-     image=HF_DOCKER_IMAGE,
- )
- class HFEngine:
-     model_name_or_path: str = MODEL_DIR
-     device: str = "cuda"
-
-     @enter()
-     def start_engine(self):
-         from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-
-         self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path, trust_remote_code=True).to(self.device)
-         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
-         self.streamer = TextIteratorStreamer(self.tokenizer)
-         return self
-
-     @exit()
-     def terminate_engine(self):
-         import gc
-         import torch
-
-         del self.model
-         torch.cuda.synchronize()
-         gc.collect()
-
-     @method()
-     async def stream(self, chat_input: Union[str, List[dict]], generation_kwargs: dict):
-         from threading import Thread
-
-         # Normalize a plain string into the chat-message format
-         if isinstance(chat_input, str):
-             chat_input = [{"role": "user", "content": chat_input}]
-         input_ids = self.tokenizer.apply_chat_template(
-             conversation=chat_input, tokenize=True, return_tensors="pt"
-         ).to(self.device)
-
-         gen_kwargs = dict(
-             input_ids=input_ids,
-             streamer=self.streamer,
-             pad_token_id=self.tokenizer.eos_token_id,
-             **generation_kwargs,
-         )
-
-         # Generate in a background thread; tokens arrive through the streamer
-         thread = Thread(target=self.model.generate, kwargs=gen_kwargs)
-         thread.start()
-
-         for next_token in self.streamer:
-             try:
-                 if next_token is not None:
-                     yield next_token
-             except Empty:
-                 await asyncio.sleep(0.001)
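For reference, a hypothetical client sketch of how the deleted `HFEngine` class would have been driven, assuming the legacy Modal generator-call API (`remote_gen`) and `local_entrypoint`; the prompt and generation parameters are invented for illustration:

```python
# Hypothetical client sketch, not part of this commit: streaming tokens from
# HFEngine via Modal's generator-call API. Run with `modal run client.py`.
from engine import HFEngine, stub

@stub.local_entrypoint()
def main():
    engine = HFEngine()
    # remote_gen streams each token the server yields back to this process
    for token in engine.stream.remote_gen(
        chat_input="Summarize what this deployment does.",
        generation_kwargs={"max_new_tokens": 128, "temperature": 0.7},
    ):
        print(token, end="", flush=True)
```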
modal/requirements.txt DELETED
File without changes