Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,7 @@ class ChatCompletionRequest(BaseModel):
|
|
39 |
messages: List[Message]
|
40 |
|
41 |
# ------------------------------------------------------------
|
42 |
-
# 3. Lazy MedGemma loader
|
43 |
# ------------------------------------------------------------
|
44 |
DEVICE = "cpu"
|
45 |
DTYPE = torch.float32
|
@@ -49,20 +49,23 @@ def get_medgemma():
|
|
49 |
global medgemma_pipe
|
50 |
if medgemma_pipe is None:
|
51 |
try:
|
52 |
-
print("Loading MedGemma-4B-IT
|
53 |
print(f"Using cache directory: {CACHE_DIR}")
|
54 |
|
55 |
medgemma_pipe = pipeline(
|
56 |
"text-generation",
|
57 |
model="google/medgemma-4b-it",
|
58 |
-
torch_dtype=
|
59 |
-
device_map="
|
60 |
token=HF_TOKEN,
|
61 |
cache_dir=CACHE_DIR,
|
62 |
trust_remote_code=True,
|
63 |
-
|
|
|
|
|
64 |
)
|
65 |
-
print("✅ MedGemma loaded successfully")
|
|
|
66 |
except Exception as e:
|
67 |
print(f"❌ Error loading MedGemma: {e}")
|
68 |
print(f"Cache directory exists: {os.path.exists(CACHE_DIR)}")
|
|
|
39 |
messages: List[Message]
|
40 |
|
41 |
# ------------------------------------------------------------
|
42 |
+
# 3. Lazy MedGemma loader with memory optimization
|
43 |
# ------------------------------------------------------------
|
44 |
DEVICE = "cpu"
|
45 |
DTYPE = torch.float32
|
|
|
49 |
global medgemma_pipe
|
50 |
if medgemma_pipe is None:
|
51 |
try:
|
52 |
+
print("🚀 Loading MedGemma-4B-IT with memory optimization...")
|
53 |
print(f"Using cache directory: {CACHE_DIR}")
|
54 |
|
55 |
medgemma_pipe = pipeline(
|
56 |
"text-generation",
|
57 |
model="google/medgemma-4b-it",
|
58 |
+
torch_dtype=torch.float16, # Use float16 to reduce memory
|
59 |
+
device_map="auto",
|
60 |
token=HF_TOKEN,
|
61 |
cache_dir=CACHE_DIR,
|
62 |
trust_remote_code=True,
|
63 |
+
# Memory optimization
|
64 |
+
low_cpu_mem_usage=True,
|
65 |
+
max_memory={0: "6GB", "cpu": "8GB"}, # Limit memory usage
|
66 |
)
|
67 |
+
print("✅ MedGemma loaded successfully!")
|
68 |
+
|
69 |
except Exception as e:
|
70 |
print(f"❌ Error loading MedGemma: {e}")
|
71 |
print(f"Cache directory exists: {os.path.exists(CACHE_DIR)}")
|