AmeyaKawthalkar committed on
Commit
3bec787
·
verified ·
1 Parent(s): e72aab1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -39,7 +39,7 @@ class ChatCompletionRequest(BaseModel):
39
  messages: List[Message]
40
 
41
  # ------------------------------------------------------------
42
- # 3. Lazy MedGemma loader (CPU; float32)
43
  # ------------------------------------------------------------
44
  DEVICE = "cpu"
45
  DTYPE = torch.float32
@@ -49,20 +49,23 @@ def get_medgemma():
49
  global medgemma_pipe
50
  if medgemma_pipe is None:
51
  try:
52
- print("Loading MedGemma-4B-IT ")
53
  print(f"Using cache directory: {CACHE_DIR}")
54
 
55
  medgemma_pipe = pipeline(
56
  "text-generation",
57
  model="google/medgemma-4b-it",
58
- torch_dtype=DTYPE,
59
- device_map="cpu", # Force CPU to avoid device issues
60
  token=HF_TOKEN,
61
  cache_dir=CACHE_DIR,
62
  trust_remote_code=True,
63
- local_files_only=False, # Allow downloads
 
 
64
  )
65
- print("✅ MedGemma loaded successfully")
 
66
  except Exception as e:
67
  print(f"❌ Error loading MedGemma: {e}")
68
  print(f"Cache directory exists: {os.path.exists(CACHE_DIR)}")
 
39
  messages: List[Message]
40
 
41
  # ------------------------------------------------------------
42
+ # 3. Lazy MedGemma loader with memory optimization
43
  # ------------------------------------------------------------
44
  DEVICE = "cpu"
45
  DTYPE = torch.float32
 
49
  global medgemma_pipe
50
  if medgemma_pipe is None:
51
  try:
52
+ print("🚀 Loading MedGemma-4B-IT with memory optimization...")
53
  print(f"Using cache directory: {CACHE_DIR}")
54
 
55
  medgemma_pipe = pipeline(
56
  "text-generation",
57
  model="google/medgemma-4b-it",
58
+ torch_dtype=torch.float16, # Use float16 to reduce memory
59
+ device_map="auto",
60
  token=HF_TOKEN,
61
  cache_dir=CACHE_DIR,
62
  trust_remote_code=True,
63
+ # Memory optimization
64
+ low_cpu_mem_usage=True,
65
+ max_memory={0: "6GB", "cpu": "8GB"}, # Limit memory usage
66
  )
67
+ print("✅ MedGemma loaded successfully!")
68
+
69
  except Exception as e:
70
  print(f"❌ Error loading MedGemma: {e}")
71
  print(f"Cache directory exists: {os.path.exists(CACHE_DIR)}")