Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 from fastapi import FastAPI, HTTPException, Request
-from pydantic import BaseModel
 import uvicorn
 import requests
 import os
@@ -8,7 +7,7 @@ import time
 import asyncio
 from typing import List, Dict, Any
 from tqdm import tqdm
-from llama_cpp import Llama
+from llama_cpp import Llama
 
 app = FastAPI()
 
@@ -69,11 +68,10 @@ class ModelManager:
         temp_filename = await self.save_model_to_temp_file(model_config)
         start_time = time.time()
         print(f"Loading model from {temp_filename}")
-        # Make sure to use the correct method to load the model
         llama = Llama.load(temp_filename)
         end_time = time.time()
         load_duration = end_time - start_time
-        if load_duration > 0:
+        if load_duration > 0.5:
             print(f"Model {model_config['name']} took {load_duration:.2f} seconds to load, splitting automatically")
             await self.handle_large_model(temp_filename, model_config)
         else: