ndc8 committed · Commit 8d962fd · 1 Parent(s): 1f4eabe

Set gemma-3n-E4B-it-GGUF as main model for all text generation endpoints

Files changed:
- backend_service.py  +2 -2
- requirements.txt    +2 -1
backend_service.py CHANGED

@@ -70,7 +70,7 @@ class ChatMessage(BaseModel):
         return v
 
 class ChatCompletionRequest(BaseModel):
-    model: str = Field(default="
+    model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
     messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
     max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")

@@ -119,7 +119,7 @@ class CompletionRequest(BaseModel):
 # Global variables for model management
 inference_client: Optional[InferenceClient] = None
 image_text_pipeline = None  # type: ignore
-current_model = "
+current_model = "gemma-3n-E4B-it-GGUF"
 vision_model = "Salesforce/blip-image-captioning-base"  # Working model for image captioning
 tokenizer = None
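For context, here is a minimal, self-contained sketch of how the new default behaves. ChatCompletionRequest is copied from the added (+) lines above; ChatMessage's role/content fields and the removed old default (truncated in the diff source) are assumptions, since only the tail of its validator appears in the hunk:

# Minimal sketch condensed from the diff above. ChatMessage's role/content
# fields are assumptions -- only "return v" from its validator is visible.
from typing import List, Optional
from pydantic import BaseModel, Field

class ChatMessage(BaseModel):
    role: str      # assumption: conventional chat fields
    content: str

class ChatCompletionRequest(BaseModel):
    model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
    messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
    max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")

# Requests that omit "model" now fall through to the new GGUF default:
req = ChatCompletionRequest(messages=[ChatMessage(role="user", content="Hi")])
print(req.model)  # gemma-3n-E4B-it-GGUF

Together with the current_model change in the second hunk, this makes gemma-3n-E4B-it-GGUF the effective default across the text generation endpoints, while the vision endpoint keeps its separate BLIP captioning model.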
requirements.txt CHANGED

@@ -3,8 +3,9 @@ huggingface_hub>=0.34.0
 transformers>=4.36.0
 torch>=2.0.0
 Pillow>=10.0.0
-requests>=2.31.0
 accelerate>=0.24.0
+requests>=2.31.0
+# NOTE: GGUF models like 'gemma-3n-E4B-it-GGUF' must be downloaded manually or referenced from HuggingFace, not pip-installed.
 fastapi>=0.100.0
 uvicorn[standard]>=0.23.0
 pydantic>=2.0.0
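As a hedged illustration of the "downloaded manually or referenced from HuggingFace" NOTE, the sketch below uses huggingface_hub, which is already pinned above. The repo_id and filename are assumptions, not values taken from this commit; substitute the actual GGUF repository and quantization file:

# Sketch of the manual GGUF download the NOTE describes, using huggingface_hub
# (already in requirements.txt). repo_id and filename are assumptions --
# replace them with the real GGUF repo and the quantization you want.
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(
    repo_id="unsloth/gemma-3n-E4B-it-GGUF",   # assumption: a public GGUF mirror
    filename="gemma-3n-E4B-it-Q4_K_M.gguf",   # assumption: chosen quant file
)
print(gguf_path)  # local cache path; hand this file to your GGUF runtime (e.g. llama.cpp)

pip installs only the client libraries; the GGUF weights themselves live on the Hub, which is why the NOTE was added to requirements.txt rather than a package pin.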