Spaces:

Vaibhav-Singh
/

SmolLM2-135M

Runtime error

App Files Community

Vaibhav-Singh committed on Jan 23

Commit

a771038

1 Parent(s): 25b680f

remove console log in ai api

Browse files

Files changed (1) hide show

app.py +158 -39

app.py CHANGED Viewed

@@ -1,64 +1,183 @@
 # from fastapi import FastAPI, HTTPException
 # from pydantic import BaseModel
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# from typing import List
 # import torch
-# app = FastAPI(title="Language Model API")
-# # Model configuration
-# CHECKPOINT = "HuggingFaceTB/SmolLM2-135M-Instruct"
-# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# # Initialize model and tokenizer
-# try:
-#     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
-#     model = AutoModelForCausalLM.from_pretrained(CHECKPOINT).to(DEVICE)
-# except Exception as e:
-#     raise RuntimeError(f"Failed to load model: {str(e)}")
-# class ChatMessage(BaseModel):
 #     role: str
 #     content: str
 # class ChatRequest(BaseModel):
-#     messages: List[ChatMessage]
-#     max_new_tokens: int = 50
-#     temperature: float = 0.2
-#     top_p: float = 0.9
-# @app.post("/generate")
-# async def generate_response(request: ChatRequest):
 #     try:
-#         # Convert messages to the format expected by the model
-#         messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
-#         # Prepare input
-#         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
-#         inputs = tokenizer.encode(input_text, return_tensors="pt").to(DEVICE)
-#         # Generate response
-#         outputs = model.generate(
-#             inputs,
 #             max_new_tokens=request.max_new_tokens,
-#             temperature=request.temperature,
-#             top_p=request.top_p,
-#             do_sample=True
 #         )
-#         # Decode and return response
-#         response_text = tokenizer.decode(outputs[0])
-#         return {
-#             "generated_text": response_text
-#         }
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
 # if __name__ == "__main__":
 #     import uvicorn
-#     uvicorn.run(app, host="0.0.0.0", port=7860)

+# # from fastapi import FastAPI, HTTPException
+# # from pydantic import BaseModel
+# # from transformers import AutoModelForCausalLM, AutoTokenizer
+# # from typing import List
+# # import torch
+# # app = FastAPI(title="Language Model API")
+# # # Model configuration
+# # CHECKPOINT = "HuggingFaceTB/SmolLM2-135M-Instruct"
+# # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# # # Initialize model and tokenizer
+# # try:
+# #     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
+# #     model = AutoModelForCausalLM.from_pretrained(CHECKPOINT).to(DEVICE)
+# # except Exception as e:
+# #     raise RuntimeError(f"Failed to load model: {str(e)}")
+# # class ChatMessage(BaseModel):
+# #     role: str
+# #     content: str
+# # class ChatRequest(BaseModel):
+# #     messages: List[ChatMessage]
+# #     max_new_tokens: int = 50
+# #     temperature: float = 0.2
+# #     top_p: float = 0.9
+# # @app.post("/generate")
+# # async def generate_response(request: ChatRequest):
+# #     try:
+# #         # Convert messages to the format expected by the model
+# #         messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+# #         # Prepare input
+# #         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
+# #         inputs = tokenizer.encode(input_text, return_tensors="pt").to(DEVICE)
+# #         # Generate response
+# #         outputs = model.generate(
+# #             inputs,
+# #             max_new_tokens=request.max_new_tokens,
+# #             temperature=request.temperature,
+# #             top_p=request.top_p,
+# #             do_sample=True
+# #         )
+# #         # Decode and return response
+# #         response_text = tokenizer.decode(outputs[0])
+# #         return {
+# #             "generated_text": response_text
+# #         }
+# #     except Exception as e:
+# #         raise HTTPException(status_code=500, detail=str(e))
+# # if __name__ == "__main__":
+# #     import uvicorn
+# #     uvicorn.run(app, host="0.0.0.0", port=7860)
 # from fastapi import FastAPI, HTTPException
 # from pydantic import BaseModel
+# from typing import List, Dict
+# import transformers
 # import torch
+# app = FastAPI(title="LLaMA API")
+# # Initialize the model and pipeline at startup
+# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+# pipeline = transformers.pipeline(
+#     "text-generation",
+#     model=model_id,
+#     model_kwargs={"torch_dtype": torch.bfloat16},
+#     device_map="auto",
+# )
+# class Message(BaseModel):
 #     role: str
 #     content: str
 # class ChatRequest(BaseModel):
+#     messages: List[Message]
+#     max_new_tokens: int = 256
+# class ChatResponse(BaseModel):
+#     generated_text: str
+# @app.post("/generate", response_model=ChatResponse)
+# async def chat(request: ChatRequest):
 #     try:
+#         outputs = pipeline(
+#             [{"role": msg.role, "content": msg.content} for msg in request.messages],
 #             max_new_tokens=request.max_new_tokens,
 #         )
+#         # Extract the last generated message
+#         generated_text = outputs[0]["generated_text"][-1]
+#         return ChatResponse(generated_text=generated_text)
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
+# # Health check endpoint
+# @app.get("/")
+# async def health_check():
+#     return {"status": "healthy"}
 # if __name__ == "__main__":
 #     import uvicorn
+#     uvicorn.run(app, host="0.0.0.0", port=8000)
+# # from fastapi import FastAPI, HTTPException
+# # from pydantic import BaseModel
+# # from transformers import AutoModelForCausalLM, AutoTokenizer
+# # from typing import List
+# # import torch
+# # app = FastAPI(title="Language Model API")
+# # # Model configuration
+# # CHECKPOINT = "HuggingFaceTB/SmolLM2-135M-Instruct"
+# # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# # # Initialize model and tokenizer
+# # try:
+# #     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
+# #     model = AutoModelForCausalLM.from_pretrained(CHECKPOINT).to(DEVICE)
+# # except Exception as e:
+# #     raise RuntimeError(f"Failed to load model: {str(e)}")
+# # class ChatMessage(BaseModel):
+# #     role: str
+# #     content: str
+# # class ChatRequest(BaseModel):
+# #     messages: List[ChatMessage]
+# #     max_new_tokens: int = 50
+# #     temperature: float = 0.2
+# #     top_p: float = 0.9
+# # @app.post("/generate")
+# # async def generate_response(request: ChatRequest):
+# #     try:
+# #         # Convert messages to the format expected by the model
+# #         messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+# #         # Prepare input
+# #         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
+# #         inputs = tokenizer.encode(input_text, return_tensors="pt").to(DEVICE)
+# #         # Generate response
+# #         outputs = model.generate(
+# #             inputs,
+# #             max_new_tokens=request.max_new_tokens,
+# #             temperature=request.temperature,
+# #             top_p=request.top_p,
+# #             do_sample=True
+# #         )
+# #         # Decode and return response
+# #         response_text = tokenizer.decode(outputs[0])
+# #         return {
+# #             "generated_text": response_text
+# #         }
+# #     except Exception as e:
+# #         raise HTTPException(status_code=500, detail=str(e))
+# # if __name__ == "__main__":
+# #     import uvicorn
+# #     uvicorn.run(app, host="0.0.0.0", port=7860)