Update app.py
app.py CHANGED
@@ -34,13 +34,13 @@ if not firebase_admin._apps:
     firebase_admin.initialize_app(cred)
 db = firestore.client()
 
-# Load
-model_name = "
+# Load a smaller model and tokenizer
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B parameters, ~2.2GB in FP16
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16, # Use FP16 for lower memory usage
-    device_map="auto" #
+    torch_dtype=torch.float16, # Use FP16 for lower memory usage
+    device_map="auto" # Map to CPU
 )
 device = torch.device("cpu") # Explicitly set to CPU
 
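Note (not part of the commit): with device_map="auto" on a CPU-only Space the model simply lands on CPU, and FP16 matmuls on CPU are often slow or unsupported. A minimal sketch of an equivalent load that pins dtype and device explicitly, assuming transformers, torch, and accelerate are installed:

# Sketch only: load TinyLlama pinned to CPU; FP32 avoids missing FP16 CPU kernels.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,   # safer default on CPU than float16
    low_cpu_mem_usage=True,      # needs accelerate; lowers peak RAM while loading
).to("cpu")
model.eval()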
@@ -104,13 +104,13 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     """
     prompt = system_prompt + "\nអ្នកប្រើបានសួរ: " + user_input + "\n" + character + ": "
 
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024) # Reduced max_length for memory
     inputs = {key: value.to(device) for key, value in inputs.items()}
 
     outputs = model.generate(
         input_ids=inputs["input_ids"],
         attention_mask=inputs["attention_mask"],
-        max_length=
+        max_length=1024, # Reduced for faster CPU processing
         pad_token_id=tokenizer.eos_token_id,
         temperature=0.7,
         do_sample=True,
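Note (not part of the commit): in transformers, max_length in generate() counts the prompt tokens plus the generated ones, so a prompt already truncated to 1024 tokens can leave no room for a reply; max_new_tokens bounds only the continuation. A hedged sketch of that variant, reusing the tokenizer, model, device, and prompt defined in app.py (the 256 cap is illustrative):

# Sketch only: cap just the newly generated tokens instead of prompt + completion.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
inputs = {key: value.to(device) for key, value in inputs.items()}
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=256,  # illustrative bound on the reply length only
    pad_token_id=tokenizer.eos_token_id,
    temperature=0.7,
    do_sample=True,
)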
@@ -119,7 +119,7 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True).split(character + ": ")[-1].strip()
 
-    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response):
+    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response): # Check for Khmer characters
         if "ជួយ" in user_input:
             response = "ខ្ញុំរីករាយនឹងជួយលោកអ្នក! សូមប្រាប់ខ្ញុំថា តើខ្ញុំអាចជួយអ្វីបាន?"
         elif "អរគុណ" in user_input:
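Note (not part of the commit): the fallback branch keys off the main Khmer Unicode block, U+1780 through U+17FF. A self-contained sketch of that check (the helper name is illustrative):

# Sketch only: return True if the text contains at least one Khmer character.
def contains_khmer(text: str) -> bool:
    return any(0x1780 <= ord(ch) <= 0x17FF for ch in text)

print(contains_khmer("hello"))  # False
print(contains_khmer("ជួយ"))    # True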
@@ -546,7 +546,7 @@ def webhook():
 @app.route("/", methods=["GET"])
 def setup_webhook():
     # Replace with your Hugging Face Space URL once known
-    space_url = os.getenv("SPACE_URL", "https://
+    space_url = os.getenv("SPACE_URL", "https://seyhalite-aihavin.hf.space")
     webhook_url = f"{space_url}/webhook"
     application.bot.set_webhook(webhook_url)
     return f"Webhook set to {webhook_url}!", 200
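Note (not part of the commit): once SPACE_URL points at the live Space, the registration can be verified directly against the Telegram Bot API with getWebhookInfo. A hedged sketch, assuming the bot token sits in a BOT_TOKEN secret and the requests package is available:

# Sketch only: confirm Telegram sees "<space_url>/webhook" as the active webhook.
import os
import requests

token = os.getenv("BOT_TOKEN")  # illustrative env var name for the bot token
info = requests.get(f"https://api.telegram.org/bot{token}/getWebhookInfo", timeout=10)
print(info.json())  # the "url" field should match the value set by setup_webhook()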