SeyhaLite committed
Commit 41245b0 · verified · 1 Parent(s): 4ca4acd

Update app.py

Files changed (1)
  1. app.py +8 -8
app.py CHANGED
@@ -34,13 +34,13 @@ if not firebase_admin._apps:
     firebase_admin.initialize_app(cred)
 db = firestore.client()
 
-# Load the model and tokenizer without quantization
-model_name = "SeaLLMs/SeaLLM3-7B-Chat"
+# Load a smaller model and tokenizer
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # 1.1B parameters, ~2.2GB in FP16
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16,  # Use FP16 for lower memory usage on CPU
-    device_map="auto"  # Automatically map to CPU
+    torch_dtype=torch.float16,  # Use FP16 for lower memory usage
+    device_map="auto"  # Map to CPU
 )
 device = torch.device("cpu")  # Explicitly set to CPU
 
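For context, a minimal standalone sketch of the new loading path (not part of the commit): it assumes `transformers`, `torch`, and `accelerate` are installed, and the size check at the end is purely illustrative.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half-precision weights, roughly 2.2 GB for 1.1B parameters
    device_map="auto",          # needs the accelerate package; resolves to CPU on a CPU-only Space
)
device = torch.device("cpu")

# Rough sanity check of the weight footprint actually loaded into memory.
total_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
print(f"{model_name}: ~{total_bytes / 1e9:.1f} GB of parameters")
```

One caveat: float16 on CPU saves memory, but many CPU kernels have no half-precision path, so it can run slower than float32; benchmarking both dtypes on the Space is worthwhile.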
 
@@ -104,13 +104,13 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     """
     prompt = system_prompt + "\nអ្នកប្រើបានសួរ: " + user_input + "\n" + character + ": "
 
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048)
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)  # Reduced max_length for memory
     inputs = {key: value.to(device) for key, value in inputs.items()}
 
     outputs = model.generate(
         input_ids=inputs["input_ids"],
         attention_mask=inputs["attention_mask"],
-        max_length=2048,
+        max_length=1024,  # Reduced for faster CPU processing
         pad_token_id=tokenizer.eos_token_id,
         temperature=0.7,
         do_sample=True,
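Continuing from the loading sketch above, a hedged sketch of the generation call. The example prompt, the placeholder character name, and the `max_new_tokens=256` value are assumptions for illustration, not part of the commit; note that `max_length` in `generate()` counts the prompt tokens as well, whereas `max_new_tokens` bounds only the reply.

```python
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # some Llama-family tokenizers ship without a pad token

character = "Bot"  # placeholder; app.py builds the prompt from system_prompt and the real character
prompt = "អ្នកប្រើបានសួរ: " + "How can you help me?" + "\n" + character + ": "  # "អ្នកប្រើបានសួរ" = "The user asked"

inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)
inputs = {key: value.to(device) for key, value in inputs.items()}

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=256,                  # caps only the generated reply, unlike max_length
    pad_token_id=tokenizer.eos_token_id,
    temperature=0.7,
    do_sample=True,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True).split(character + ": ")[-1].strip()
print(response)
```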
@@ -119,7 +119,7 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True).split(character + ": ")[-1].strip()
 
-    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response):
+    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response):  # Check for Khmer characters
         if "ជួយ" in user_input:
             response = "ខ្ញុំរីករាយនឹងជួយលោកអ្នក! សូមប្រាប់ខ្ញុំថា តើខ្ញុំអាចជួយអ្វីបាន?"
         elif "អរគុណ" in user_input:
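The fallback above fires only when the generated reply contains no Khmer script at all: Khmer occupies the Unicode block U+1780–U+17FF, which is exactly what the `ord(c)` range test checks, and the canned replies are keyed on "ជួយ" ("help") and "អរគុណ" ("thank you") in the user's message. A tiny self-contained illustration (the helper name `contains_khmer` is made up for this sketch):

```python
def contains_khmer(text: str) -> bool:
    """True if any character falls inside the Khmer Unicode block (U+1780 to U+17FF)."""
    return any(0x1780 <= ord(c) <= 0x17FF for c in text)

print(contains_khmer("hello"))   # False -> the bot substitutes a canned Khmer reply
print(contains_khmer("ជួយ"))     # True  ("help")
print(contains_khmer("អរគុណ"))   # True  ("thank you")
```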
@@ -546,7 +546,7 @@ def webhook():
 @app.route("/", methods=["GET"])
 def setup_webhook():
     # Replace with your Hugging Face Space URL once known
-    space_url = os.getenv("SPACE_URL", "https://your-username-your-space-name.hf.space")
+    space_url = os.getenv("SPACE_URL", "https://seyhalite-aihavin.hf.space")
     webhook_url = f"{space_url}/webhook"
     application.bot.set_webhook(webhook_url)
     return f"Webhook set to {webhook_url}!", 200
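The route above registers the webhook through `application.bot.set_webhook`. For reference, an equivalent standalone sketch that calls Telegram's HTTP API directly; the `BOT_TOKEN` environment variable is an assumption of this sketch, and the default `SPACE_URL` simply mirrors the value introduced in the commit.

```python
import os

import requests

space_url = os.getenv("SPACE_URL", "https://seyhalite-aihavin.hf.space")
webhook_url = f"{space_url}/webhook"

token = os.environ["BOT_TOKEN"]  # assumed env var holding the Telegram bot token
resp = requests.get(
    f"https://api.telegram.org/bot{token}/setWebhook",
    params={"url": webhook_url},
    timeout=10,
)
print(resp.json())  # {'ok': True, 'result': True, ...} once Telegram accepts the URL
```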