Update app.py
app.py CHANGED
@@ -34,13 +34,13 @@ if not firebase_admin._apps:
     firebase_admin.initialize_app(cred)
 db = firestore.client()
 
-# Load
-model_name = "
+# Load a smaller model and tokenizer
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B parameters, ~2.2GB in FP16
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16, # Use FP16 for lower memory usage
-    device_map="auto" #
+    torch_dtype=torch.float16, # Use FP16 for lower memory usage
+    device_map="auto" # Map to CPU
 )
 device = torch.device("cpu") # Explicitly set to CPU
 
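Note (not part of the commit): with device_map="auto" on a CPU-only Space the model simply lands on CPU, and FP16 matmuls on CPU are often slow or unsupported. A minimal sketch of an equivalent load that pins dtype and device explicitly, assuming transformers, torch, and accelerate are installed:

# Sketch only: load TinyLlama pinned to CPU; FP32 avoids missing FP16 CPU kernels.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,   # safer default on CPU than float16
    low_cpu_mem_usage=True,      # needs accelerate; lowers peak RAM while loading
).to("cpu")
model.eval()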
@@ -104,13 +104,13 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     """
     prompt = system_prompt + "\nអ្នកប្រើបានសួរ: " + user_input + "\n" + character + ": "
 
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024) # Reduced max_length for memory
     inputs = {key: value.to(device) for key, value in inputs.items()}
 
     outputs = model.generate(
         input_ids=inputs["input_ids"],
         attention_mask=inputs["attention_mask"],
-        max_length=
+        max_length=1024, # Reduced for faster CPU processing
         pad_token_id=tokenizer.eos_token_id,
         temperature=0.7,
         do_sample=True,
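Note (not part of the commit): in transformers, max_length in generate() counts the prompt tokens plus the generated ones, so a prompt already truncated to 1024 tokens can leave no room for a reply; max_new_tokens bounds only the continuation. A hedged sketch of that variant, reusing the tokenizer, model, device, and prompt defined in app.py (the 256 cap is illustrative):

# Sketch only: cap just the newly generated tokens instead of prompt + completion.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
inputs = {key: value.to(device) for key, value in inputs.items()}
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=256,  # illustrative bound on the reply length only
    pad_token_id=tokenizer.eos_token_id,
    temperature=0.7,
    do_sample=True,
)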
@@ -119,7 +119,7 @@ async def generate_response(context, chat_id, user_input, character, scenario, h
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True).split(character + ": ")[-1].strip()
 
-    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response):
+    if not any(ord(c) >= 0x1780 and ord(c) <= 0x17FF for c in response): # Check for Khmer characters
         if "ជួយ" in user_input:
             response = "ខ្ញុំរីករាយនឹងជួយលោកអ្នក! សូមប្រាប់ខ្ញុំថា តើខ្ញុំអាចជួយអ្វីបាន?"
         elif "អរគុណ" in user_input:
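Note (not part of the commit): the fallback branch keys off the main Khmer Unicode block, U+1780 through U+17FF. A self-contained sketch of that check (the helper name is illustrative):

# Sketch only: return True if the text contains at least one Khmer character.
def contains_khmer(text: str) -> bool:
    return any(0x1780 <= ord(ch) <= 0x17FF for ch in text)

print(contains_khmer("hello"))  # False
print(contains_khmer("ជួយ"))    # True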
@@ -546,7 +546,7 @@ def webhook():
 @app.route("/", methods=["GET"])
 def setup_webhook():
     # Replace with your Hugging Face Space URL once known
-    space_url = os.getenv("SPACE_URL", "https://
+    space_url = os.getenv("SPACE_URL", "https://seyhalite-aihavin.hf.space")
     webhook_url = f"{space_url}/webhook"
     application.bot.set_webhook(webhook_url)
     return f"Webhook set to {webhook_url}!", 200
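Note (not part of the commit): once SPACE_URL points at the live Space, the registration can be verified directly against the Telegram Bot API with getWebhookInfo. A hedged sketch, assuming the bot token sits in a BOT_TOKEN secret and the requests package is available:

# Sketch only: confirm Telegram sees "<space_url>/webhook" as the active webhook.
import os
import requests

token = os.getenv("BOT_TOKEN")  # illustrative env var name for the bot token
info = requests.get(f"https://api.telegram.org/bot{token}/getWebhookInfo", timeout=10)
print(info.json())  # the "url" field should match the value set by setup_webhook()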