import os

# Must be set BEFORE importing transformers so the HF cache lands in a
# writable directory inside the container.
os.environ["HF_HOME"] = "/app/hf_cache"

import torch
from flask import Flask, request
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer
from twilio.twiml.messaging_response import MessagingResponse

app = Flask(__name__)

# Model is loaded once at startup. 4-bit loading keeps the memory footprint
# container-friendly (requires bitsandbytes; `load_in_4bit` is the legacy
# shortcut for a BitsAndBytesConfig quantization_config).
model_id = "sarvamai/sarvam-m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    load_in_4bit=True,
)


@app.route("/")
def home():
    """Health-check endpoint."""
    return "Sarvam WhatsApp Malayalam Chatbot is running!"


@app.route("/whatsapp", methods=["POST"])
def whatsapp():
    """Twilio WhatsApp webhook: reply to the incoming text with a model response.

    Returns:
        TwiML (str) that Twilio delivers back to the sender.
    """
    msg = request.form.get("Body")
    sender = request.form.get("From")  # currently unused; kept for future logging

    resp = MessagingResponse()

    # Guard: an empty/missing Body would crash the tokenizer (tokenizer(None))
    # and 500 the webhook. Fail soft with a text reply instead.
    if not msg:
        resp.message("Sorry, I did not receive any text.")
        return str(resp)

    inputs = tokenizer(msg, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only — no autograd graph needed
        outputs = model.generate(**inputs, max_new_tokens=100)
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Convert reply to Malayalam speech. NOTE(review): the mp3 is written
    # locally and never hosted/attached, so Twilio never sees it — a public
    # media URL would be needed for audio delivery. gTTS also requires
    # outbound network access, so a failure here must not break the text
    # reply; keep it best-effort.
    try:
        tts = gTTS(reply, lang="ml")
        tts.save("reply.mp3")
    except Exception:
        pass  # best-effort audio; the text reply below is still sent

    resp.message(reply)
    return str(resp)


if __name__ == "__main__":
    # Bind to all interfaces so the container port mapping works.
    app.run(host="0.0.0.0", port=7860)