import time
from app import predict

# Basic smoke tests for each mode.
# Note: Real mode will load the model weights; keep max_new_tokens small.

def run():
    """Smoke-test ``predict`` once per mode and print each result.

    Calls ``predict`` with the modes "echo", "useless" and "off", in that
    order, using the same short instruction and the same generation
    settings each time. A banner is printed before each result. The "off"
    call is timed and its elapsed time is printed last.
    """
    instruction = "ازموینه"  # Pashto for test
    # Positional arguments shared by all three calls; only the trailing
    # mode string differs between them.
    # NOTE(review): the meaning of each positional value is defined by
    # app.predict, which is not visible here - confirm against its signature.
    shared_args = (instruction, "", 8, 2, True, 1.0, 0.9, 1)

    print("=== Echo Mode ===")
    print(predict(*shared_args, "echo"))
    print("\n=== Useless Mode ===")
    print(predict(*shared_args, "useless"))
    print("\n=== Real Mode (off) ===")
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    started = time.perf_counter()
    out = predict(*shared_args, "off")
    dt = time.perf_counter() - started
    print(out)
    print(f"\n[Latency real mode: {dt:.2f}s]")

# Run the smoke tests only when this file is executed as a script, so that
# importing the module does not trigger any predict() calls.
if __name__ == "__main__":
    run()