import time

from app import predict

# Basic smoke tests for each mode.
# Note: Real mode will load the model weights; keep max_new_tokens small.


def run():
    """Run one smoke call of ``predict`` per mode and print each result.

    ``predict`` is invoked three times with the same instruction and the
    same positional settings, varying only the final mode argument:
    ``"echo"``, ``"useless"`` and ``"off"`` (the last one is labelled
    "Real Mode" in the output). The ``"off"`` call is timed and its elapsed
    time is printed after its output.

    Returns:
        None. All results are written to standard output.
    """
    instruction = "ازموینه"  # Pashto for test

    # Positional arguments shared by every call, placed between the
    # instruction and the mode.
    # NOTE(review): their meaning is defined by app.predict's signature,
    # which is not visible here — presumably the 8 is max_new_tokens; confirm.
    shared_args = ("", 8, 2, True, 1.0, 0.9, 1)

    print("=== Echo Mode ===")
    print(predict(instruction, *shared_args, "echo"))

    print("\n=== Useless Mode ===")
    print(predict(instruction, *shared_args, "useless"))

    print("\n=== Real Mode (off) ===")
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments the way
    # time.time() can.
    t0 = time.perf_counter()
    out = predict(instruction, *shared_args, "off")
    dt = time.perf_counter() - t0
    print(out)
    print(f"\n[Latency real mode: {dt:.2f}s]")


if __name__ == "__main__":
    run()