Commit cc0c580 · Parent: ad298ab
Commit message: ddssdsds
Build status: error

Files changed:
- Dockerfile +0 -83
- debug_llama_omni2.py +0 -168
- extract_llama_omni2_scripts.py +0 -215
- run_controller_directly.py +0 -192
- run_gradio_directly.py +0 -191
- run_model_worker_directly.py +0 -208
- test_llama_omni_api.py +0 -84
- tests/README.md +46 -18
- tests/test_llama_omni_api.py +114 -46
Dockerfile
DELETED
@@ -1,83 +0,0 @@

```dockerfile
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /code

# Set environment variables for pip
ENV PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on

# Install system dependencies (git for cloning, build-essential for compiling C/C++ extensions)
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy all files from your Hugging Face Space repo
COPY . /code/

# Clone LLaMA-Omni2 and install it (WITHOUT editable flag)
RUN git clone https://github.com/ICTNLP/LLaMA-Omni2.git /tmp/LLaMA-Omni2 \
    && cd /tmp/LLaMA-Omni2 \
    && pip install . \
    && echo "--- PIP LIST AFTER LLaMA-Omni2 INSTALL ---" \
    && pip list | grep -i llama \
    && echo "--- PYTHON SYS.PATH AFTER LLaMA-Omni2 INSTALL ---" \
    && python -c "import sys; print(sys.path)" \
    && echo "--- TRYING TO IMPORT LLaMA-Omni2 ---" \
    && python -c "import llama_omni2; print(f'LLaMA-Omni2 imported successfully from {llama_omni2.__file__}')" \
    && echo "--- CHECKING WHERE LLAMA_OMNI2 IS INSTALLED ---" \
    && pip show llama-omni2 \
    && echo "--- DIAGNOSTICS END ---"

# Copy the LLaMA-Omni2 source code to /code as well for direct file access
RUN cp -r /tmp/LLaMA-Omni2/llama_omni2 /code/ \
    && echo "--- COPIED LLAMA_OMNI2 SOURCE TO /code ---" \
    && ls -la /code/llama_omni2 \
    && echo "--- CHECKING SERVE SCRIPTS ---" \
    && ls -la /code/llama_omni2/serve || echo "serve directory not found!"

# Make sure PYTHONPATH includes both /code and site-packages
ENV PYTHONPATH "${PYTHONPATH}:/code"

# Install any other explicit dependencies from requirements.txt
RUN pip install -r requirements.txt

# Make debug and extraction scripts executable
RUN chmod +x /code/debug_llama_omni2.py \
    && chmod +x /code/extract_llama_omni2_scripts.py

# Create startup script with enhanced diagnostics and fallbacks
RUN echo '#!/bin/bash\n\
echo "--- CONTAINER STARTING ---"\n\
echo "PYTHONPATH: $PYTHONPATH"\n\
echo "Python sys.path:"\n\
python -c "import sys; print(sys.path)"\n\
\n\
echo "Running diagnostic script..."\n\
python /code/debug_llama_omni2.py\n\
\n\
# Check if llama_omni2 module is importable\n\
if ! python -c "import llama_omni2" > /dev/null 2>&1; then\n\
    echo "WARNING: llama_omni2 module cannot be imported. Extracting scripts as fallback..."\n\
    python /code/extract_llama_omni2_scripts.py\n\
\n\
    # Add the extracted directory to PYTHONPATH\n\
    if [ -d "/code/llama_omni2_extracted" ]; then\n\
        export PYTHONPATH="$PYTHONPATH:/code/llama_omni2_extracted"\n\
        echo "Added /code/llama_omni2_extracted to PYTHONPATH: $PYTHONPATH"\n\
    fi\n\
fi\n\
\n\
echo "Starting LLaMA-Omni2 application..."\n\
python app.py\n' > /code/startup.sh \
    && chmod +x /code/startup.sh

# Expose the port Gradio will run on
EXPOSE 7860

# Command to run the application
CMD ["/code/startup.sh"]
```
debug_llama_omni2.py
DELETED
@@ -1,168 +0,0 @@

```python
#!/usr/bin/env python3
"""
LLaMA-Omni2 Debug Script
------------------------
This script helps diagnose issues with LLaMA-Omni2 installation and imports.
It checks:
1. Python environment
2. Module locations
3. Import capabilities
4. Script locations
"""

import os
import sys
import importlib
import subprocess

def print_section(title):
    """Print a section header for better readability"""
    print("\n" + "=" * 50)
    print(f" {title} ".center(50, "="))
    print("=" * 50)

def find_module_in_paths(module_name, paths=None):
    """Find all occurrences of a module in the specified paths"""
    if paths is None:
        paths = sys.path

    found_locations = []
    for path in paths:
        potential_path = os.path.join(path, module_name)
        if os.path.exists(potential_path):
            found_locations.append(potential_path)

    return found_locations

def find_scripts(script_name, search_dirs=None):
    """Find scripts by name in the specified directories"""
    if search_dirs is None:
        search_dirs = [
            '/code',
            '/tmp/LLaMA-Omni2',
            '/usr/local/lib/python3.10/site-packages',
            '/home/user'
        ]

    found_scripts = []

    for search_dir in search_dirs:
        if not os.path.exists(search_dir):
            continue

        for root, dirs, files in os.walk(search_dir):
            # Skip .git and other large dirs
            dirs[:] = [d for d in dirs if d not in ('.git', 'node_modules')]

            if script_name in files:
                found_scripts.append(os.path.join(root, script_name))

    return found_scripts

def check_pip_installed():
    """Check if llama_omni2 is properly installed via pip"""
    try:
        result = subprocess.run(['pip', 'show', 'llama-omni2'],
                                capture_output=True, text=True)
        if result.returncode == 0:
            print("LLaMA-Omni2 is installed via pip:")
            print(result.stdout)
        else:
            print("LLaMA-Omni2 is NOT installed via pip")
    except Exception as e:
        print(f"Error checking pip installation: {e}")

def main():
    # 1. Environment Information
    print_section("ENVIRONMENT INFORMATION")
    print(f"Python Executable: {sys.executable}")
    print(f"Python Version: {sys.version}")
    print(f"Working Directory: {os.getcwd()}")

    # 2. PYTHONPATH
    print_section("PYTHONPATH")
    pythonpath = os.environ.get('PYTHONPATH', 'Not set')
    print(f"PYTHONPATH Environment Variable: {pythonpath}")

    # 3. sys.path
    print_section("sys.path")
    for i, path in enumerate(sys.path):
        print(f"{i}: {path}")

    # 4. Check if llama_omni2 is pip-installed
    print_section("PIP INSTALLATION")
    check_pip_installed()

    # 5. Find llama_omni2 in sys.path
    print_section("LLAMA_OMNI2 MODULE LOCATIONS")
    found_locations = find_module_in_paths('llama_omni2')
    if found_locations:
        print("Found llama_omni2 module in the following locations:")
        for loc in found_locations:
            print(f" - {loc}")
    else:
        print("Could not find llama_omni2 module in sys.path!")

    # 6. Try to import llama_omni2
    print_section("IMPORT TEST")
    try:
        import llama_omni2
        print(f"Successfully imported llama_omni2 from: {llama_omni2.__file__}")

        # Check if key modules exist
        modules_to_check = [
            'llama_omni2.serve.controller',
            'llama_omni2.serve.model_worker',
            'llama_omni2.serve.gradio_web_server'
        ]

        for module in modules_to_check:
            try:
                importlib.import_module(module)
                print(f"✅ Successfully imported {module}")
            except ImportError as e:
                print(f"❌ Failed to import {module}: {e}")

    except ImportError as e:
        print(f"Failed to import llama_omni2: {e}")

    # 7. Find core script files
    print_section("SCRIPT LOCATIONS")
    scripts_to_find = ['controller.py', 'model_worker.py', 'gradio_web_server.py']

    for script in scripts_to_find:
        found_scripts = find_scripts(script)
        if found_scripts:
            print(f"Found {script} at:")
            for path in found_scripts:
                print(f" - {path}")
        else:
            print(f"Could not find {script}")

    # 8. Test running the scripts directly
    print_section("DIRECT SCRIPT EXECUTION TEST")

    for script in scripts_to_find:
        found_scripts = find_scripts(script)
        if found_scripts:
            script_path = found_scripts[0]
            print(f"Testing if {script_path} can be executed...")
            try:
                # Just import the script module directly to see if it loads
                result = subprocess.run(
                    [sys.executable, '-c', f"import importlib.util; spec = importlib.util.spec_from_file_location('test', '{script_path}'); module = importlib.util.module_from_spec(spec); spec.loader.exec_module(module); print('Successfully loaded {script}')"],
                    capture_output=True, text=True, timeout=5
                )
                if result.returncode == 0:
                    print(f"✅ Script can be imported: {script_path}")
                    print(result.stdout)
                else:
                    print(f"❌ Script import failed: {script_path}")
                    print(result.stderr)
            except subprocess.TimeoutExpired:
                print(f"⚠️ Script import timed out: {script_path}")
            except Exception as e:
                print(f"❌ Error testing script: {e}")

if __name__ == "__main__":
    main()
```
extract_llama_omni2_scripts.py
DELETED
@@ -1,215 +0,0 @@

````python
#!/usr/bin/env python3
"""
Extract LLaMA-Omni2 Serve Scripts
---------------------------------
This script downloads and extracts just the necessary serve scripts from
the LLaMA-Omni2 GitHub repository to enable running the server components
without a full package installation.
"""

import os
import sys
import subprocess
import shutil
import tempfile
import importlib.util

def print_section(title):
    """Print a section header"""
    print("\n" + "=" * 60)
    print(f" {title} ".center(60, "="))
    print("=" * 60)

def clone_repo(target_dir):
    """Clone the LLaMA-Omni2 repository to a temporary location"""
    print_section("CLONING REPOSITORY")
    print(f"Cloning LLaMA-Omni2 repository to {target_dir}...")

    try:
        subprocess.run(
            ["git", "clone", "https://github.com/ICTNLP/LLaMA-Omni2.git", target_dir],
            check=True
        )
        print(f"Successfully cloned repository to {target_dir}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Failed to clone repository: {e}")
        return False

def extract_serve_scripts(repo_dir, output_dir):
    """Extract the serve scripts and dependencies to the output directory"""
    print_section("EXTRACTING SERVE SCRIPTS")

    # Ensure output directories exist
    serve_dir = os.path.join(output_dir, "llama_omni2", "serve")
    os.makedirs(serve_dir, exist_ok=True)

    # Copy serve scripts
    source_serve_dir = os.path.join(repo_dir, "llama_omni2", "serve")
    if not os.path.exists(source_serve_dir):
        print(f"Error: Source serve directory not found at {source_serve_dir}")
        return False

    print(f"Copying serve scripts from {source_serve_dir} to {serve_dir}")

    # Copy all files from serve directory
    for filename in os.listdir(source_serve_dir):
        source_file = os.path.join(source_serve_dir, filename)
        if os.path.isfile(source_file):
            shutil.copy2(source_file, serve_dir)
            print(f"Copied {filename}")

    # Copy __init__.py files to make the modules importable
    init_files = [
        os.path.join(output_dir, "llama_omni2", "__init__.py"),
        os.path.join(serve_dir, "__init__.py")
    ]

    for init_file in init_files:
        if not os.path.exists(init_file):
            with open(init_file, 'w') as f:
                f.write("# Auto-generated __init__.py file\n")
            print(f"Created {init_file}")

    # Also copy key dependencies from the llama_omni2 module
    modules_to_copy = [
        "model",
        "common"
    ]

    for module in modules_to_copy:
        source_module_dir = os.path.join(repo_dir, "llama_omni2", module)
        if os.path.exists(source_module_dir):
            target_module_dir = os.path.join(output_dir, "llama_omni2", module)
            print(f"Copying {module} module to {target_module_dir}")
            shutil.copytree(source_module_dir, target_module_dir, dirs_exist_ok=True)

            # Add __init__.py file if it doesn't exist
            init_file = os.path.join(target_module_dir, "__init__.py")
            if not os.path.exists(init_file):
                with open(init_file, 'w') as f:
                    f.write("# Auto-generated __init__.py file\n")

    print("Extraction completed successfully")
    return True

def test_scripts(scripts_dir):
    """Test if the extracted scripts can be imported"""
    print_section("TESTING EXTRACTED SCRIPTS")

    # Make sure the scripts directory is in the Python path
    parent_dir = os.path.dirname(scripts_dir)
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)

    # Try to import each script
    script_paths = [
        os.path.join(scripts_dir, "llama_omni2", "serve", "controller.py"),
        os.path.join(scripts_dir, "llama_omni2", "serve", "model_worker.py"),
        os.path.join(scripts_dir, "llama_omni2", "serve", "gradio_web_server.py")
    ]

    for script_path in script_paths:
        if not os.path.exists(script_path):
            print(f"❌ Script not found: {script_path}")
            continue

        try:
            script_name = os.path.basename(script_path).replace(".py", "")
            spec = importlib.util.spec_from_file_location(script_name, script_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            print(f"✅ Successfully imported {script_path}")
        except Exception as e:
            print(f"❌ Failed to import {script_path}: {e}")

def create_usage_instructions(output_dir, scripts_dir):
    """Create usage instructions for the extracted scripts"""
    print_section("CREATING USAGE INSTRUCTIONS")

    instruction_file = os.path.join(output_dir, "README.md")

    with open(instruction_file, 'w') as f:
        f.write("""# LLaMA-Omni2 Extracted Serve Scripts

This directory contains the extracted serve scripts from LLaMA-Omni2 to run without a full package installation.

## Usage

### 1. Make sure Python can find these modules:

```bash
export PYTHONPATH=$PYTHONPATH:/path/to/this/directory
```

### 2. Run the controller:

```bash
python -m llama_omni2.serve.controller --host 0.0.0.0 --port 10000
```

### 3. Run the model worker:

```bash
python -m llama_omni2.serve.model_worker \\
    --host 0.0.0.0 \\
    --controller http://localhost:10000 \\
    --port 40000 \\
    --worker http://localhost:40000 \\
    --model-path /path/to/model \\
    --model-name MODEL_NAME
```

### 4. Run the Gradio web server:

```bash
python -m llama_omni2.serve.gradio_web_server \\
    --host 0.0.0.0 \\
    --port 7860 \\
    --controller-url http://localhost:10000 \\
    --model-list-mode reload \\
    --vocoder-dir /path/to/vocoder
```

Alternatively, you can run these scripts directly:

```bash
python llama_omni2/serve/controller.py --host 0.0.0.0 --port 10000
```
""")

    print(f"Created usage instructions at {instruction_file}")

def main():
    """Main function to extract and test LLaMA-Omni2 scripts"""
    output_dir = "/home/user/app/llama_omni2_extracted"

    print(f"This script will extract LLaMA-Omni2 serve scripts to {output_dir}")

    # Create temporary directory for cloning
    with tempfile.TemporaryDirectory() as temp_dir:
        # Clone repository
        if not clone_repo(temp_dir):
            print("Failed to clone repository. Exiting.")
            return 1

        # Extract serve scripts
        if not extract_serve_scripts(temp_dir, output_dir):
            print("Failed to extract serve scripts. Exiting.")
            return 1

        # Create usage instructions
        create_usage_instructions(output_dir, output_dir)

        # Test scripts
        test_scripts(output_dir)

    print_section("EXTRACTION COMPLETED")
    print(f"LLaMA-Omni2 serve scripts have been extracted to {output_dir}")
    print(f"Add this directory to PYTHONPATH: export PYTHONPATH=$PYTHONPATH:{output_dir}")
    print("See README.md for usage instructions")

    return 0

if __name__ == "__main__":
    sys.exit(main())
````
run_controller_directly.py
DELETED
@@ -1,192 +0,0 @@

```python
#!/usr/bin/env python3
"""
A standalone implementation of the LLaMA-Omni2 controller
that doesn't rely on any LLaMA-Omni2 imports.
"""

import argparse
import asyncio
import dataclasses
import json
import logging
import time
from typing import Dict, List, Optional, Set, Tuple, Union

import fastapi
from fastapi import BackgroundTasks, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
import uvicorn

# Define constants
CONTROLLER_HEART_BEAT_EXPIRATION = 120
MODEL_WORKER_API_TIMEOUT = 100

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define data models using dataclasses instead of pydantic
@dataclasses.dataclass
class ModelInfo:
    id: str
    name: str
    worker_names: List[str]
    time: float = dataclasses.field(default_factory=time.time)

@dataclasses.dataclass
class WorkerInfo:
    worker_name: str
    model_names: List[str]
    check_heart_beat: bool
    last_heart_beat: float = dataclasses.field(default_factory=time.time)

# Global state
worker_info: Dict[str, WorkerInfo] = {}
model_info: Dict[str, ModelInfo] = {}
worker_addr: Dict[str, str] = {}

# FastAPI app
app = fastapi.FastAPI()

@app.post("/register_worker")
async def register_worker(request: Request):
    data = await request.json()
    worker_name = data.get("worker_name")
    worker_url = data.get("worker_url")
    model_names = data.get("model_names", [])
    check_heart_beat = data.get("check_heart_beat", True)

    logger.info(f"Registering worker {worker_name} at {worker_url}")

    worker_info[worker_name] = WorkerInfo(
        worker_name=worker_name,
        model_names=model_names,
        check_heart_beat=check_heart_beat,
        last_heart_beat=time.time()
    )
    worker_addr[worker_name] = worker_url

    # Register models
    for model_name in model_names:
        if model_name in model_info:
            model_info[model_name].worker_names.append(worker_name)
        else:
            model_id = f"model-{len(model_info)}"
            model_info[model_name] = ModelInfo(
                id=model_id,
                name=model_name,
                worker_names=[worker_name]
            )

    return {"result": "success"}

@app.post("/unregister_worker")
async def unregister_worker(request: Request):
    data = await request.json()
    worker_name = data.get("worker_name")

    logger.info(f"Unregistering worker {worker_name}")

    if worker_name in worker_info:
        for model_name in worker_info[worker_name].model_names:
            if model_name in model_info:
                if worker_name in model_info[model_name].worker_names:
                    model_info[model_name].worker_names.remove(worker_name)
                if len(model_info[model_name].worker_names) == 0:
                    del model_info[model_name]

        del worker_info[worker_name]

    if worker_name in worker_addr:
        del worker_addr[worker_name]

    return {"result": "success"}

@app.post("/heart_beat")
async def heart_beat(request: Request):
    data = await request.json()
    worker_name = data.get("worker_name")

    if worker_name not in worker_info or worker_name not in worker_addr:
        return {"result": "failure", "error": f"Worker {worker_name} not found"}

    worker_info[worker_name].last_heart_beat = time.time()

    return {"result": "success"}

@app.get("/list_models")
async def list_models():
    models = []
    for name, info in model_info.items():
        models.append({
            "id": info.id,
            "name": name
        })
    return {"models": models}

@app.get("/get_worker_address")
async def get_worker_address(model_name: str):
    if model_name not in model_info or not model_info[model_name].worker_names:
        return JSONResponse(
            {"error": f"No available workers for model {model_name}"},
            status_code=400
        )

    # Simple round-robin selection among available workers
    workers = model_info[model_name].worker_names
    selected_worker = workers[int(time.time()) % len(workers)]

    return {"address": worker_addr.get(selected_worker)}

@app.get("/worker_status")
async def worker_status():
    return {"worker_info": [
        {
            "name": name,
            "address": worker_addr.get(name),
            "models": info.model_names,
            "last_heart_beat": info.last_heart_beat,
            "status": "alive" if not info.check_heart_beat or
                      (time.time() - info.last_heart_beat) < CONTROLLER_HEART_BEAT_EXPIRATION
                      else "dead"
        }
        for name, info in worker_info.items()
    ]}

@app.get("/status")
async def status():
    return {
        "model_info": [
            {
                "name": name,
                "id": info.id,
                "workers": info.worker_names
            }
            for name, info in model_info.items()
        ],
        "worker_info": [
            {
                "name": name,
                "address": worker_addr.get(name),
                "models": info.model_names,
                "last_heart_beat": info.last_heart_beat,
                "status": "alive" if not info.check_heart_beat or
                          (time.time() - info.last_heart_beat) < CONTROLLER_HEART_BEAT_EXPIRATION
                          else "dead"
            }
            for name, info in worker_info.items()
        ]
    }

# Run the server
def main():
    parser = argparse.ArgumentParser(description="Controller for LLaMA-Omni2")
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=10000)
    args = parser.parse_args()

    logger.info(f"Starting controller server at http://{args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port)

if __name__ == "__main__":
    main()
```
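
Once the controller is running, its registration and lookup endpoints can be exercised with plain curl; a minimal sketch in which the worker name, worker URL, and model name are illustrative:

```bash
# Register a worker with the controller
curl -X POST http://localhost:10000/register_worker \
  -H "Content-Type: application/json" \
  -d '{"worker_name": "worker-demo", "worker_url": "http://localhost:40000", "model_names": ["LLaMA-Omni2-0.5B"], "check_heart_beat": true}'

# List the models the controller currently knows about
curl http://localhost:10000/list_models

# Resolve a worker address for a given model
curl "http://localhost:10000/get_worker_address?model_name=LLaMA-Omni2-0.5B"
```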
run_gradio_directly.py
DELETED
@@ -1,191 +0,0 @@

```python
#!/usr/bin/env python3
"""
A minimal Gradio web interface for LLaMA-Omni2 that doesn't rely on
importing from the LLaMA-Omni2 package.
"""

import argparse
import asyncio
import json
import logging
import os
import time
from typing import Dict, List, Optional

import aiohttp
import gradio as gr

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LLaMA_Omni2_UI:
    def __init__(
        self,
        controller_url: str,
        vocoder_dir: str
    ):
        self.controller_url = controller_url
        self.vocoder_dir = vocoder_dir
        self.model_list = []
        self.model_names = []

        # Verify vocoder directory exists
        if not os.path.exists(vocoder_dir):
            logger.warning(f"Vocoder directory not found at {vocoder_dir}")
            logger.warning("Voice synthesis will not be available")
        else:
            logger.info(f"Using vocoder at {vocoder_dir}")

    async def fetch_model_list(self):
        """Fetch the list of models from the controller"""
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"{self.controller_url}/list_models",
                    timeout=30
                ) as response:
                    if response.status == 200:
                        data = await response.json()
                        self.model_list = data.get("models", [])
                        self.model_names = [model.get("name") for model in self.model_list]
                        return self.model_names
                    else:
                        logger.error(f"Failed to fetch model list: {await response.text()}")
                        return []
        except Exception as e:
            logger.error(f"Error fetching model list: {e}")
            return []

    async def get_worker_address(self, model_name: str):
        """Get the address of a worker serving the specified model"""
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"{self.controller_url}/get_worker_address?model_name={model_name}",
                    timeout=30
                ) as response:
                    if response.status == 200:
                        data = await response.json()
                        return data.get("address")
                    else:
                        logger.error(f"Failed to get worker address: {await response.text()}")
                        return None
        except Exception as e:
            logger.error(f"Error getting worker address: {e}")
            return None

    async def generate_text(self, prompt: str, model_name: str):
        """Generate text using the specified model"""
        worker_addr = await self.get_worker_address(model_name)
        if not worker_addr:
            return f"Error: No worker available for model {model_name}"

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{worker_addr}/generate",
                    json={"prompt": prompt},
                    timeout=120
                ) as response:
                    if response.status == 200:
                        data = await response.json()
                        return data.get("response", "No response received from model")
                    else:
                        error_text = await response.text()
                        logger.error(f"Failed to generate text: {error_text}")
                        return f"Error: {error_text}"
        except Exception as e:
            logger.error(f"Error generating text: {e}")
            return f"Error: {str(e)}"

    def build_demo(self):
        """Build the Gradio interface"""
        with gr.Blocks(title="LLaMA-Omni2 Web UI") as demo:
            gr.Markdown("# LLaMA-Omni2 Web UI")

            with gr.Row():
                with gr.Column(scale=1):
                    model_dropdown = gr.Dropdown(
                        choices=self.model_names or ["No models available"],
                        label="Model",
                        value=self.model_names[0] if self.model_names else None
                    )

                    refresh_button = gr.Button("Refresh Models")

            with gr.Row():
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        lines=5,
                        placeholder="Enter text here...",
                        label="Input Text"
                    )

            with gr.Row():
                with gr.Column(scale=1):
                    submit_button = gr.Button("Generate", variant="primary")
                    clear_button = gr.Button("Clear")

            with gr.Row():
                with gr.Column(scale=3):
                    text_output = gr.Textbox(
                        lines=10,
                        label="Generated Text",
                        interactive=False
                    )

            async def refresh_models():
                model_names = await self.fetch_model_list()
                return gr.update(choices=model_names or ["No models available"])

            async def generate(text, model):
                if not text.strip():
                    return "Please enter some text"
                if not model or model == "No models available":
                    return "Please select a model"

                return await self.generate_text(text, model)

            def clear():
                return "", ""

            # Gradio runs async callbacks natively, so the coroutines are
            # passed directly instead of being wrapped in asyncio tasks
            refresh_button.click(fn=refresh_models, outputs=[model_dropdown])
            submit_button.click(fn=generate,
                                inputs=[text_input, model_dropdown],
                                outputs=[text_output])
            clear_button.click(fn=clear, outputs=[text_input, text_output])

        return demo

def main():
    parser = argparse.ArgumentParser(description="Gradio web server for LLaMA-Omni2")
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)
    parser.add_argument("--controller-url", type=str, default="http://localhost:10000")
    parser.add_argument("--vocoder-dir", type=str, required=True)
    parser.add_argument("--share", action="store_true", help="Create a public link")
    args = parser.parse_args()

    logger.info(f"Using controller at {args.controller_url}")

    # Create the UI
    ui = LLaMA_Omni2_UI(
        controller_url=args.controller_url,
        vocoder_dir=args.vocoder_dir
    )

    # Start by fetching the model list
    asyncio.run(ui.fetch_model_list())

    # Build and launch the demo
    demo = ui.build_demo()
    demo.queue()
    demo.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share
    )

if __name__ == "__main__":
    main()
```
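
Launching the interface follows directly from the argument parser above; a sketch in which the vocoder path is a placeholder (`--vocoder-dir` is the only required flag):

```bash
python run_gradio_directly.py \
  --host 0.0.0.0 \
  --port 7860 \
  --controller-url http://localhost:10000 \
  --vocoder-dir /path/to/vocoder
```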
run_model_worker_directly.py
DELETED
@@ -1,208 +0,0 @@

```python
#!/usr/bin/env python3
"""
A simplified implementation of the LLaMA-Omni2 model worker
that doesn't rely on deep LLaMA-Omni2 imports.
"""

import argparse
import asyncio
import json
import logging
import os
import re
import threading
import time
import uuid
from typing import Dict, List, Optional, Tuple

import aiohttp
import fastapi
from fastapi import BackgroundTasks, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
import gradio as gr
import uvicorn
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define constants
WORKER_HEART_BEAT_INTERVAL = 30
CONTROLLER_HEART_BEAT_EXPIRATION = 120

class ModelWorker:
    def __init__(
        self,
        controller_addr: str,
        worker_addr: str,
        worker_id: str,
        model_path: str,
        model_name: str,
        device: str = "cpu",
        limit_worker_concurrency: int = 5,
    ):
        self.controller_addr = controller_addr
        self.worker_addr = worker_addr
        self.worker_id = worker_id
        self.model_path = model_path
        self.model_name = model_name
        self.device = device
        self.limit_worker_concurrency = limit_worker_concurrency

        # Track current requests
        self.lock = asyncio.Lock()
        self.messages = {}
        self.sem = asyncio.Semaphore(limit_worker_concurrency)

        # Placeholders - the real implementation would load the model
        logger.info(f"Loading model from {model_path}...")
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
            self.model = None  # In a real implementation, we would load the model here
            logger.info("Model initialization successful (tokenizer only, no model)")
        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            logger.info("Using dummy model instead")
            self.tokenizer = None
            self.model = None

        logger.info(f"Model loaded successfully ({model_name})")

    async def generate_response(self, request_data):
        """Generate a response (simulated)"""
        prompt = request_data.get("prompt", "")
        response = f"This is a simulated response for prompt: {prompt[:30]}..."
        return response

    async def register_to_controller(self):
        """Register this worker with the controller"""
        controller_addr = self.controller_addr
        worker_addr = self.worker_addr
        worker_id = self.worker_id
        model_name = self.model_name

        data = {
            "worker_name": worker_id,
            "worker_url": worker_addr,
            "model_names": [model_name],
            "check_heart_beat": True,
        }

        logger.info(f"Register to controller at {controller_addr}")

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{controller_addr}/register_worker",
                json=data,
                timeout=30,
            ) as response:
                if response.status != 200:
                    logger.error(f"Failed to register to controller: {await response.text()}")
                    return False
                else:
                    logger.info("Registered to controller successfully")
                    return True

    async def send_heart_beat(self):
        """Send a heartbeat to the controller periodically"""
        controller_addr = self.controller_addr
        worker_id = self.worker_id

        data = {
            "worker_name": worker_id,
        }

        async with aiohttp.ClientSession() as session:
            while True:
                try:
                    async with session.post(
                        f"{controller_addr}/heart_beat",
                        json=data,
                        timeout=30,
                    ) as response:
                        if response.status != 200:
                            logger.error(f"Failed to send heart beat: {await response.text()}")
                except Exception as e:
                    logger.error(f"Error sending heart beat: {e}")

                await asyncio.sleep(WORKER_HEART_BEAT_INTERVAL)

# FastAPI app
app = fastapi.FastAPI()

@app.post("/generate")
async def generate(request: Request):
    """Generate text based on the prompt"""
    global model_worker

    if not model_worker:
        return JSONResponse(
            {"error": "Model worker not initialized"},
            status_code=500,
        )

    data = await request.json()
    response = await model_worker.generate_response(data)

    return {"response": response}

@app.get("/status")
async def status():
    """Get the status of the worker"""
    global model_worker

    if not model_worker:
        return {"status": "offline"}

    return {
        "status": "online",
        "model_name": model_worker.model_name,
        "worker_id": model_worker.worker_id,
    }

# Global model worker instance
model_worker = None

def start_background_tasks(app):
    """Start background tasks when the app starts"""
    asyncio.create_task(model_worker.register_to_controller())
    asyncio.create_task(model_worker.send_heart_beat())

# Run the server
def main():
    global model_worker

    parser = argparse.ArgumentParser(description="Model worker for LLaMA-Omni2")
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=40000)
    parser.add_argument("--controller", type=str, default="http://localhost:10000")
    parser.add_argument("--worker", type=str, default="http://localhost:40000")
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--model-name", type=str, required=True)
    parser.add_argument("--limit-worker-concurrency", type=int, default=5)
    parser.add_argument("--device", type=str, default="cpu")
    args = parser.parse_args()

    logger.info(f"Initializing model worker with model {args.model_name}")

    # Initialize the model worker
    worker_id = f"worker-{str(uuid.uuid4())[:8]}"
    model_worker = ModelWorker(
        controller_addr=args.controller,
        worker_addr=args.worker,
        worker_id=worker_id,
        model_path=args.model_path,
        model_name=args.model_name,
        device=args.device,
        limit_worker_concurrency=args.limit_worker_concurrency,
    )

    # Start the FastAPI app with background tasks
    app.add_event_handler("startup", lambda: start_background_tasks(app))

    logger.info(f"Starting model worker server at http://{args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port)

if __name__ == "__main__":
    main()
```
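
A sketch of starting the worker and hitting its stubbed `/generate` endpoint; the model path and name are placeholders, and the reply is the simulated one produced by `generate_response` above:

```bash
# Start the worker (model path and name are placeholders)
python run_model_worker_directly.py \
  --controller http://localhost:10000 \
  --worker http://localhost:40000 \
  --port 40000 \
  --model-path /path/to/model \
  --model-name LLaMA-Omni2-0.5B

# Query it directly once it is up
curl -X POST http://localhost:40000/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello"}'
```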
test_llama_omni_api.py
DELETED
@@ -1,84 +0,0 @@

```python
#!/usr/bin/env python3
"""
Test script for LLaMA-Omni API on Hugging Face Spaces.
This script sends a text message to the LLaMA-Omni2-0.5B API and saves the response.
"""

import os
import time
from pathlib import Path
from gradio_client import Client

# API endpoint
API_URL = "https://marcosremar2-llama-omni.hf.space"  # Gradio Space URL

# Input and output paths
INPUT_AUDIO_PATH = "/Users/marcos/Documents/projects/test/whisper-realtime/llama-omni/llama-omni/test.mp3"
OUTPUT_DIR = "./output"
OUTPUT_TEXT_PATH = os.path.join(OUTPUT_DIR, f"response_{int(time.time())}.txt")

def main():
    """Main function to test the LLaMA-Omni API"""
    # Ensure output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print(f"Audio file path: {INPUT_AUDIO_PATH}")
    print(f"API URL: {API_URL}")

    try:
        # Connect to the Gradio app with increased timeout
        client = Client(
            API_URL,
            httpx_kwargs={"timeout": 300.0}  # Increase timeout to 5 minutes
        )

        print("Connected to API successfully")

        # Inspect the API endpoints
        print("Available API endpoints:")
        client.view_api()

        # Since this is a text-based model (LLaMA-Omni2), we'll send a text prompt
        # The audio file can't be directly processed by this API
        print("\nUsing the text generation endpoint (/lambda_1)...")

        # Create a text prompt describing the audio
        prompt = """This is a test of the LLaMA-Omni2-0.5B API.
Please respond with a sample of what you can do as an AI assistant."""

        # Submit the text to the API
        print(f"Sending text prompt: '{prompt[:50]}...'")
        job = client.submit(
            prompt,
            "LLaMA-Omni2-0.5B",  # Updated model name
            api_name="/lambda_1"
        )

        print("Job submitted, waiting for response...")
        result = job.result()
        print(f"Response received (length: {len(str(result))} characters)")

        # Save the text result
        with open(OUTPUT_TEXT_PATH, "w") as f:
            f.write(str(result))

        print(f"Text response saved to: {OUTPUT_TEXT_PATH}")

        # Also try the model info endpoint
        try:
            print("\nQuerying model information...")
            model_info = client.submit(api_name="/lambda").result()
            print(f"Model info: {model_info}")
        except Exception as model_error:
            print(f"Error getting model info: {str(model_error)}")

    except Exception as e:
        print(f"Error during API request: {str(e)}")
        print("This could be because the Space is currently sleeping and needs time to wake up.")
        print("Try accessing the Space directly in a browser first: " + API_URL)
        print("\nNote: This API is for the LLaMA-Omni2-0.5B model and does not directly process audio files.")
        print("To work with audio, you would need to first transcribe the audio using a service like Whisper,")
        print("then send the transcribed text to this API.")

if __name__ == "__main__":
    main()
```
tests/README.md
CHANGED
````diff
@@ -1,13 +1,21 @@
 # Testing LLaMA-Omni2-0.5B on Hugging Face
 
-This directory contains a script
+This directory contains a complete script for testing the LLaMA-Omni2-0.5B model deployed on Hugging Face.
+
+## Script Features
+
+- Programmatic API testing (api mode)
+- Manual test interface in the browser (manual mode)
+- Local audio transcription with Whisper
+- Sending text directly to the model
+- Saving the transcription and the responses for reference
 
 ## Prerequisites
 
 Before running the test script, make sure you have installed the required dependencies:
 
 ```bash
-pip install requests
+pip install requests gradio-client
 ```
 
 For audio transcription (optional), you can install Whisper:
@@ -25,23 +33,25 @@ cd tests
 python test_llama_omni_api.py
 ```
 
-By default, the script will:
+By default, the script runs both modes (api and manual) and will:
 1. Try to transcribe the test.mp3 file using Whisper (if available)
-2. If Whisper is not available, it will use a default test message
-3.
-4.
-5.
+2. If Whisper is not available or the file does not exist, use a default test message
+3. Test the API programmatically and save the response
+4. Save the input text to a file for easy copying
+5. Open the LLaMA-Omni2-0.5B web interface on Hugging Face in your browser
+6. Provide instructions for manual testing
 
 ### Command-line parameters
 
 The script accepts the following command-line arguments:
 
 - `--api-url`: Gradio interface URL (default: https://marcosremar2-llama-omni.hf.space)
-- `--audio-file`: Path to the audio file to be transcribed locally (default:
+- `--audio-file`: Path to the audio file to be transcribed locally (default: test.mp3)
 - `--text`: Text to use directly (instead of transcribing audio)
-- `--output-dir`: Directory to save the transcription (default:
+- `--output-dir`: Directory to save the transcription and responses (default: ./output)
+- `--mode`: Test mode: api (programmatic), manual (browser), or both (default: both)
 
-Usage examples with custom parameters:
+### Usage examples with custom parameters:
 
 ```bash
 # Using direct text input
@@ -49,16 +59,33 @@ python test_llama_omni_api.py --text "Hello, this is a test message for
 
 # Using a custom audio file for transcription
 python test_llama_omni_api.py --audio-file /path/to/your/audio.mp3
+
+# Testing only the API mode programmatically
+python test_llama_omni_api.py --mode api
+
+# Only opening the web interface with a custom text
+python test_llama_omni_api.py --mode manual --text "Manual test of LLaMA-Omni2-0.5B"
 ```
 
-##
+## Test Modes
+
+### 1. API Mode (Programmatic)
+
+Sends a request directly to the model's API and saves the response to a file:
 
+- Connects to the Gradio API with an increased timeout
+- Lists the available endpoints
+- Sends the text to the generation endpoint
+- Saves the received response to a file
+- Also queries basic model information
+
+### 2. Manual Mode (Web Interface)
+
+Facilitates manual testing with the following workflow:
+
+1. **Text preparation**: the input text is saved to a file for easy copying
+2. **Browser launch**: the script opens the web interface in your default browser
+3. **Manual interaction**: you must manually:
 - Copy the text from the saved file
 - Paste it into the "Input Text" field in the web interface
 - Click the "Generate" button
@@ -72,15 +99,16 @@ If you run into any problems:
 1. Check that the web interface URL is correct and the service is running
 2. Make sure you have an internet connection
 3. If you are using audio transcription, make sure Whisper is installed correctly
+4. In API mode, check that the Gradio Space is active (Spaces sometimes "sleep" when idle)
 
 ## Common Errors
 
 ### Missing Dependencies
 
-If you see errors
+If you see errors related to missing modules, install the required dependencies:
 
 ```bash
-pip install openai-whisper
+pip install requests gradio-client openai-whisper
 ```
 
 ### Deploy on Hugging Face
````
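
The API mode the README describes reduces to a `gradio_client` call like the following minimal sketch; the `/lambda_1` endpoint name and `httpx_kwargs` timeout come from the deleted `test_llama_omni_api.py` above and may differ on the live Space:

```bash
python - <<'PY'
from gradio_client import Client

# Connect to the Space with a generous timeout (sleeping Spaces are slow to wake)
client = Client("https://marcosremar2-llama-omni.hf.space",
                httpx_kwargs={"timeout": 300.0})

# Submit a prompt to the generation endpoint and wait for the result
job = client.submit("Hello, testing LLaMA-Omni2-0.5B.",
                    "LLaMA-Omni2-0.5B",
                    api_name="/lambda_1")
print(job.result())
PY
```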
tests/test_llama_omni_api.py
CHANGED
```diff
@@ -1,16 +1,27 @@
 #!/usr/bin/env python3
 """
-Test
-This script
+Complete test for LLaMA-Omni2-0.5B on Hugging Face
+This script can:
+1. Transcribe audio locally and send it to the model
+2. Send text directly to the model
+3. Facilitate manual testing through the web interface
+4. Test the API directly in programmatic mode
 """
 
 import os
 import sys
+import time
 import argparse
 import requests
 import subprocess
 import webbrowser
 from pathlib import Path
+from gradio_client import Client
+
+# Default settings
+DEFAULT_API_URL = "https://marcosremar2-llama-omni.hf.space"
+DEFAULT_OUTPUT_DIR = "./output"
+MODEL_NAME = "LLaMA-Omni2-0.5B"
 
 def transcribe_audio_locally(audio_file_path):
     """
@@ -36,7 +47,7 @@ def transcribe_audio_locally(audio_file_path):
 
     # Default message
     print("Using the default test message, since whisper is not available")
-    return "Hello, I am testing the model
+    return f"Hello, I am testing the {MODEL_NAME} model. Can you reply in Portuguese?"
 
 def check_url_accessibility(url):
     """Check whether the URL is reachable"""
@@ -51,58 +62,98 @@ def check_url_accessibility(url):
     print(f"Error accessing URL: {e}")
     return False
 
-def
-    """Saves
+def save_text_to_file(text, output_dir, filename="text.txt"):
+    """Save text to a file for easy copying"""
     os.makedirs(output_dir, exist_ok=True)
     filepath = os.path.join(output_dir, filename)
 
     with open(filepath, "w") as f:
         f.write(text)
 
-    print(f"
+    print(f"Text saved to: {filepath}")
     return filepath
 
-def
+def test_api_programmatically(api_url, text_input, output_dir=DEFAULT_OUTPUT_DIR):
     """
-
-
-    2. Saves the text to a file for easy copying
-    3. Opens the web interface for manual testing
-
-    Args:
-        api_url: Gradio interface URL
-        audio_file_path: Path to the audio file
-        text_input: Text to use directly (instead of transcribing audio)
-        output_dir: Directory to save the transcription
-
-    Returns:
-        bool: True if the preparation succeeded, False otherwise
+    Test the model API programmatically by sending a text
+    and saving the response
     """
-
     os.makedirs(output_dir, exist_ok=True)
 
     # Check whether the URL is reachable
     print(f"Checking accessibility of {api_url}...")
     if not check_url_accessibility(api_url):
         print(f"Warning: {api_url} is not accessible. Manual testing may not be possible.")
 
-    # Get the input text from the transcription or from the parameter
-    input_text = text_input
-    if not input_text and audio_file_path:
-        input_text = transcribe_audio_locally(audio_file_path)
-    if not input_text:
-        input_text = "Hello, I am testing the LLaMA-Omni2-0.5B model. Can you reply in Portuguese?"
-
-    print(f"Text to use: {input_text}")
-
     # Save the text to a file for easy copying
-    transcript_file =
 
     # Instructions for manual testing
     print("\n" + "=" * 50)
-    print("INSTRUCTIONS FOR MANUAL TESTING")
     print("=" * 50)
-    print(f"1.
     print(f"2. Opening {api_url} in the browser...")
     print("3. Copy the text from the saved file and paste it into the 'Input Text' field")
    print("4. Click the 'Generate' button")
@@ -119,15 +170,17 @@ def test_llama_omni_manual(api_url, audio_file_path=None, text_input=None, outpu
     return False
 
 def main():
-    parser = argparse.ArgumentParser(description="Test for
-    parser.add_argument("--api-url", type=str, default=
-                        help="Gradio interface URL (default:
-    parser.add_argument("--audio-file", type=str, default="
                         help="Path to the audio file to be transcribed locally (optional)")
     parser.add_argument("--text", type=str, default=None,
                         help="Text to use directly (instead of transcribing audio)")
-    parser.add_argument("--output-dir", type=str, default=
-                        help="Directory to save the transcription")
     args = parser.parse_args()
 
     # Convert relative paths to absolute
@@ -140,13 +193,28 @@ def main():
     script_dir = os.path.dirname(os.path.abspath(__file__))
     args.output_dir = os.path.join(script_dir, args.output_dir)
 
-    #
-
-
-
-
-
-
 
     # Exit with the appropriate status code
     sys.exit(0 if success else 1)
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
"""
|
81 |
+
output_path = os.path.join(output_dir, f"response_{int(time.time())}.txt")
|
82 |
os.makedirs(output_dir, exist_ok=True)
|
83 |
|
84 |
+
print(f"Testando API em: {api_url}")
|
85 |
+
print(f"Texto de entrada: {text_input[:50]}..." if len(text_input) > 50 else f"Texto de entrada: {text_input}")
|
86 |
+
|
87 |
+
try:
|
88 |
+
# Conecta ao app Gradio com timeout aumentado
|
89 |
+
client = Client(
|
90 |
+
api_url,
|
91 |
+
httpx_kwargs={"timeout": 300.0} # 5 minutos de timeout
|
92 |
+
)
|
93 |
+
|
94 |
+
print("Conectado à API com sucesso")
|
95 |
+
|
96 |
+
# Lista os endpoints disponíveis
|
97 |
+
print("Endpoints disponíveis:")
|
98 |
+
client.view_api()
|
99 |
+
|
100 |
+
# Envia o prompt para o modelo
|
101 |
+
print(f"\nUsando endpoint de geração de texto (/lambda_1)...")
|
102 |
+
print(f"Enviando prompt: '{text_input[:50]}...'")
|
103 |
+
job = client.submit(
|
104 |
+
text_input,
|
105 |
+
MODEL_NAME,
|
106 |
+
api_name="/lambda_1"
|
107 |
+
)
|
108 |
+
|
109 |
+
print("Requisição enviada, aguardando resposta...")
|
110 |
+
result = job.result()
|
111 |
+
print(f"Resposta recebida (tamanho: {len(str(result))} caracteres)")
|
112 |
+
|
113 |
+
# Salva a resposta em arquivo
|
114 |
+
with open(output_path, "w") as f:
|
115 |
+
f.write(str(result))
|
116 |
+
|
117 |
+
print(f"Resposta salva em: {output_path}")
|
118 |
+
|
119 |
+
# Tenta obter informações do modelo
|
120 |
+
try:
|
121 |
+
print("\nConsultando informações do modelo...")
|
122 |
+
model_info = client.submit(api_name="/lambda").result()
|
123 |
+
print(f"Informações do modelo: {model_info}")
|
124 |
+
except Exception as model_error:
|
125 |
+
print(f"Erro ao obter informações do modelo: {str(model_error)}")
|
126 |
+
|
127 |
+
return True, result
|
128 |
+
|
129 |
+
except Exception as e:
|
130 |
+
print(f"Erro durante requisição à API: {str(e)}")
|
131 |
+
print("Isso pode ocorrer porque o Space está dormindo e precisa de tempo para iniciar.")
|
132 |
+
print("Tente acessar o Space diretamente primeiro: " + api_url)
|
133 |
+
print(f"\nNota: Esta API é para o modelo {MODEL_NAME} e não processa áudio diretamente.")
|
134 |
+
print("Para trabalhar com áudio, você precisaria primeiro transcrever o áudio usando Whisper,")
|
135 |
+
print("e então enviar o texto transcrito para esta API.")
|
136 |
+
return False, None
|
137 |
+
|
138 |
+
def test_manual_interface(api_url, text_input, output_dir=DEFAULT_OUTPUT_DIR):
|
139 |
+
"""
|
140 |
+
Prepara o teste manual do modelo via interface web:
|
141 |
+
1. Salva o texto em arquivo para fácil cópia
|
142 |
+
2. Abre a interface web para teste manual
|
143 |
+
"""
|
144 |
# Verifica se a URL é acessível
|
145 |
print(f"Verificando acessibilidade de {api_url}...")
|
146 |
if not check_url_accessibility(api_url):
|
147 |
print(f"Aviso: {api_url} não está acessível. Teste manual pode não ser possível.")
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
# Salva o texto em arquivo para fácil cópia
|
150 |
+
transcript_file = save_text_to_file(text_input, output_dir, "transcription.txt")
|
151 |
|
152 |
# Instruções para teste manual
|
153 |
print("\n" + "=" * 50)
|
154 |
+
print(f"INSTRUÇÕES PARA TESTE MANUAL DO {MODEL_NAME}")
|
155 |
print("=" * 50)
|
156 |
+
print(f"1. O texto foi salvo em: {transcript_file}")
|
157 |
print(f"2. Abrindo {api_url} no navegador...")
|
158 |
print("3. Copie o texto do arquivo salvo e cole no campo 'Input Text'")
|
159 |
print("4. Clique no botão 'Generate'")
|
|
|
170 |
return False
|
171 |
|
172 |
def main():
|
173 |
+
parser = argparse.ArgumentParser(description=f"Teste para {MODEL_NAME} no Hugging Face")
|
174 |
+
parser.add_argument("--api-url", type=str, default=DEFAULT_API_URL,
|
175 |
+
help=f"URL da interface Gradio (padrão: {DEFAULT_API_URL})")
|
176 |
+
parser.add_argument("--audio-file", type=str, default="test.mp3",
|
177 |
help="Caminho para o arquivo de áudio a ser transcrito localmente (opcional)")
|
178 |
parser.add_argument("--text", type=str, default=None,
|
179 |
help="Texto para usar diretamente (em vez de transcrever áudio)")
|
180 |
+
parser.add_argument("--output-dir", type=str, default=DEFAULT_OUTPUT_DIR,
|
181 |
+
help="Diretório para salvar a transcrição e respostas")
|
182 |
+
parser.add_argument("--mode", type=str, choices=["api", "manual", "both"], default="both",
|
183 |
+
help="Modo de teste: api (programático), manual (navegador) ou both (ambos)")
|
184 |
args = parser.parse_args()
|
185 |
|
186 |
# Converte caminhos relativos para absolutos
|
|
|
193 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
194 |
args.output_dir = os.path.join(script_dir, args.output_dir)
|
195 |
|
196 |
+
# Obtém texto de entrada da transcrição ou do parâmetro
|
197 |
+
input_text = args.text
|
198 |
+
if not input_text and args.audio_file:
|
199 |
+
if os.path.exists(args.audio_file):
|
200 |
+
input_text = transcribe_audio_locally(args.audio_file)
|
201 |
+
else:
|
202 |
+
print(f"Arquivo de áudio não encontrado: {args.audio_file}")
|
203 |
+
input_text = f"Olá, estou testando o modelo {MODEL_NAME}. Você pode me responder em português?"
|
204 |
+
if not input_text:
|
205 |
+
input_text = f"Olá, estou testando o modelo {MODEL_NAME}. Você pode me responder em português?"
|
206 |
+
|
207 |
+
print(f"Texto de entrada: {input_text}")
|
208 |
+
|
209 |
+
# Executa os testes conforme o modo selecionado
|
210 |
+
success = True
|
211 |
+
if args.mode in ["api", "both"]:
|
212 |
+
api_success, _ = test_api_programmatically(args.api_url, input_text, args.output_dir)
|
213 |
+
success = success and api_success
|
214 |
+
|
215 |
+
if args.mode in ["manual", "both"]:
|
216 |
+
manual_success = test_manual_interface(args.api_url, input_text, args.output_dir)
|
217 |
+
success = success and manual_success
|
218 |
|
219 |
# Sai com código apropriado
|
220 |
sys.exit(0 if success else 1)
|