Spaces: Build error
Commit 7ac31cd
Parent(s): bef9f8f
dsd
app.py CHANGED
[Old side of the diff: the removed lines appear truncated in this capture, so most of their content is unrecoverable. Identifiable removals: `import time`; the CosyVoice decoder configuration (`COSYVOICE_HF_REPO = "ICTNLP/cosy2_decoder"`, `COSYVOICE_PATH = "models/cosy2_decoder"`); and the commented-out model pre-loading calls in the `__main__` block. The other removed lines were earlier versions of the print messages, configuration values, and model-loading arguments that this commit rewrites. The new side of each hunk follows; lines added by this commit are marked `+`.]
@@ -1,59 +1,57 @@
import os
import sys
import gradio as gr
import whisper
from huggingface_hub import snapshot_download
import torch
import subprocess
+import transformers; transformers.utils.import_utils.check_dependency_versions()

# --- Aggressively update/install transformers and huggingface_hub BEFORE importing them ---
+print('Attempting to upgrade pip, transformers, and huggingface_hub...')
try:
+    print('Upgrading pip...')
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-U', 'pip'])
+    print('Upgrading transformers and huggingface_hub...')
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-U', 'transformers', 'huggingface_hub'])
+    print('Attempting to install transformers from main branch for latest features...')
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'git+https://github.com/huggingface/transformers.git'])
+    print('Pip, Transformers, and huggingface_hub update/install process completed.')
except subprocess.CalledProcessError as e:
+    print(f'ERROR: Failed to upgrade/install packages: {e}')
+    print('Continuing with potentially older versions. This might lead to model loading issues.')
except Exception as e:
+    print(f'An unexpected error occurred during package upgrades: {e}')

# --- Now, import from transformers ---
try:
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+    print('Successfully imported AutoModelForCausalLM, AutoTokenizer, AutoConfig from transformers.')
except ImportError as e:
+    print(f'CRITICAL ERROR: Failed to import from transformers after attempting upgrades: {e}')
+    print('The application might not work correctly. Please check the environment and dependencies.')
    # As a last resort, define dummy classes if import fails, so the rest of the script doesn't crash immediately
    class AutoModelForCausalLM: pass
    class AutoTokenizer: pass
    class AutoConfig: pass
except Exception as e:
+    print(f'An unexpected error occurred during transformers import: {e}')

# --- Configuration ---
+WHISPER_MODEL_SIZE = 'small'  # Using smallest model for faster processing in testing
+SPEECH_ENCODER_PATH = 'models/speech_encoder'
+MODEL_NAME = 'LLaMA-Omni2-0.5B'
+MODEL_PATH = f'models/{MODEL_NAME}'
+HF_REPO = f'ICTNLP/{MODEL_NAME}'

# --- Print diagnostics ---
+print('===== Application Startup =====')
+print('Python:', sys.version)
+print('Torch version:', torch.__version__)
+print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
+    print(f'CUDA device: {torch.cuda.get_device_name(0)}')
+    print(f'CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB')

# --- Main models ---
whisper_model = None
@@ -61,115 +59,112 @@ llama_model = None
tokenizer = None

def load_whisper_model():
+    '''Load Whisper model for speech recognition'''
    global whisper_model
+    print(f'Loading Whisper {WHISPER_MODEL_SIZE} model...')

    # Create directory if it doesn't exist
    os.makedirs(SPEECH_ENCODER_PATH, exist_ok=True)

    # Load the model (will download if not present)
    whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, download_root=SPEECH_ENCODER_PATH)
+    print(f'Whisper {WHISPER_MODEL_SIZE} model loaded successfully!')
    return whisper_model

def load_llama_model():
+    '''Load LLaMA-Omni2 model'''
    global llama_model, tokenizer
+    print(f'Attempting to load LLaMA-Omni2 model: {HF_REPO}')

    # Ensure local model directory exists for downloads
+    os.makedirs(MODEL_PATH, exist_ok=True)

    # Download model files if they aren't already present locally
    # Check for a common file like config.json to decide if download is needed
+    if not os.path.exists(os.path.join(MODEL_PATH, 'config.json')):
+        print(f'Local model files not found. Downloading from Hugging Face Hub: {HF_REPO} to {MODEL_PATH}')
        try:
            snapshot_download(
+                repo_id=HF_REPO,
+                local_dir=MODEL_PATH,
                local_dir_use_symlinks=False,
                resume_download=True,
            )
+            print('Model download complete.')
        except Exception as e:
+            print(f'ERROR during model download: {e}')
+            pass  # Allow to proceed to loading attempt, which will then fail more descriptively

    try:
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        torch_dtype = torch.float16 if device == 'cuda' else torch.float32
+        print(f'Target device: {device}, dtype: {torch_dtype}')

+        print(f'Attempt 1: Loading tokenizer and model directly from Hub identifier: {HF_REPO} with trust_remote_code=True')
        try:
            tokenizer = AutoTokenizer.from_pretrained(
+                HF_REPO,
                trust_remote_code=True
            )
+            print('Tokenizer loaded successfully from Hub identifier.')

            config = AutoConfig.from_pretrained(
+                HF_REPO,
                trust_remote_code=True
            )
+            print('Config loaded successfully from Hub identifier.')

            llama_model = AutoModelForCausalLM.from_pretrained(
+                HF_REPO,
+                config=config,  # Pass the loaded config
                torch_dtype=torch_dtype,
+                device_map=device,  # device_map handles moving parts of the model to CPU if OOM on GPU
                trust_remote_code=True
            )
+            print(f'LLaMA-Omni2 model loaded successfully directly from Hub: {HF_REPO}')
            return llama_model
        except Exception as e1:
+            print(f'Error in Attempt 1 (direct Hub load for {HF_REPO}): {e1}')
+            print('This often means the model requires a specific transformers version or has complex remote code.')

+            print(f'Attempt 2: Loading tokenizer and model from local path: {MODEL_PATH} with trust_remote_code=True (fallback)')
            try:
                tokenizer = AutoTokenizer.from_pretrained(
+                    MODEL_PATH,  # Fallback to local path
                    trust_remote_code=True
                )
+                print('Tokenizer loaded successfully from local path.')

                config = AutoConfig.from_pretrained(
+                    MODEL_PATH,
                    trust_remote_code=True
                )
+                print('Config loaded successfully from local path.')

                llama_model = AutoModelForCausalLM.from_pretrained(
+                    MODEL_PATH,  # Fallback to local path
                    config=config,
                    torch_dtype=torch_dtype,
                    device_map=device,
                    trust_remote_code=True
                )
+                print(f'LLaMA-Omni2 model loaded successfully from local path: {MODEL_PATH}')
                return llama_model
            except Exception as e2:
+                print(f'Error in Attempt 2 (local path load for {MODEL_PATH}): {e2}')

+                print('All attempts to load the LLaMA-Omni2 model failed.')
+                raise RuntimeError('Failed to load LLaMA-Omni2 model after multiple attempts.')

    except Exception as e_outer:
+        print(f'CRITICAL ERROR loading LLaMA-Omni2 model: {e_outer}')
+        print('Falling back: Text generation will not be available.')
+        llama_model = None  # Ensure llama_model is None if loading fails
+        tokenizer = None  # Ensure tokenizer is None
        return None

def transcribe_audio(audio_path):
+    '''Transcribe audio using Whisper'''
    global whisper_model

    if whisper_model is None:
@@ -177,12 +172,12 @@ def transcribe_audio(audio_path):

    try:
        result = whisper_model.transcribe(audio_path)
+        return result['text']
    except Exception as e:
+        return f'Error transcribing audio: {e}'

def generate_text(input_text):
+    '''Generate text using LLaMA-Omni2'''
    global llama_model, tokenizer

    if llama_model is None or tokenizer is None:
@@ -191,10 +186,10 @@ def generate_text(input_text):
    try:
        # If model loading failed, just return a placeholder response
        if llama_model is None:
+            return f'Model could not be loaded. Input was: {input_text}'

        device = next(llama_model.parameters()).device
+        inputs = tokenizer(input_text, return_tensors='pt').to(device)

        outputs = llama_model.generate(
            inputs.input_ids,
@@ -206,10 +201,10 @@

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
+        return f'Error generating text: {e}'

def speech_to_text_to_speech(audio_path):
+    '''Pipeline: Speech -> Text -> Response'''
    # First transcribe the audio
    transcription = transcribe_audio(audio_path)

@@ -218,42 +213,38 @@ def speech_to_text_to_speech(audio_path):

    return transcription, response

+# --- Gradio Interface for Hugging Face Spaces ---
def create_demo():
+    with gr.Blocks(title='LLaMA-Omni2 Demo on Hugging Face Spaces') as demo:
+        gr.Markdown('# LLaMA-Omni2 Demo')
+        gr.Markdown('This demo uses the smallest Whisper model and LLaMA-Omni2-0.5B for testing purposes.')

+        with gr.Tab('Text Generation'):
            with gr.Row():
+                text_input = gr.Textbox(label='Input Text', placeholder='Enter text here...')
+                text_output = gr.Textbox(label='Generated Response')

+            text_button = gr.Button('Generate Response')
            text_button.click(generate_text, inputs=text_input, outputs=text_output)

+        with gr.Tab('Speech-to-Text'):
+            audio_input = gr.Audio(type='filepath', label='Upload or Record Audio')
+            transcription_output = gr.Textbox(label='Transcription')
+            response_output = gr.Textbox(label='Generated Response')

+            transcribe_button = gr.Button('Transcribe and Respond')
            transcribe_button.click(speech_to_text_to_speech,
                                    inputs=audio_input,
                                    outputs=[transcription_output, response_output])

+        gr.Markdown('### Note: The first run will download models if needed, which may take some time.')

    return demo

# --- Main entry point ---
+if __name__ == '__main__':
+    print('Starting LLaMA-Omni2 Interface for Hugging Face Spaces...')

    # Create and launch the Gradio interface
    demo = create_demo()
+    demo.launch(server_name='0.0.0.0', server_port=7860, share=True)  # share=True for Hugging Face Spaces
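A minimal sketch of how the functions above could be smoke-tested locally without launching the Gradio UI. It is not part of this commit: the module name `app`, the `sample.wav` path, and the prompt string are placeholders, and importing the module will still run its top-level setup (the pip upgrades and diagnostics shown above), though not `demo.launch()`, which is guarded by `__main__`.

# smoke_test.py -- hypothetical check, not part of commit 7ac31cd.
# Assumes the file above is saved as app.py and its dependencies are installed.
import app

app.load_whisper_model()      # downloads Whisper into models/speech_encoder if missing
app.load_llama_model()        # leaves llama_model as None if every load attempt fails

print(app.transcribe_audio('sample.wav'))                # 'sample.wav' is a placeholder audio file
print(app.generate_text('Hello from the smoke test'))    # returns a placeholder string if the model is unavailable

Running a check like this before pushing tends to surface dependency and download failures as plain tracebacks rather than as a Space build error.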