Prathamesh Sarjerao Vaidya
commited on
Commit
·
94b1ab5
1
Parent(s):
896d872
optimize model-preloader and added github action for syncing it to drive
Browse files- .github/workflows/check.yml +124 -2
- .github/workflows/main.yml +129 -5
- demo_results/film_podcast_results.json +381 -0
- demo_results/yuri_kizaki_results.json +109 -0
- model_preloader.py +183 -97
.github/workflows/check.yml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
name: Check file size
|
| 2 |
-
on
|
| 3 |
pull_request:
|
| 4 |
branches: [main]
|
| 5 |
workflow_dispatch:
|
|
@@ -15,3 +15,125 @@ jobs:
|
|
| 15 |
uses: ActionsDesk/[email protected]
|
| 16 |
with:
|
| 17 |
filesizelimit: 1073741824 # 1GB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Check file size and sync to Google Drive
|
| 2 |
+
on
|
| 3 |
pull_request:
|
| 4 |
branches: [main]
|
| 5 |
workflow_dispatch:
|
|
|
|
| 15 |
uses: ActionsDesk/[email protected]
|
| 16 |
with:
|
| 17 |
filesizelimit: 1073741824 # 1GB
|
| 18 |
+
|
| 19 |
+
sync-to-drive-on-pr:
|
| 20 |
+
runs-on: ubuntu-latest
|
| 21 |
+
needs: check-file-size
|
| 22 |
+
if: github.event_name == 'pull_request'
|
| 23 |
+
steps:
|
| 24 |
+
- uses: actions/checkout@v3
|
| 25 |
+
with:
|
| 26 |
+
lfs: true
|
| 27 |
+
|
| 28 |
+
# Pull LFS files
|
| 29 |
+
- name: Pull LFS files
|
| 30 |
+
run: |
|
| 31 |
+
git lfs install
|
| 32 |
+
git lfs pull
|
| 33 |
+
|
| 34 |
+
# Install pandoc for MD to PDF conversion
|
| 35 |
+
- name: Install pandoc
|
| 36 |
+
run: |
|
| 37 |
+
sudo apt-get update
|
| 38 |
+
sudo apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra
|
| 39 |
+
|
| 40 |
+
# Convert MD files to PDF
|
| 41 |
+
- name: Convert MD to PDF
|
| 42 |
+
run: |
|
| 43 |
+
mkdir -p pdf_output
|
| 44 |
+
find . -name "*.md" -not -path "./.git/*" -not -path "./pdf_output/*" | while read file; do
|
| 45 |
+
relative_path="${file#./}"
|
| 46 |
+
pdf_path="pdf_output/${relative_path%.md}.pdf"
|
| 47 |
+
mkdir -p "$(dirname "$pdf_path")"
|
| 48 |
+
pandoc "$file" -o "$pdf_path" --pdf-engine=pdflatex
|
| 49 |
+
echo "Converted $file to $pdf_path"
|
| 50 |
+
done
|
| 51 |
+
|
| 52 |
+
# Set up Python for Google Drive upload
|
| 53 |
+
- name: Set up Python
|
| 54 |
+
uses: actions/setup-python@v4
|
| 55 |
+
with:
|
| 56 |
+
python-version: '3.9'
|
| 57 |
+
|
| 58 |
+
# Install Python dependencies
|
| 59 |
+
- name: Install Python dependencies
|
| 60 |
+
run: |
|
| 61 |
+
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
|
| 62 |
+
|
| 63 |
+
# Create and run upload script
|
| 64 |
+
- name: Upload to Google Drive (PR Preview)
|
| 65 |
+
env:
|
| 66 |
+
GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
|
| 67 |
+
run: |
|
| 68 |
+
cat > upload_to_drive.py << 'EOF'
|
| 69 |
+
import os
|
| 70 |
+
import json
|
| 71 |
+
from google.oauth2 import service_account
|
| 72 |
+
from googleapiclient.discovery import build
|
| 73 |
+
from googleapiclient.http import MediaFileUpload
|
| 74 |
+
import mimetypes
|
| 75 |
+
|
| 76 |
+
credentials_json = os.environ['GOOGLE_CREDENTIALS']
|
| 77 |
+
credentials_info = json.loads(credentials_json)
|
| 78 |
+
credentials = service_account.Credentials.from_service_account_info(credentials_info)
|
| 79 |
+
|
| 80 |
+
service = build('drive', 'v3', credentials=credentials)
|
| 81 |
+
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
| 82 |
+
|
| 83 |
+
def get_mime_type(file_path):
|
| 84 |
+
mime_type, _ = mimetypes.guess_type(file_path)
|
| 85 |
+
return mime_type or 'application/octet-stream'
|
| 86 |
+
|
| 87 |
+
def upload_file(file_path, parent_folder_id, drive_service):
|
| 88 |
+
file_name = f"PR_PREVIEW_{os.path.basename(file_path)}"
|
| 89 |
+
|
| 90 |
+
query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
|
| 91 |
+
results = drive_service.files().list(q=query).execute()
|
| 92 |
+
items = results.get('files', [])
|
| 93 |
+
|
| 94 |
+
media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
|
| 95 |
+
|
| 96 |
+
if items:
|
| 97 |
+
file_id = items[0]['id']
|
| 98 |
+
updated_file = drive_service.files().update(
|
| 99 |
+
fileId=file_id,
|
| 100 |
+
media_body=media
|
| 101 |
+
).execute()
|
| 102 |
+
print(f'Updated PR Preview: {file_name}')
|
| 103 |
+
else:
|
| 104 |
+
file_metadata = {
|
| 105 |
+
'name': file_name,
|
| 106 |
+
'parents': [parent_folder_id]
|
| 107 |
+
}
|
| 108 |
+
file = drive_service.files().create(
|
| 109 |
+
body=file_metadata,
|
| 110 |
+
media_body=media,
|
| 111 |
+
fields='id'
|
| 112 |
+
).execute()
|
| 113 |
+
print(f'Uploaded PR Preview: {file_name}')
|
| 114 |
+
|
| 115 |
+
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None):
|
| 116 |
+
if exclude_dirs is None:
|
| 117 |
+
exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
|
| 118 |
+
|
| 119 |
+
for root, dirs, files in os.walk(local_path):
|
| 120 |
+
dirs[:] = [d for d in dirs if d not in exclude_dirs]
|
| 121 |
+
|
| 122 |
+
for file in files:
|
| 123 |
+
file_path = os.path.join(root, file)
|
| 124 |
+
try:
|
| 125 |
+
upload_file(file_path, parent_folder_id, drive_service)
|
| 126 |
+
except Exception as e:
|
| 127 |
+
print(f'Error uploading {file_path}: {e}')
|
| 128 |
+
|
| 129 |
+
print("Starting PR preview upload to Google Drive...")
|
| 130 |
+
upload_directory('.', FOLDER_ID, service)
|
| 131 |
+
|
| 132 |
+
if os.path.exists('pdf_output'):
|
| 133 |
+
print("Uploading converted PDF files...")
|
| 134 |
+
upload_directory('pdf_output', FOLDER_ID, service)
|
| 135 |
+
|
| 136 |
+
print("PR preview upload completed!")
|
| 137 |
+
EOF
|
| 138 |
+
|
| 139 |
+
python upload_to_drive.py
|
.github/workflows/main.yml
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
-
name: Sync to Hugging Face hub
|
| 2 |
on:
|
| 3 |
push:
|
| 4 |
branches: [main]
|
| 5 |
workflow_dispatch:
|
| 6 |
|
| 7 |
jobs:
|
| 8 |
-
sync-to-hub:
|
| 9 |
runs-on: ubuntu-latest
|
| 10 |
steps:
|
| 11 |
- uses: actions/checkout@v3
|
|
@@ -14,16 +14,140 @@ jobs:
|
|
| 14 |
lfs: true
|
| 15 |
|
| 16 |
# Ensure Git LFS is installed and fetch binary files
|
| 17 |
-
# Try Pull LFS files
|
| 18 |
- name: Pull LFS files
|
| 19 |
run: |
|
| 20 |
git lfs install
|
| 21 |
git lfs pull
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
env:
|
| 25 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 26 |
run: |
|
| 27 |
git config user.name "github-actions"
|
| 28 |
git config user.email "[email protected]"
|
| 29 |
-
git push --force https://prathameshv07:[email protected]/spaces/prathameshv07/Multilingual-Audio-Intelligence-System main
|
|
|
|
| 1 |
+
name: Sync to Hugging Face hub and Google Drive
|
| 2 |
on:
|
| 3 |
push:
|
| 4 |
branches: [main]
|
| 5 |
workflow_dispatch:
|
| 6 |
|
| 7 |
jobs:
|
| 8 |
+
sync-to-hub-and-drive:
|
| 9 |
runs-on: ubuntu-latest
|
| 10 |
steps:
|
| 11 |
- uses: actions/checkout@v3
|
|
|
|
| 14 |
lfs: true
|
| 15 |
|
| 16 |
# Ensure Git LFS is installed and fetch binary files
|
|
|
|
| 17 |
- name: Pull LFS files
|
| 18 |
run: |
|
| 19 |
git lfs install
|
| 20 |
git lfs pull
|
| 21 |
|
| 22 |
+
# Install pandoc for MD to PDF conversion
|
| 23 |
+
- name: Install pandoc
|
| 24 |
+
run: |
|
| 25 |
+
sudo apt-get update
|
| 26 |
+
sudo apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra
|
| 27 |
+
|
| 28 |
+
# Convert MD files to PDF
|
| 29 |
+
- name: Convert MD to PDF
|
| 30 |
+
run: |
|
| 31 |
+
mkdir -p pdf_output
|
| 32 |
+
find . -name "*.md" -not -path "./.git/*" -not -path "./pdf_output/*" | while read file; do
|
| 33 |
+
# Get the relative path and change extension to .pdf
|
| 34 |
+
relative_path="${file#./}"
|
| 35 |
+
pdf_path="pdf_output/${relative_path%.md}.pdf"
|
| 36 |
+
|
| 37 |
+
# Create directory structure in pdf_output
|
| 38 |
+
mkdir -p "$(dirname "$pdf_path")"
|
| 39 |
+
|
| 40 |
+
# Convert MD to PDF
|
| 41 |
+
pandoc "$file" -o "$pdf_path" --pdf-engine=pdflatex
|
| 42 |
+
echo "Converted $file to $pdf_path"
|
| 43 |
+
done
|
| 44 |
+
|
| 45 |
+
# Set up Python for Google Drive upload
|
| 46 |
+
- name: Set up Python
|
| 47 |
+
uses: actions/setup-python@v4
|
| 48 |
+
with:
|
| 49 |
+
python-version: '3.9'
|
| 50 |
+
|
| 51 |
+
# Install Python dependencies
|
| 52 |
+
- name: Install Python dependencies
|
| 53 |
+
run: |
|
| 54 |
+
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
|
| 55 |
+
|
| 56 |
+
# Create Google Drive upload script
|
| 57 |
+
- name: Create upload script
|
| 58 |
+
run: |
|
| 59 |
+
cat > upload_to_drive.py << 'EOF'
|
| 60 |
+
import os
|
| 61 |
+
import json
|
| 62 |
+
from google.oauth2 import service_account
|
| 63 |
+
from googleapiclient.discovery import build
|
| 64 |
+
from googleapiclient.http import MediaFileUpload
|
| 65 |
+
import mimetypes
|
| 66 |
+
|
| 67 |
+
# Load credentials from environment
|
| 68 |
+
credentials_json = os.environ['GOOGLE_CREDENTIALS']
|
| 69 |
+
credentials_info = json.loads(credentials_json)
|
| 70 |
+
credentials = service_account.Credentials.from_service_account_info(credentials_info)
|
| 71 |
+
|
| 72 |
+
# Build the Drive service
|
| 73 |
+
service = build('drive', 'v3', credentials=credentials)
|
| 74 |
+
|
| 75 |
+
# Target folder ID
|
| 76 |
+
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
| 77 |
+
|
| 78 |
+
def get_mime_type(file_path):
|
| 79 |
+
mime_type, _ = mimetypes.guess_type(file_path)
|
| 80 |
+
return mime_type or 'application/octet-stream'
|
| 81 |
+
|
| 82 |
+
def upload_file(file_path, parent_folder_id, drive_service):
|
| 83 |
+
file_name = os.path.basename(file_path)
|
| 84 |
+
|
| 85 |
+
# Check if file already exists
|
| 86 |
+
query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
|
| 87 |
+
results = drive_service.files().list(q=query).execute()
|
| 88 |
+
items = results.get('files', [])
|
| 89 |
+
|
| 90 |
+
media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
|
| 91 |
+
|
| 92 |
+
if items:
|
| 93 |
+
# Update existing file
|
| 94 |
+
file_id = items[0]['id']
|
| 95 |
+
updated_file = drive_service.files().update(
|
| 96 |
+
fileId=file_id,
|
| 97 |
+
media_body=media
|
| 98 |
+
).execute()
|
| 99 |
+
print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
|
| 100 |
+
else:
|
| 101 |
+
# Create new file
|
| 102 |
+
file_metadata = {
|
| 103 |
+
'name': file_name,
|
| 104 |
+
'parents': [parent_folder_id]
|
| 105 |
+
}
|
| 106 |
+
file = drive_service.files().create(
|
| 107 |
+
body=file_metadata,
|
| 108 |
+
media_body=media,
|
| 109 |
+
fields='id'
|
| 110 |
+
).execute()
|
| 111 |
+
print(f'Uploaded: {file_name} (ID: {file.get("id")})')
|
| 112 |
+
|
| 113 |
+
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None):
|
| 114 |
+
if exclude_dirs is None:
|
| 115 |
+
exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
|
| 116 |
+
|
| 117 |
+
for root, dirs, files in os.walk(local_path):
|
| 118 |
+
# Remove excluded directories
|
| 119 |
+
dirs[:] = [d for d in dirs if d not in exclude_dirs]
|
| 120 |
+
|
| 121 |
+
for file in files:
|
| 122 |
+
file_path = os.path.join(root, file)
|
| 123 |
+
try:
|
| 124 |
+
upload_file(file_path, parent_folder_id, drive_service)
|
| 125 |
+
except Exception as e:
|
| 126 |
+
print(f'Error uploading {file_path}: {e}')
|
| 127 |
+
|
| 128 |
+
# Upload all files to Google Drive
|
| 129 |
+
print("Starting upload to Google Drive...")
|
| 130 |
+
upload_directory('.', FOLDER_ID, service)
|
| 131 |
+
|
| 132 |
+
# Upload PDF files if they exist
|
| 133 |
+
if os.path.exists('pdf_output'):
|
| 134 |
+
print("Uploading converted PDF files...")
|
| 135 |
+
upload_directory('pdf_output', FOLDER_ID, service)
|
| 136 |
+
|
| 137 |
+
print("Upload completed!")
|
| 138 |
+
EOF
|
| 139 |
+
|
| 140 |
+
# Upload to Google Drive
|
| 141 |
+
- name: Upload to Google Drive
|
| 142 |
+
env:
|
| 143 |
+
GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
|
| 144 |
+
run: python upload_to_drive.py
|
| 145 |
+
|
| 146 |
+
# Push to Hugging Face (original functionality)
|
| 147 |
+
- name: Push to Hugging Face hub
|
| 148 |
env:
|
| 149 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 150 |
run: |
|
| 151 |
git config user.name "github-actions"
|
| 152 |
git config user.email "[email protected]"
|
| 153 |
+
git push --force https://prathameshv07:[email protected]/spaces/prathameshv07/Multilingual-Audio-Intelligence-System main
|
demo_results/film_podcast_results.json
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"segments": [
|
| 3 |
+
{
|
| 4 |
+
"speaker": "SPEAKER_01",
|
| 5 |
+
"start_time": 6.308468750000001,
|
| 6 |
+
"end_time": 13.46346875,
|
| 7 |
+
"text": "Le film intitulé « The Social Network » traite de la création du site Facebook par Marc Zuckerberg.",
|
| 8 |
+
"translated_text": "The film entitled \"The Social Network\" deals with the creation of the Facebook site by Marc Zuckerberg.",
|
| 9 |
+
"language": "fr"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"speaker": "SPEAKER_01",
|
| 13 |
+
"start_time": 13.98659375,
|
| 14 |
+
"end_time": 18.47534375,
|
| 15 |
+
"text": "et des problèmes judiciaires que cela a comporté pour le créateur de ceci.",
|
| 16 |
+
"translated_text": "and the judicial problems that this involved for the creator of this.",
|
| 17 |
+
"language": "fr"
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"speaker": "SPEAKER_01",
|
| 21 |
+
"start_time": 19.09971875,
|
| 22 |
+
"end_time": 21.49596875,
|
| 23 |
+
"text": "Ce film est très réaliste et très intéressant.",
|
| 24 |
+
"translated_text": "This film is very realistic and very interesting.",
|
| 25 |
+
"language": "fr"
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"speaker": "SPEAKER_04",
|
| 29 |
+
"start_time": 25.74846875,
|
| 30 |
+
"end_time": 30.405968750000003,
|
| 31 |
+
"text": "La semaine dernière, j'ai été au cinéma voir Paranormal Activity 2.",
|
| 32 |
+
"translated_text": "Last week, I went to the movies to see Paranormal Activity 2.",
|
| 33 |
+
"language": "fr"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"speaker": "SPEAKER_04",
|
| 37 |
+
"start_time": 31.08096875,
|
| 38 |
+
"end_time": 33.35909375,
|
| 39 |
+
"text": "Ce film est un film d'horreur.",
|
| 40 |
+
"translated_text": "This movie is a horror movie.",
|
| 41 |
+
"language": "fr"
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"speaker": "SPEAKER_04",
|
| 45 |
+
"start_time": 34.28721875,
|
| 46 |
+
"end_time": 42.032843750000005,
|
| 47 |
+
"text": "Même s'il s'agit du deuxième film, il se déroule avant le premier et nous importe des informations sur celui-ci.",
|
| 48 |
+
"translated_text": "Even if it is the second film, it takes place before the first one and imports information about it.",
|
| 49 |
+
"language": "fr"
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"speaker": "SPEAKER_03",
|
| 53 |
+
"start_time": 46.43721875,
|
| 54 |
+
"end_time": 48.86721875,
|
| 55 |
+
"text": "Récemment, j'ai vu le film V-Battery",
|
| 56 |
+
"translated_text": "Recently, I saw the movie V-Battery",
|
| 57 |
+
"language": "fr"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"speaker": "SPEAKER_03",
|
| 61 |
+
"start_time": 49.15409375,
|
| 62 |
+
"end_time": 50.63909375,
|
| 63 |
+
"text": "qui raconte l'histoire des 4 Rébouins.",
|
| 64 |
+
"translated_text": "which tells the story of the 4 Rebouins.",
|
| 65 |
+
"language": "fr"
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"speaker": "SPEAKER_03",
|
| 69 |
+
"start_time": 51.212843750000005,
|
| 70 |
+
"end_time": 52.39409375,
|
| 71 |
+
"text": "Part pour les égages.",
|
| 72 |
+
"translated_text": "Get out of here for the lights.",
|
| 73 |
+
"language": "fr"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"speaker": "SPEAKER_03",
|
| 77 |
+
"start_time": 53.05221875,
|
| 78 |
+
"end_time": 55.07721875,
|
| 79 |
+
"text": "pour intérer la vie de garçon de l'un d'autre.",
|
| 80 |
+
"translated_text": "to interest each other's boy's life.",
|
| 81 |
+
"language": "fr"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"speaker": "SPEAKER_03",
|
| 85 |
+
"start_time": 56.12346875,
|
| 86 |
+
"end_time": 57.439718750000004,
|
| 87 |
+
"text": "qui va se marier prochainement.",
|
| 88 |
+
"translated_text": "who's about to get married.",
|
| 89 |
+
"language": "fr"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"speaker": "SPEAKER_03",
|
| 93 |
+
"start_time": 58.65471875,
|
| 94 |
+
"end_time": 60.84846875,
|
| 95 |
+
"text": "Histoire se déroule donc à Las Vegas.",
|
| 96 |
+
"translated_text": "History takes place in Las Vegas.",
|
| 97 |
+
"language": "fr"
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"speaker": "SPEAKER_03",
|
| 101 |
+
"start_time": 60.899093750000006,
|
| 102 |
+
"end_time": 62.299718750000004,
|
| 103 |
+
"text": "Et après une folle nuit.",
|
| 104 |
+
"translated_text": "And after a crazy night.",
|
| 105 |
+
"language": "fr"
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"speaker": "SPEAKER_03",
|
| 109 |
+
"start_time": 62.721593750000004,
|
| 110 |
+
"end_time": 65.23596875,
|
| 111 |
+
"text": "Il se réveille au petit matin sans se souvenir de l'aveil.",
|
| 112 |
+
"translated_text": "He wakes up in the early morning without remembering the confession.",
|
| 113 |
+
"language": "fr"
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"speaker": "SPEAKER_03",
|
| 117 |
+
"start_time": 67.58159375000001,
|
| 118 |
+
"end_time": 70.43346875,
|
| 119 |
+
"text": "Le problème est qu'ils ont perdu leur...",
|
| 120 |
+
"translated_text": "The problem is, they lost their...",
|
| 121 |
+
"language": "fr"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"speaker": "SPEAKER_03",
|
| 125 |
+
"start_time": 70.68659375,
|
| 126 |
+
"end_time": 72.44159375000001,
|
| 127 |
+
"text": "Leur ami qui doit se marier prochainement.",
|
| 128 |
+
"translated_text": "Their friend who's due to get married soon.",
|
| 129 |
+
"language": "fr"
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"speaker": "SPEAKER_03",
|
| 133 |
+
"start_time": 73.25159375,
|
| 134 |
+
"end_time": 75.24284375,
|
| 135 |
+
"text": "Je vous laisse donc imaginer la suite.",
|
| 136 |
+
"translated_text": "So I'll let you imagine the next one.",
|
| 137 |
+
"language": "fr"
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"speaker": "SPEAKER_03",
|
| 141 |
+
"start_time": 76.08659375,
|
| 142 |
+
"end_time": 76.64346875000001,
|
| 143 |
+
"text": "qui est...",
|
| 144 |
+
"translated_text": "Which is...",
|
| 145 |
+
"language": "fr"
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"speaker": "SPEAKER_03",
|
| 149 |
+
"start_time": 76.89659375000001,
|
| 150 |
+
"end_time": 78.41534375,
|
| 151 |
+
"text": "pour ma part très amusante.",
|
| 152 |
+
"translated_text": "For my part, very amusing.",
|
| 153 |
+
"language": "fr"
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"speaker": "SPEAKER_03",
|
| 157 |
+
"start_time": 79.39409375000001,
|
| 158 |
+
"end_time": 81.01409375,
|
| 159 |
+
"text": "Ce n'est pas le film que je préfère.",
|
| 160 |
+
"translated_text": "It's not the movie I like best.",
|
| 161 |
+
"language": "fr"
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"speaker": "SPEAKER_03",
|
| 165 |
+
"start_time": 81.45284375,
|
| 166 |
+
"end_time": 82.92096875,
|
| 167 |
+
"text": "Mais c'est un moment agréable.",
|
| 168 |
+
"translated_text": "But it's a nice time.",
|
| 169 |
+
"language": "fr"
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"speaker": "SPEAKER_00",
|
| 173 |
+
"start_time": 87.52784375,
|
| 174 |
+
"end_time": 94.93596875,
|
| 175 |
+
"text": "Dernièrement, j'ai vu un film qui s'appelle Paranormal Activity 2.",
|
| 176 |
+
"translated_text": "Recently, I saw a movie called Paranormal Activity 2.",
|
| 177 |
+
"language": "fr"
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"speaker": "SPEAKER_00",
|
| 181 |
+
"start_time": 95.25659375000001,
|
| 182 |
+
"end_time": 97.04534375,
|
| 183 |
+
"text": "Il s'agit d'un film d'horreur qui est accessible aux jeunes publics.",
|
| 184 |
+
"translated_text": "It is a horror film that is accessible to young audiences.",
|
| 185 |
+
"language": "fr"
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"speaker": "SPEAKER_00",
|
| 189 |
+
"start_time": 97.80471875,
|
| 190 |
+
"end_time": 106.81596875000001,
|
| 191 |
+
"text": "Gros ne trouve pas de scène choquante.",
|
| 192 |
+
"translated_text": "Fat doesn't find a shocking scene.",
|
| 193 |
+
"language": "fr"
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"speaker": "SPEAKER_00",
|
| 197 |
+
"start_time": 107.59221875,
|
| 198 |
+
"end_time": 115.96221875,
|
| 199 |
+
"text": "Il s'agit de l'histoire d'une famille américaine qui vit près de l'eau sans pacifique sur la côte ouest",
|
| 200 |
+
"translated_text": "This is the story of an American family living near the unpeaceful water on the west coast.",
|
| 201 |
+
"language": "fr"
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"speaker": "SPEAKER_00",
|
| 205 |
+
"start_time": 116.72159375000001,
|
| 206 |
+
"end_time": 122.84721875000001,
|
| 207 |
+
"text": "et qui se trouve en proie à des phénomènes paranormaux.",
|
| 208 |
+
"translated_text": "and is plagued by paranormal phenomena.",
|
| 209 |
+
"language": "fr"
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"speaker": "SPEAKER_07",
|
| 213 |
+
"start_time": 127.62284375,
|
| 214 |
+
"end_time": 128.95596875,
|
| 215 |
+
"text": "Peu à peu, tous les membres de cette famille vont disparaître mystérieusement, les uns après les autres, sans que l'on y trouve de véritables explications.",
|
| 216 |
+
"translated_text": "Gradually, all the members of this family will disappear mysteriously, one after another, without any real explanation.",
|
| 217 |
+
"language": "fr"
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"speaker": "SPEAKER_07",
|
| 221 |
+
"start_time": 129.14159375,
|
| 222 |
+
"end_time": 130.71096875,
|
| 223 |
+
"text": "Il s'agit d'un bon film, rythmé et agréable à suivre, devant lequel on ne s'ennuie pas une seule seconde.",
|
| 224 |
+
"translated_text": "It is a good film, rhythmic and pleasant to follow, before which one does not miss one second.",
|
| 225 |
+
"language": "fr"
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"speaker": "SPEAKER_07",
|
| 229 |
+
"start_time": 131.40284375000002,
|
| 230 |
+
"end_time": 135.79034375,
|
| 231 |
+
"text": "La semaine dernière",
|
| 232 |
+
"translated_text": "Last week",
|
| 233 |
+
"language": "fr"
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"speaker": "SPEAKER_07",
|
| 237 |
+
"start_time": 136.39784375000002,
|
| 238 |
+
"end_time": 139.14846875,
|
| 239 |
+
"text": "J'ai vu le film La Ravre.",
|
| 240 |
+
"translated_text": "I saw the movie La Ravre.",
|
| 241 |
+
"language": "fr"
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"speaker": "SPEAKER_02",
|
| 245 |
+
"start_time": 143.48534375,
|
| 246 |
+
"end_time": 145.47659375,
|
| 247 |
+
"text": "il parle de la 2e guerre mondiale et de la vie des Juifs en France.",
|
| 248 |
+
"translated_text": "He talks about the Second World War and the life of the Jews in France.",
|
| 249 |
+
"language": "fr"
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"speaker": "SPEAKER_02",
|
| 253 |
+
"start_time": 145.71284375000002,
|
| 254 |
+
"end_time": 147.04596875000001,
|
| 255 |
+
"text": "Ce film est très intéressant et très réaliste.",
|
| 256 |
+
"translated_text": "This film is very interesting and very realistic.",
|
| 257 |
+
"language": "fr"
|
| 258 |
+
},
|
| 259 |
+
{
|
| 260 |
+
"speaker": "SPEAKER_02",
|
| 261 |
+
"start_time": 147.21471875,
|
| 262 |
+
"end_time": 148.91909375,
|
| 263 |
+
"text": "Récemment, j'ai vu Inception.",
|
| 264 |
+
"translated_text": "Recently, I saw Inception.",
|
| 265 |
+
"language": "fr"
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"speaker": "SPEAKER_02",
|
| 269 |
+
"start_time": 149.20596875,
|
| 270 |
+
"end_time": 153.76221875000002,
|
| 271 |
+
"text": "Ce film est intéressant.",
|
| 272 |
+
"translated_text": "This movie is interesting.",
|
| 273 |
+
"language": "fr"
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"speaker": "SPEAKER_02",
|
| 277 |
+
"start_time": 154.52159375000002,
|
| 278 |
+
"end_time": 155.88846875000002,
|
| 279 |
+
"text": "pour ces graphismes et ce qu'on scénario.",
|
| 280 |
+
"translated_text": "for these graphics and what we're scenarioing.",
|
| 281 |
+
"language": "fr"
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"speaker": "SPEAKER_02",
|
| 285 |
+
"start_time": 156.58034375,
|
| 286 |
+
"end_time": 158.38596875000002,
|
| 287 |
+
"text": "L'idée de directrice est tendance d'entrer dans les rêves d'une personne pour y implanter une idée.",
|
| 288 |
+
"translated_text": "The idea of a director tends to enter a person's dreams to implant an idea.",
|
| 289 |
+
"language": "fr"
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"speaker": "SPEAKER_05",
|
| 293 |
+
"start_time": 162.73971875,
|
| 294 |
+
"end_time": 166.85721875000002,
|
| 295 |
+
"text": "Les acteurs sont très bons.",
|
| 296 |
+
"translated_text": "The actors are very good.",
|
| 297 |
+
"language": "fr"
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"speaker": "SPEAKER_05",
|
| 301 |
+
"start_time": 167.49846875,
|
| 302 |
+
"end_time": 177.04971875,
|
| 303 |
+
"text": "Et le film regarde les bannes hittées.",
|
| 304 |
+
"translated_text": "And the movie looks at the hit banners.",
|
| 305 |
+
"language": "fr"
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"speaker": "SPEAKER_06",
|
| 309 |
+
"start_time": 181.31909375,
|
| 310 |
+
"end_time": 184.94721875000002,
|
| 311 |
+
"text": "La semaine dernière, je suis allée au cinéma pour voir paranormal activité.",
|
| 312 |
+
"translated_text": "Last week, I went to the movies to see paranormal activity.",
|
| 313 |
+
"language": "fr"
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"speaker": "SPEAKER_06",
|
| 317 |
+
"start_time": 185.50409375,
|
| 318 |
+
"end_time": 191.19096875000002,
|
| 319 |
+
"text": "Ce film d'horreur apportait me le paranormal.",
|
| 320 |
+
"translated_text": "This horror movie brought me the paranormal.",
|
| 321 |
+
"language": "fr"
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"speaker": "SPEAKER_06",
|
| 325 |
+
"start_time": 191.88284375,
|
| 326 |
+
"end_time": 198.12659375,
|
| 327 |
+
"text": "En effet, une famille subit des phénomènes paranormaux.",
|
| 328 |
+
"translated_text": "Indeed, a family undergoes paranormal phenomena.",
|
| 329 |
+
"language": "fr"
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"speaker": "SPEAKER_08",
|
| 333 |
+
"start_time": 202.81784375,
|
| 334 |
+
"end_time": 205.07909375000003,
|
| 335 |
+
"text": "Tout au long du film, l'angoisse et le suspect, Reign.",
|
| 336 |
+
"translated_text": "Throughout the movie, anxiety and the suspect, Reign.",
|
| 337 |
+
"language": "fr"
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"speaker": "SPEAKER_08",
|
| 341 |
+
"start_time": 205.31534375,
|
| 342 |
+
"end_time": 207.27284375000002,
|
| 343 |
+
"text": "Le dernier film que j'ai vu au cinéma était Wall Street.",
|
| 344 |
+
"translated_text": "The last movie I saw in the movies was Wall Street.",
|
| 345 |
+
"language": "fr"
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"speaker": "SPEAKER_08",
|
| 349 |
+
"start_time": 208.23471875,
|
| 350 |
+
"end_time": 211.66034375,
|
| 351 |
+
"text": "J'ai trouvé ce film très intéressant car il parlait de l'univers financier pendant la crise.",
|
| 352 |
+
"translated_text": "I found this film very interesting because it was about the financial universe during the crisis.",
|
| 353 |
+
"language": "fr"
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"speaker": "SPEAKER_08",
|
| 357 |
+
"start_time": 212.53784375,
|
| 358 |
+
"end_time": 214.07346875000002,
|
| 359 |
+
"text": "Il y avait aussi une histoire de famille qui rajoutait de la romance dans ce film qui décrit un monde très masculin.",
|
| 360 |
+
"translated_text": "There was also a family story that added romance in this film that describes a very masculine world.",
|
| 361 |
+
"language": "fr"
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"speaker": "SPEAKER_08",
|
| 365 |
+
"start_time": 214.27596875,
|
| 366 |
+
"end_time": 215.40659375,
|
| 367 |
+
"text": "La semaine dernière, je suis allée au cinéma.",
|
| 368 |
+
"translated_text": "Last week, I went to the movies.",
|
| 369 |
+
"language": "fr"
|
| 370 |
+
}
|
| 371 |
+
],
|
| 372 |
+
"summary": {
|
| 373 |
+
"total_duration": 230.478,
|
| 374 |
+
"num_speakers": 9,
|
| 375 |
+
"num_segments": 46,
|
| 376 |
+
"languages": [
|
| 377 |
+
"fr"
|
| 378 |
+
],
|
| 379 |
+
"processing_time": 401.4734380245209
|
| 380 |
+
}
|
| 381 |
+
}
|
demo_results/yuri_kizaki_results.json
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"segments": [
|
| 3 |
+
{
|
| 4 |
+
"speaker": "SPEAKER_00",
|
| 5 |
+
"start_time": 0.40221875,
|
| 6 |
+
"end_time": 4.77284375,
|
| 7 |
+
"text": "音声メッセージが既存のウェブサイトを超えたコミュニケーションを実現。",
|
| 8 |
+
"translated_text": "The audio message will bring out communication beyond the existing website.",
|
| 9 |
+
"language": "ja"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"speaker": "SPEAKER_00",
|
| 13 |
+
"start_time": 5.5153437499999995,
|
| 14 |
+
"end_time": 7.388468750000001,
|
| 15 |
+
"text": "目で見るだけだったウェブサイトに",
|
| 16 |
+
"translated_text": "I'm going to show you what I'm doing.",
|
| 17 |
+
"language": "ja"
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"speaker": "SPEAKER_00",
|
| 21 |
+
"start_time": 7.624718750000001,
|
| 22 |
+
"end_time": 9.852218750000002,
|
| 23 |
+
"text": "音声情報をインクルードすることで",
|
| 24 |
+
"translated_text": "We're going to be able to do that in the next video.",
|
| 25 |
+
"language": "ja"
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"speaker": "SPEAKER_00",
|
| 29 |
+
"start_time": 10.274093750000002,
|
| 30 |
+
"end_time": 12.31596875,
|
| 31 |
+
"text": "情報に新しい価値を与え",
|
| 32 |
+
"translated_text": "And that's what we're going to do.",
|
| 33 |
+
"language": "ja"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"speaker": "SPEAKER_00",
|
| 37 |
+
"start_time": 12.36659375,
|
| 38 |
+
"end_time": 14.72909375,
|
| 39 |
+
"text": "他者との差別化に効果を発揮します",
|
| 40 |
+
"translated_text": "It's not just about being different from other people.",
|
| 41 |
+
"language": "ja"
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"speaker": "SPEAKER_00",
|
| 45 |
+
"start_time": 15.67409375,
|
| 46 |
+
"end_time": 16.06221875,
|
| 47 |
+
"text": "また!",
|
| 48 |
+
"translated_text": "Again!",
|
| 49 |
+
"language": "ja"
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"speaker": "SPEAKER_00",
|
| 53 |
+
"start_time": 16.33221875,
|
| 54 |
+
"end_time": 21.58034375,
|
| 55 |
+
"text": "文字やグラフィックだけでは伝えることの難しかった感情やニュアンスを表現し",
|
| 56 |
+
"translated_text": "It's not just writing, it's graphic.",
|
| 57 |
+
"language": "ja"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"speaker": "SPEAKER_00",
|
| 61 |
+
"start_time": 22.06971875,
|
| 62 |
+
"end_time": 24.44909375,
|
| 63 |
+
"text": "ユーザーの興味と理解を深めます。",
|
| 64 |
+
"translated_text": "It will enhance the user's interest and understanding.",
|
| 65 |
+
"language": "ja"
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"speaker": "SPEAKER_00",
|
| 69 |
+
"start_time": 25.47846875,
|
| 70 |
+
"end_time": 25.832843750000002,
|
| 71 |
+
"text": "見る",
|
| 72 |
+
"translated_text": "See.",
|
| 73 |
+
"language": "ja"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"speaker": "SPEAKER_00",
|
| 77 |
+
"start_time": 26.204093750000002,
|
| 78 |
+
"end_time": 26.65971875,
|
| 79 |
+
"text": "聞く",
|
| 80 |
+
"translated_text": "Listen.",
|
| 81 |
+
"language": "ja"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"speaker": "SPEAKER_00",
|
| 85 |
+
"start_time": 26.96346875,
|
| 86 |
+
"end_time": 28.617218750000003,
|
| 87 |
+
"text": "理解するウェブサイトへ",
|
| 88 |
+
"translated_text": "To a website that understands.",
|
| 89 |
+
"language": "ja"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"speaker": "SPEAKER_00",
|
| 93 |
+
"start_time": 29.24159375,
|
| 94 |
+
"end_time": 31.90784375,
|
| 95 |
+
"text": "音声メッセージが人の心を動かします",
|
| 96 |
+
"translated_text": "And that's what I'm talking about.",
|
| 97 |
+
"language": "ja"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"summary": {
|
| 101 |
+
"total_duration": 32.366,
|
| 102 |
+
"num_speakers": 1,
|
| 103 |
+
"num_segments": 12,
|
| 104 |
+
"languages": [
|
| 105 |
+
"ja"
|
| 106 |
+
],
|
| 107 |
+
"processing_time": 88.7896044254303
|
| 108 |
+
}
|
| 109 |
+
}
|
model_preloader.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
Model Preloader for Multilingual Audio Intelligence System
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
- faster-whisper for speech recognition
|
| 11 |
-
- mBART50 for neural machine translation
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
@@ -41,7 +39,7 @@ logger = logging.getLogger(__name__)
|
|
| 41 |
console = Console()
|
| 42 |
|
| 43 |
class ModelPreloader:
|
| 44 |
-
"""Comprehensive model preloader with
|
| 45 |
|
| 46 |
def __init__(self, cache_dir: str = "./model_cache", device: str = "auto"):
|
| 47 |
self.cache_dir = Path(cache_dir)
|
|
@@ -96,6 +94,154 @@ class ModelPreloader:
|
|
| 96 |
}
|
| 97 |
}
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
def get_system_info(self) -> Dict[str, Any]:
|
| 100 |
"""Get system information for optimal model loading."""
|
| 101 |
return {
|
|
@@ -173,18 +319,28 @@ class ModelPreloader:
|
|
| 173 |
return None
|
| 174 |
|
| 175 |
def load_whisper_model(self, task_id: str) -> Optional[WhisperModel]:
|
| 176 |
-
"""Load Whisper speech recognition model."""
|
| 177 |
try:
|
| 178 |
console.print(f"[yellow]Loading Whisper model (small)...[/yellow]")
|
| 179 |
|
| 180 |
# Determine compute type based on device
|
| 181 |
compute_type = "int8" if self.device == "cpu" else "float16"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
|
|
|
| 183 |
model = WhisperModel(
|
| 184 |
"small",
|
| 185 |
device=self.device,
|
| 186 |
compute_type=compute_type,
|
| 187 |
-
download_root=str(
|
| 188 |
)
|
| 189 |
|
| 190 |
# Test the model with a dummy audio array
|
|
@@ -203,93 +359,23 @@ class ModelPreloader:
|
|
| 203 |
return None
|
| 204 |
|
| 205 |
def load_mbart_model(self, task_id: str) -> Optional[Dict[str, Any]]:
|
| 206 |
-
"""Load mBART translation model."""
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
# Load tokenizer
|
| 215 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 216 |
-
model_name,
|
| 217 |
-
cache_dir=str(cache_path)
|
| 218 |
-
)
|
| 219 |
-
|
| 220 |
-
# Load model
|
| 221 |
-
model = AutoModelForSeq2SeqLM.from_pretrained(
|
| 222 |
-
model_name,
|
| 223 |
-
cache_dir=str(cache_path),
|
| 224 |
-
torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
|
| 225 |
-
)
|
| 226 |
-
|
| 227 |
-
if self.device != "cpu":
|
| 228 |
-
model = model.to(self.device)
|
| 229 |
-
|
| 230 |
-
# Test the model
|
| 231 |
-
test_input = tokenizer("Hello world", return_tensors="pt")
|
| 232 |
-
if self.device != "cpu":
|
| 233 |
-
test_input = {k: v.to(self.device) for k, v in test_input.items()}
|
| 234 |
-
|
| 235 |
-
with torch.no_grad():
|
| 236 |
-
output = model.generate(**test_input, max_length=10)
|
| 237 |
-
|
| 238 |
-
console.print(f"[green]✓ mBART model loaded successfully on {self.device}[/green]")
|
| 239 |
-
|
| 240 |
-
return {
|
| 241 |
-
"model": model,
|
| 242 |
-
"tokenizer": tokenizer
|
| 243 |
-
}
|
| 244 |
-
|
| 245 |
-
except Exception as e:
|
| 246 |
-
console.print(f"[red]✗ Failed to load mBART model: {e}[/red]")
|
| 247 |
-
logger.error(f"mBART loading failed: {e}")
|
| 248 |
-
return None
|
| 249 |
|
| 250 |
def load_opus_mt_model(self, task_id: str, model_name: str) -> Optional[Dict[str, Any]]:
|
| 251 |
-
"""Load Opus-MT translation model."""
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
# Load tokenizer
|
| 259 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 260 |
-
model_name,
|
| 261 |
-
cache_dir=str(cache_path)
|
| 262 |
-
)
|
| 263 |
-
|
| 264 |
-
# Load model
|
| 265 |
-
model = AutoModelForSeq2SeqLM.from_pretrained(
|
| 266 |
-
model_name,
|
| 267 |
-
cache_dir=str(cache_path),
|
| 268 |
-
torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
|
| 269 |
-
)
|
| 270 |
-
|
| 271 |
-
if self.device != "cpu":
|
| 272 |
-
model = model.to(self.device)
|
| 273 |
-
|
| 274 |
-
# Test the model
|
| 275 |
-
test_input = tokenizer("Hello world", return_tensors="pt")
|
| 276 |
-
if self.device != "cpu":
|
| 277 |
-
test_input = {k: v.to(self.device) for k, v in test_input.items()}
|
| 278 |
-
|
| 279 |
-
with torch.no_grad():
|
| 280 |
-
output = model.generate(**test_input, max_length=10)
|
| 281 |
-
|
| 282 |
-
console.print(f"[green]✓ {model_name} loaded successfully on {self.device}[/green]")
|
| 283 |
-
|
| 284 |
-
return {
|
| 285 |
-
"model": model,
|
| 286 |
-
"tokenizer": tokenizer
|
| 287 |
-
}
|
| 288 |
-
|
| 289 |
-
except Exception as e:
|
| 290 |
-
console.print(f"[red]✗ Failed to load {model_name}: {e}[/red]")
|
| 291 |
-
logger.error(f"Opus-MT loading failed: {e}")
|
| 292 |
-
return None
|
| 293 |
|
| 294 |
def preload_all_models(self) -> Dict[str, Any]:
|
| 295 |
"""Preload all models with progress tracking."""
|
|
@@ -465,4 +551,4 @@ def main():
|
|
| 465 |
|
| 466 |
if __name__ == "__main__":
|
| 467 |
success = main()
|
| 468 |
-
sys.exit(0 if success else 1)
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Model Preloader for Multilingual Audio Intelligence System - Enhanced Version
|
| 4 |
|
| 5 |
+
Key improvements:
|
| 6 |
+
1. Smart local cache detection with corruption checking
|
| 7 |
+
2. Fallback to download if local files don't exist or are corrupted
|
| 8 |
+
3. Better error handling and retry mechanisms
|
| 9 |
+
4. Consistent approach across all model types
|
|
|
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
import os
|
|
|
|
| 39 |
console = Console()
|
| 40 |
|
| 41 |
class ModelPreloader:
|
| 42 |
+
"""Comprehensive model preloader with enhanced local cache detection."""
|
| 43 |
|
| 44 |
def __init__(self, cache_dir: str = "./model_cache", device: str = "auto"):
|
| 45 |
self.cache_dir = Path(cache_dir)
|
|
|
|
| 94 |
}
|
| 95 |
}
|
| 96 |
|
| 97 |
+
def check_local_model_files(self, model_name: str, model_type: str) -> bool:
    """
    Check whether a model's files are already present in the local cache
    and look intact (non-empty).

    Returns True only when a usable local snapshot exists; any missing
    directory, missing file, zero-byte file, unknown model type, or
    filesystem error yields False so callers fall back to downloading.
    """
    def _nonempty(path):
        # A zero-byte file is treated as corrupted.
        return path.exists() and path.stat().st_size > 0

    def _snapshot_dirs(root):
        # Snapshot sub-directories of a HuggingFace-style cache root.
        snaps = root / "snapshots"
        if not snaps.exists():
            return []
        return [d for d in snaps.iterdir() if d.is_dir()]

    try:
        if model_type == "whisper":
            # faster-whisper stores its weights under the Systran repo cache.
            root = self.cache_dir / "whisper" / "models--Systran--faster-whisper-small"
            candidates = _snapshot_dirs(root)
            if not candidates:
                return False
            # There is normally exactly one snapshot; use the first.
            snapshot = candidates[0]
            needed = ("config.json", "model.bin", "tokenizer.json", "vocabulary.txt")
            return all(_nonempty(snapshot / name) for name in needed)

        if model_type in ("mbart", "opus_mt"):
            # Transformers models follow the standard HF hub cache layout.
            flat_name = model_name.replace("/", "--")
            if model_type == "mbart":
                root = self.cache_dir / "mbart" / f"models--{flat_name}"
            else:
                root = self.cache_dir / "opus_mt" / flat_name / f"models--{flat_name}"
            candidates = _snapshot_dirs(root)
            if not candidates:
                return False
            # Prefer the most recently written snapshot.
            snapshot = max(candidates, key=lambda d: d.stat().st_mtime)
            configs_ok = all(
                _nonempty(snapshot / name)
                for name in ("config.json", "tokenizer_config.json")
            )
            # Weights may be shipped as either a .bin or a .safetensors file.
            weights_ok = any(
                _nonempty(snapshot / name)
                for name in ("pytorch_model.bin", "model.safetensors")
            )
            return configs_ok and weights_ok

        if model_type == "pyannote":
            # pyannote uses the HF hub's own caching with a hard-to-predict
            # path; let the loader handle caching automatically.
            return False

    except Exception as e:
        logger.warning(f"Error checking local files for {model_name}: {e}")
        return False

    # Unknown model type: report no usable cache.
    return False
|
| 175 |
+
|
| 176 |
+
def load_transformers_model_with_cache_check(self, model_name: str, cache_path: Path, model_type: str = "seq2seq") -> Optional[Dict[str, Any]]:
    """
    Load a seq2seq transformers model + tokenizer, preferring a validated
    local cache and falling back to a fresh download.

    Parameters
    ----------
    model_name : HuggingFace model id (e.g. "Helsinki-NLP/opus-mt-fr-en").
    cache_path : directory passed to transformers as cache_dir.
    model_type : reserved for future dispatch; currently every model is
        loaded via AutoModelForSeq2SeqLM (parameter kept for API stability).

    Returns
    -------
    dict with "model" and "tokenizer" keys on success, None on failure.
    """
    def _load(local_only: bool):
        # Single place for the tokenizer/model pair so the cache-hit and
        # download paths cannot drift apart.
        tok = AutoTokenizer.from_pretrained(
            model_name,
            cache_dir=str(cache_path),
            local_files_only=local_only,
        )
        mdl = AutoModelForSeq2SeqLM.from_pretrained(
            model_name,
            cache_dir=str(cache_path),
            local_files_only=local_only,
            # fp16 only makes sense on an accelerator.
            torch_dtype=torch.float32 if self.device == "cpu" else torch.float16,
        )
        return mdl, tok

    try:
        model = tokenizer = None
        cache_kind = "mbart" if "mbart" in model_name else "opus_mt"

        if self.check_local_model_files(model_name, cache_kind):
            console.print(f"[green]Found valid local cache for {model_name}, loading from cache...[/green]")
            try:
                model, tokenizer = _load(local_only=True)
                console.print(f"[green]✓ Successfully loaded {model_name} from local cache[/green]")
            except Exception as e:
                # Cache looked valid but failed to load (e.g. partial write);
                # fall through to a fresh download.
                console.print(f"[yellow]Local cache load failed for {model_name}, will download: {e}[/yellow]")
                model = tokenizer = None

        if model is None:
            console.print(f"[yellow]No valid local cache for {model_name}, downloading...[/yellow]")
            model, tokenizer = _load(local_only=False)
            console.print(f"[green]✓ Successfully downloaded and loaded {model_name}[/green]")

        # Move to device if needed.
        if self.device != "cpu":
            model = model.to(self.device)

        # Smoke-test: a tiny generation catches broken/corrupt weights early.
        test_input = tokenizer("Hello world", return_tensors="pt")
        if self.device != "cpu":
            test_input = {k: v.to(self.device) for k, v in test_input.items()}
        with torch.no_grad():
            model.generate(**test_input, max_length=10)

        return {
            "model": model,
            "tokenizer": tokenizer
        }

    except Exception as e:
        console.print(f"[red]✗ Failed to load {model_name}: {e}[/red]")
        logger.error(f"Model loading failed for {model_name}: {e}")
        return None
|
| 244 |
+
|
| 245 |
def get_system_info(self) -> Dict[str, Any]:
|
| 246 |
"""Get system information for optimal model loading."""
|
| 247 |
return {
|
|
|
|
| 319 |
return None
|
| 320 |
|
| 321 |
def load_whisper_model(self, task_id: str) -> Optional[WhisperModel]:
|
| 322 |
+
"""Load Whisper speech recognition model with enhanced cache checking."""
|
| 323 |
try:
|
| 324 |
console.print(f"[yellow]Loading Whisper model (small)...[/yellow]")
|
| 325 |
|
| 326 |
# Determine compute type based on device
|
| 327 |
compute_type = "int8" if self.device == "cpu" else "float16"
|
| 328 |
+
whisper_cache_dir = self.cache_dir / "whisper"
|
| 329 |
+
|
| 330 |
+
# Check if we have valid local files
|
| 331 |
+
has_local_files = self.check_local_model_files("small", "whisper")
|
| 332 |
+
|
| 333 |
+
if has_local_files:
|
| 334 |
+
console.print(f"[green]Found valid local Whisper cache, loading from cache...[/green]")
|
| 335 |
+
else:
|
| 336 |
+
console.print(f"[yellow]No valid local Whisper cache found, will download...[/yellow]")
|
| 337 |
|
| 338 |
+
# faster-whisper handles caching automatically, but we specify our cache dir
|
| 339 |
model = WhisperModel(
|
| 340 |
"small",
|
| 341 |
device=self.device,
|
| 342 |
compute_type=compute_type,
|
| 343 |
+
download_root=str(whisper_cache_dir)
|
| 344 |
)
|
| 345 |
|
| 346 |
# Test the model with a dummy audio array
|
|
|
|
| 359 |
return None
|
| 360 |
|
| 361 |
def load_mbart_model(self, task_id: str) -> Optional[Dict[str, Any]]:
    """Load the mBART-50 many-to-many translation model.

    task_id is accepted for progress-tracking parity with the other
    loaders; it is not used here.  Returns a {"model", "tokenizer"}
    dict on success, or None on failure.
    """
    console.print("[yellow]Loading mBART translation model...[/yellow]")

    model_name = "facebook/mbart-large-50-many-to-many-mmt"
    cache_path = self.cache_dir / "mbart"
    # parents=True keeps this robust when the base cache dir does not
    # exist yet (and matches load_opus_mt_model's mkdir behavior).
    cache_path.mkdir(parents=True, exist_ok=True)

    return self.load_transformers_model_with_cache_check(model_name, cache_path, "seq2seq")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
def load_opus_mt_model(self, task_id: str, model_name: str) -> Optional[Dict[str, Any]]:
    """Load an Opus-MT translation model with enhanced cache checking.

    task_id is accepted for progress-tracking parity with the other
    loaders.  Returns a {"model", "tokenizer"} dict or None on failure.
    """
    console.print(f"[yellow]Loading Opus-MT model: {model_name}...[/yellow]")

    # Each Opus-MT pair gets its own sub-directory, keyed by the repo id
    # with '/' flattened so the name is filesystem-safe.
    safe_name = model_name.replace("/", "--")
    cache_path = self.cache_dir / "opus_mt" / safe_name
    cache_path.mkdir(parents=True, exist_ok=True)

    return self.load_transformers_model_with_cache_check(model_name, cache_path, "seq2seq")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
def preload_all_models(self) -> Dict[str, Any]:
|
| 381 |
"""Preload all models with progress tracking."""
|
|
|
|
| 551 |
|
| 552 |
if __name__ == "__main__":
|
| 553 |
success = main()
|
| 554 |
+
sys.exit(0 if success else 1)
|