wzy013 committed
Commit 21d1989 · 1 Parent(s): e78e3fd

Deploy real API calling version


- Implements Gradio Client API to call official HunyuanVideo-Foley Space
- Falls back to Hugging Face Inference API as secondary option
- Smart API inference with multiple fallback strategies (see the sketch after this list)
- No local model loading - solves 16GB memory limit issue
- Real AI audio generation through remote API calls
- Comprehensive error handling and user feedback
- Minimal dependencies focused on API calling
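For reference, a minimal sketch of the fallback flow this commit wires up is shown below. It assumes gradio_client is installed and that the official Space still exposes the /infer_single_video endpoint used in app.py; the generate_foley wrapper name is illustrative, and the Hugging Face Inference API fallback is only stubbed.

    from gradio_client import Client

    def generate_foley(video_path: str, prompt: str):
        """Illustrative wrapper; Space id and endpoint name are taken from this commit's app.py."""
        try:
            # Primary option: call the official Space through gradio_client
            client = Client("tencent/HunyuanVideo-Foley")
            return client.predict(
                video_path, prompt,   # video + text prompt
                4.5, 50, 1,           # CFG scale, inference steps, sample count (defaults in this commit)
                api_name="/infer_single_video",
            )
        except Exception as exc:
            # Secondary option (stubbed): POST the video to the HF Inference API
            # with a Bearer HF_TOKEN header, as app.py's fallback does.
            print(f"Space call failed, would fall back to the Inference API: {exc}")
            return None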

Files changed (4)
  1. app.py +221 -136
  2. app_real_api.py +326 -0
  3. requirements.txt +8 -5
  4. requirements_api.txt +10 -0
app.py CHANGED
@@ -1,146 +1,211 @@
  import os
  import tempfile
  import gradio as gr
- import torch
- import torchaudio
- from loguru import logger
- from typing import Optional, Tuple
- import random
- import numpy as np
  import requests
  import json

- # Simplified working version without loading large models
-
- def create_demo_audio(video_file, text_prompt: str, duration: float = 5.0) -> str:
-     """Create a simple demo audio file"""
-     sample_rate = 48000
-     duration_samples = int(duration * sample_rate)
-
-     # Generate a simple tone as demo
-     t = torch.linspace(0, duration, duration_samples)
-     frequency = 440 # A note
-     audio = 0.3 * torch.sin(2 * 3.14159 * frequency * t)
-
-     # Add some variation based on text prompt length
-     if text_prompt:
-         freq_mod = len(text_prompt) * 10
-         audio += 0.1 * torch.sin(2 * 3.14159 * freq_mod * t)
-
-     # Save to temporary file
-     temp_dir = tempfile.mkdtemp()
-     audio_path = os.path.join(temp_dir, "demo_audio.wav")
-     torchaudio.save(audio_path, audio.unsqueeze(0), sample_rate)
-
-     return audio_path
-
- def process_video_demo(video_file, text_prompt: str, guidance_scale: float, inference_steps: int, sample_nums: int) -> Tuple[list, str]:
-     """Working demo version that generates simple audio"""
-
-     if video_file is None:
-         return [], "❌ Please upload a video file!"
-
-     if text_prompt is None:
-         text_prompt = ""
-
      try:
-         logger.info(f"Processing video in demo mode: {video_file}")
-         logger.info(f"Text prompt: {text_prompt}")

-         # Generate simple demo audio
-         video_outputs = []
-         for i in range(min(sample_nums, 3)): # Limit to 3 samples
-             demo_audio = create_demo_audio(video_file, f"{text_prompt}_sample_{i+1}")
-
-             # For demo, just return the audio file path
-             # In a real implementation, this would be merged with video
-             video_outputs.append(demo_audio)

-         success_msg = f"""✅ Demo Generation Complete!
-
- 📹 **Processed**: {os.path.basename(video_file) if hasattr(video_file, 'name') else 'Video file'}
- 📝 **Prompt**: "{text_prompt}"
- ⚙️ **Settings**: CFG={guidance_scale}, Steps={inference_steps}, Samples={sample_nums}

- 🎵 **Generated**: {len(video_outputs)} demo audio sample(s)

- ⚠️ **Note**: This is a working demo with synthetic audio.
- For real AI-generated Foley audio, run locally with the full model:
- https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley"""

-         return video_outputs, success_msg

      except Exception as e:
-         logger.error(f"Demo processing failed: {str(e)}")
-         return [], f"❌ Demo processing failed: {str(e)}"

- def create_working_interface():
-     """Create a working Gradio interface"""

-     css = """
-     .gradio-container {
-         font-family: 'Inter', sans-serif;
-         background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
-     }

-     .main-header {
-         text-align: center;
-         padding: 2rem;
-         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-         border-radius: 20px;
-         margin-bottom: 2rem;
-         color: white;
-     }

-     .demo-notice {
-         background: #e8f4fd;
-         border: 2px solid #1890ff;
          border-radius: 10px;
          padding: 1rem;
          margin: 1rem 0;
-         color: #0050b3;
      }
      """

-     with gr.Blocks(css=css, title="HunyuanVideo-Foley Demo") as app:

          # Header
-         with gr.Column(elem_classes=["main-header"]):
-             gr.HTML("""
                  <h1>🎵 HunyuanVideo-Foley</h1>
-                 <p>Working Demo Version</p>
-             """)

-         # Demo Notice
          gr.HTML("""
-             <div class="demo-notice">
-                 <strong>🎯 Working Demo:</strong> This version generates synthetic audio to demonstrate the interface.
-                 Upload a video and try the controls to see how it works!<br>
-                 <strong>For real AI audio:</strong> Visit the <a href="https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley" target="_blank">original repository</a>
              </div>
          """)

          with gr.Row():
-             # Input Section
              with gr.Column(scale=1):
-                 gr.Markdown("### 📹 Video Input")

                  video_input = gr.Video(
-                     label="Upload Video",
-                     info="Upload any video file to test the interface"
                  )

                  text_input = gr.Textbox(
-                     label="🎯 Audio Description",
-                     placeholder="Describe the audio you want (affects demo tone)",
-                     lines=3
                  )

                  with gr.Row():
                      guidance_scale = gr.Slider(
                          minimum=1.0,
                          maximum=10.0,
-                         value=4.0,
                          step=0.1,
                          label="🎚️ CFG Scale"
                      )
@@ -150,87 +215,107 @@ def create_working_interface():
                          maximum=100,
                          value=50,
                          step=5,
-                         label="⚡ Steps"
                      )

                      sample_nums = gr.Slider(
                          minimum=1,
-                         maximum=3,
                          value=1,
                          step=1,
-                         label="🎲 Samples"
                      )

-                 generate_btn = gr.Button("🎵 Generate Demo Audio", variant="primary")

-             # Output Section
              with gr.Column(scale=1):
-                 gr.Markdown("### 🎵 Generated Audio")

-                 audio_output_1 = gr.Audio(label="Sample 1", visible=True)
-                 audio_output_2 = gr.Audio(label="Sample 2", visible=False)
-                 audio_output_3 = gr.Audio(label="Sample 3", visible=False)

                  status_output = gr.Textbox(
-                     label="Status",
                      interactive=False,
-                     lines=6
                  )

-         # Event handlers
-         def update_visibility(sample_nums):
-             return [
-                 gr.update(visible=True), # Sample 1 always visible
-                 gr.update(visible=sample_nums >= 2),
-                 gr.update(visible=sample_nums >= 3)
-             ]
-
-         def process_demo(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
-             audio_files, status_msg = process_video_demo(
                  video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
              )

-             # Prepare outputs
-             outputs = [None, None, None]
-             for i, audio_file in enumerate(audio_files[:3]):
-                 outputs[i] = audio_file

-             return outputs[0], outputs[1], outputs[2], status_msg

-         # Connect events
          sample_nums.change(
              fn=update_visibility,
              inputs=[sample_nums],
-             outputs=[audio_output_1, audio_output_2, audio_output_3]
          )

          generate_btn.click(
-             fn=process_demo,
              inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
-             outputs=[audio_output_1, audio_output_2, audio_output_3, status_output]
          )

          # Footer
          gr.HTML("""
-             <div style="text-align: center; padding: 2rem; color: #666;">
-                 <p>🎭 <strong>Demo Version:</strong> Generates synthetic audio for interface demonstration</p>
-                 <p>🚀 <strong>Full Version:</strong> <a href="https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley" target="_blank">GitHub Repository</a></p>
              </div>
          """)

      return app

  if __name__ == "__main__":
-     # Setup logging
      logger.remove()
      logger.add(lambda msg: print(msg, end=''), level="INFO")

-     logger.info("Starting HunyuanVideo-Foley Working Demo...")

-     # Create and launch app
-     app = create_working_interface()

-     logger.info("Demo app ready - will generate synthetic audio for testing")

      app.launch(
          server_name="0.0.0.0",
 
  import os
  import tempfile
  import gradio as gr
  import requests
  import json
+ from loguru import logger
+ from typing import Optional, Tuple
+ import base64
+ import time

+ def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
+     """Call the official Hugging Face Space API"""
      try:
+         from gradio_client import Client
+
+         logger.info("Connecting to the official HunyuanVideo-Foley Space...")

+         # Connect to the official Space
+         client = Client("tencent/HunyuanVideo-Foley")
+
+         logger.info("Sending inference request...")
+
+         # Call the inference function
+         result = client.predict(
+             video_file,  # video file
+             text_prompt,  # text prompt
+             guidance_scale,  # CFG scale
+             inference_steps,  # number of inference steps
+             sample_nums,  # number of samples
+             api_name="/infer_single_video"  # API endpoint name
+         )
+
+         return result, "✅ Audio generated successfully via the official API!"
+
+     except Exception as e:
+         error_msg = str(e)
+         logger.error(f"Gradio Client API call failed: {error_msg}")
+
+         if "not found" in error_msg.lower():
+             return None, "❌ API endpoint not found on the official Space; the interface may have changed"
+         elif "connection" in error_msg.lower():
+             return None, "❌ Could not connect to the official Space; please check your network"
+         elif "queue" in error_msg.lower():
+             return None, "⏳ The official Space is busy; please try again later"
+         else:
+             return None, f"❌ API call error: {error_msg}"

+ def call_huggingface_inference_api(video_file, text_prompt):
+     """Call the Hugging Face Inference API"""
+     try:
+         logger.info("Trying the Hugging Face Inference API...")
+
+         API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"
+
+         # Read the video file
+         with open(video_file, "rb") as f:
+             video_data = f.read()
+
+         # Prepare the request data
+         headers = {
+             "Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}",
+         }
+
+         # Send the request
+         response = requests.post(
+             API_URL,
+             headers=headers,
+             json={"inputs": {"video": base64.b64encode(video_data).decode(), "text": text_prompt}},
+             timeout=300
+         )
+
+         if response.status_code == 200:
+             # Save the result
+             temp_dir = tempfile.mkdtemp()
+             audio_path = os.path.join(temp_dir, "generated_audio.wav")
+             with open(audio_path, 'wb') as f:
+                 f.write(response.content)
+             return [audio_path], "✅ Generated successfully via the Hugging Face API!"
+         else:
+             logger.error(f"HF API error: {response.status_code}")
+             return None, f"❌ Hugging Face API returned an error: {response.status_code}"
+
+     except Exception as e:
+         logger.error(f"HF API call failed: {str(e)}")
+         return None, f"❌ Hugging Face API call failed: {str(e)}"

+ def try_alternative_apis(video_file, text_prompt):
+     """Try other possible API services"""
+
+     # 1. Try public demo endpoints
+     try:
+         logger.info("Trying demo endpoints...")
+
+         # Other public API services could be tried here,
+         # e.g. Replicate, RunPod, etc.

+         return None, "❌ No alternative API service is currently available"

      except Exception as e:
+         return None, f"❌ Alternative API call failed: {str(e)}"

+ def smart_api_inference(video_file, text_prompt, guidance_scale=4.5, inference_steps=50, sample_nums=1):
+     """Smart API inference - try multiple API calling strategies"""

+     if video_file is None:
+         return [], "❌ Please upload a video file!"

+     if not text_prompt:
+         text_prompt = "audio for this video"
+
+     logger.info(f"Starting API inference: {video_file}")
+     logger.info(f"Text prompt: {text_prompt}")
+
+     status_updates = []
+
+     # Method 1: try the Gradio Client (most likely to succeed)
+     status_updates.append("🔄 Trying to connect to the official Space API...")
+     try:
+         result, status = call_gradio_client_api(
+             video_file, text_prompt, guidance_scale, inference_steps, sample_nums
+         )
+         if result:
+             return result, "\n".join(status_updates + [status])
+         status_updates.append(status)
+     except ImportError:
+         status_updates.append("⚠️ gradio_client is not installed; skipping the official API call")
+
+     # Method 2: try the Hugging Face Inference API
+     status_updates.append("🔄 Trying the Hugging Face Inference API...")
+     result, status = call_huggingface_inference_api(video_file, text_prompt)
+     if result:
+         return result, "\n".join(status_updates + [status])
+     status_updates.append(status)
+
+     # Method 3: try other APIs
+     status_updates.append("🔄 Trying alternative API services...")
+     result, status = try_alternative_apis(video_file, text_prompt)
+     status_updates.append(status)
+
+     # All methods failed
+     final_message = "\n".join(status_updates + [
+         "",
+         "💡 **Suggested fixes:**",
+         "• Install gradio_client: pip install gradio_client",
+         "• Set the HF_TOKEN environment variable",
+         "• Wait for the official Space's load to drop",
+         "• Run the full model locally (requires 24GB+ RAM)",
+         "",
+         "🔗 **Official Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley"
+     ])

+     return [], final_message
+
+ def create_real_api_interface():
+     """Create the real API-calling interface"""
+
+     css = """
+     .api-status {
+         background: #f0f8ff;
+         border: 2px solid #4169e1;
          border-radius: 10px;
          padding: 1rem;
          margin: 1rem 0;
+         color: #191970;
      }
      """

+     with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app:

          # Header
+         gr.HTML("""
+             <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 20px; margin-bottom: 2rem; color: white;">
                  <h1>🎵 HunyuanVideo-Foley</h1>
+                 <p>API client - calls the real model for inference</p>
+             </div>
+         """)

+         # API Status Notice
          gr.HTML("""
+             <div class="api-status">
+                 <strong>🌐 Real API mode:</strong> This version calls the real HunyuanVideo-Foley model over an API to run inference.
+                 <br><strong>Pros:</strong> real AI audio generation without large local memory
+                 <br><strong>Cons:</strong> depends on external service availability and may involve waiting in a queue
              </div>
          """)

          with gr.Row():
+             # Input section
              with gr.Column(scale=1):
+                 gr.Markdown("### 📹 Video Input")

                  video_input = gr.Video(
+                     label="Upload Video",
+                     info="Supports MP4, AVI, MOV and other formats"
                  )

                  text_input = gr.Textbox(
+                     label="🎯 Audio Description",
+                     placeholder="Describe the audio you want, e.g. footsteps, rain, passing vehicles",
+                     lines=3,
+                     value="audio sound effects for this video"
                  )

                  with gr.Row():
                      guidance_scale = gr.Slider(
                          minimum=1.0,
                          maximum=10.0,
+                         value=4.5,
                          step=0.1,
                          label="🎚️ CFG Scale"
                      )
@@ -150,87 +215,107 @@ def create_working_interface():
                          maximum=100,
                          value=50,
                          step=5,
+                         label="⚡ Inference Steps"
                      )

                      sample_nums = gr.Slider(
                          minimum=1,
+                         maximum=6,
                          value=1,
                          step=1,
+                         label="🎲 Sample Count"
                      )

+                 generate_btn = gr.Button(
+                     "🎵 Generate Audio via API",
+                     variant="primary",
+                     size="lg"
+                 )

+             # Output section
              with gr.Column(scale=1):
+                 gr.Markdown("### 🎵 Generated Results")

+                 audio_outputs = []
+                 for i in range(6):
+                     audio_output = gr.Audio(
+                         label=f"Sample {i+1}",
+                         visible=(i == 0)  # only show the first one
+                     )
+                     audio_outputs.append(audio_output)

                  status_output = gr.Textbox(
+                     label="API Status",
                      interactive=False,
+                     lines=10,
+                     placeholder="Waiting for an API call..."
                  )

+         # Event handling
+         def process_with_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
+             # Run API inference
+             results, status_msg = smart_api_inference(
                  video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
              )

+             # Prepare the outputs
+             outputs = [None] * 6
+             visibilities = [False] * 6

+             if results and isinstance(results, list):
+                 for i, result in enumerate(results[:6]):
+                     outputs[i] = result
+                     visibilities[i] = True
+
+             return outputs + visibilities + [status_msg]

+         # Dynamically show the selected number of samples
+         def update_visibility(sample_nums):
+             sample_nums = int(sample_nums)
+             return [gr.update(visible=(i < sample_nums)) for i in range(6)]
+
+         # Wire up events
          sample_nums.change(
              fn=update_visibility,
              inputs=[sample_nums],
+             outputs=audio_outputs
          )

          generate_btn.click(
+             fn=process_with_api,
              inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
+             outputs=audio_outputs + [gr.update(visible=(i < 6)) for i in range(6)] + [status_output]
          )

          # Footer
          gr.HTML("""
+             <div style="text-align: center; padding: 2rem; color: #666; border-top: 1px solid #eee; margin-top: 2rem;">
+                 <p><strong>📡 API-calling version</strong> - calls the real model over the network for inference</p>
+                 <p>🔗 Official Space: <a href="https://huggingface.co/spaces/tencent/HunyuanVideo-Foley" target="_blank">tencent/HunyuanVideo-Foley</a></p>
+                 <p>⚠️ Requires: <code>pip install gradio_client</code></p>
              </div>
          """)

      return app

  if __name__ == "__main__":
+     # Set up logging
      logger.remove()
      logger.add(lambda msg: print(msg, end=''), level="INFO")

+     logger.info("Starting the HunyuanVideo-Foley API client...")
+
+     # Check dependencies
+     try:
+         import gradio_client
+         logger.info("✅ gradio_client is installed")
+     except ImportError:
+         logger.warning("⚠️ gradio_client is not installed; API calling may be limited")

+     # Create and launch the app
+     app = create_real_api_interface()

+     logger.info("API client ready; prepared to call the real model...")

      app.launch(
          server_name="0.0.0.0",
app_real_api.py ADDED
@@ -0,0 +1,326 @@
+ import os
+ import tempfile
+ import gradio as gr
+ import requests
+ import json
+ from loguru import logger
+ from typing import Optional, Tuple
+ import base64
+ import time
+
+ def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
+     """Call the official Hugging Face Space API"""
+     try:
+         from gradio_client import Client
+
+         logger.info("Connecting to the official HunyuanVideo-Foley Space...")
+
+         # Connect to the official Space
+         client = Client("tencent/HunyuanVideo-Foley")
+
+         logger.info("Sending inference request...")
+
+         # Call the inference function
+         result = client.predict(
+             video_file,  # video file
+             text_prompt,  # text prompt
+             guidance_scale,  # CFG scale
+             inference_steps,  # number of inference steps
+             sample_nums,  # number of samples
+             api_name="/infer_single_video"  # API endpoint name
+         )
+
+         return result, "✅ Audio generated successfully via the official API!"
+
+     except Exception as e:
+         error_msg = str(e)
+         logger.error(f"Gradio Client API call failed: {error_msg}")
+
+         if "not found" in error_msg.lower():
+             return None, "❌ API endpoint not found on the official Space; the interface may have changed"
+         elif "connection" in error_msg.lower():
+             return None, "❌ Could not connect to the official Space; please check your network"
+         elif "queue" in error_msg.lower():
+             return None, "⏳ The official Space is busy; please try again later"
+         else:
+             return None, f"❌ API call error: {error_msg}"
+
+ def call_huggingface_inference_api(video_file, text_prompt):
+     """Call the Hugging Face Inference API"""
+     try:
+         logger.info("Trying the Hugging Face Inference API...")
+
+         API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"
+
+         # Read the video file
+         with open(video_file, "rb") as f:
+             video_data = f.read()
+
+         # Prepare the request data
+         headers = {
+             "Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}",
+         }
+
+         # Send the request
+         response = requests.post(
+             API_URL,
+             headers=headers,
+             json={"inputs": {"video": base64.b64encode(video_data).decode(), "text": text_prompt}},
+             timeout=300
+         )
+
+         if response.status_code == 200:
+             # Save the result
+             temp_dir = tempfile.mkdtemp()
+             audio_path = os.path.join(temp_dir, "generated_audio.wav")
+             with open(audio_path, 'wb') as f:
+                 f.write(response.content)
+             return [audio_path], "✅ Generated successfully via the Hugging Face API!"
+         else:
+             logger.error(f"HF API error: {response.status_code}")
+             return None, f"❌ Hugging Face API returned an error: {response.status_code}"
+
+     except Exception as e:
+         logger.error(f"HF API call failed: {str(e)}")
+         return None, f"❌ Hugging Face API call failed: {str(e)}"
+
+ def try_alternative_apis(video_file, text_prompt):
+     """Try other possible API services"""
+
+     # 1. Try public demo endpoints
+     try:
+         logger.info("Trying demo endpoints...")
+
+         # Other public API services could be tried here,
+         # e.g. Replicate, RunPod, etc.
+
+         return None, "❌ No alternative API service is currently available"
+
+     except Exception as e:
+         return None, f"❌ Alternative API call failed: {str(e)}"
+
+ def smart_api_inference(video_file, text_prompt, guidance_scale=4.5, inference_steps=50, sample_nums=1):
+     """Smart API inference - try multiple API calling strategies"""
+
+     if video_file is None:
+         return [], "❌ Please upload a video file!"
+
+     if not text_prompt:
+         text_prompt = "audio for this video"
+
+     logger.info(f"Starting API inference: {video_file}")
+     logger.info(f"Text prompt: {text_prompt}")
+
+     status_updates = []
+
+     # Method 1: try the Gradio Client (most likely to succeed)
+     status_updates.append("🔄 Trying to connect to the official Space API...")
+     try:
+         result, status = call_gradio_client_api(
+             video_file, text_prompt, guidance_scale, inference_steps, sample_nums
+         )
+         if result:
+             return result, "\n".join(status_updates + [status])
+         status_updates.append(status)
+     except ImportError:
+         status_updates.append("⚠️ gradio_client is not installed; skipping the official API call")
+
+     # Method 2: try the Hugging Face Inference API
+     status_updates.append("🔄 Trying the Hugging Face Inference API...")
+     result, status = call_huggingface_inference_api(video_file, text_prompt)
+     if result:
+         return result, "\n".join(status_updates + [status])
+     status_updates.append(status)
+
+     # Method 3: try other APIs
+     status_updates.append("🔄 Trying alternative API services...")
+     result, status = try_alternative_apis(video_file, text_prompt)
+     status_updates.append(status)
+
+     # All methods failed
+     final_message = "\n".join(status_updates + [
+         "",
+         "💡 **Suggested fixes:**",
+         "• Install gradio_client: pip install gradio_client",
+         "• Set the HF_TOKEN environment variable",
+         "• Wait for the official Space's load to drop",
+         "• Run the full model locally (requires 24GB+ RAM)",
+         "",
+         "🔗 **Official Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley"
+     ])
+
+     return [], final_message
+
+ def create_real_api_interface():
+     """Create the real API-calling interface"""
+
+     css = """
+     .api-status {
+         background: #f0f8ff;
+         border: 2px solid #4169e1;
+         border-radius: 10px;
+         padding: 1rem;
+         margin: 1rem 0;
+         color: #191970;
+     }
+     """
+
+     with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app:
+
+         # Header
+         gr.HTML("""
+             <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 20px; margin-bottom: 2rem; color: white;">
+                 <h1>🎵 HunyuanVideo-Foley</h1>
+                 <p>API client - calls the real model for inference</p>
+             </div>
+         """)
+
+         # API Status Notice
+         gr.HTML("""
+             <div class="api-status">
+                 <strong>🌐 Real API mode:</strong> This version calls the real HunyuanVideo-Foley model over an API to run inference.
+                 <br><strong>Pros:</strong> real AI audio generation without large local memory
+                 <br><strong>Cons:</strong> depends on external service availability and may involve waiting in a queue
+             </div>
+         """)
+
+         with gr.Row():
+             # Input section
+             with gr.Column(scale=1):
+                 gr.Markdown("### 📹 Video Input")
+
+                 video_input = gr.Video(
+                     label="Upload Video",
+                     info="Supports MP4, AVI, MOV and other formats"
+                 )
+
+                 text_input = gr.Textbox(
+                     label="🎯 Audio Description",
+                     placeholder="Describe the audio you want, e.g. footsteps, rain, passing vehicles",
+                     lines=3,
+                     value="audio sound effects for this video"
+                 )
+
+                 with gr.Row():
+                     guidance_scale = gr.Slider(
+                         minimum=1.0,
+                         maximum=10.0,
+                         value=4.5,
+                         step=0.1,
+                         label="🎚️ CFG Scale"
+                     )
+
+                     inference_steps = gr.Slider(
+                         minimum=10,
+                         maximum=100,
+                         value=50,
+                         step=5,
+                         label="⚡ Inference Steps"
+                     )
+
+                     sample_nums = gr.Slider(
+                         minimum=1,
+                         maximum=6,
+                         value=1,
+                         step=1,
+                         label="🎲 Sample Count"
+                     )
+
+                 generate_btn = gr.Button(
+                     "🎵 Generate Audio via API",
+                     variant="primary",
+                     size="lg"
+                 )
+
+             # Output section
+             with gr.Column(scale=1):
+                 gr.Markdown("### 🎵 Generated Results")
+
+                 audio_outputs = []
+                 for i in range(6):
+                     audio_output = gr.Audio(
+                         label=f"Sample {i+1}",
+                         visible=(i == 0)  # only show the first one
+                     )
+                     audio_outputs.append(audio_output)
+
+                 status_output = gr.Textbox(
+                     label="API Status",
+                     interactive=False,
+                     lines=10,
+                     placeholder="Waiting for an API call..."
+                 )
+
+         # Event handling
+         def process_with_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
+             # Run API inference
+             results, status_msg = smart_api_inference(
+                 video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
+             )
+
+             # Prepare the outputs
+             outputs = [None] * 6
+             visibilities = [False] * 6
+
+             if results and isinstance(results, list):
+                 for i, result in enumerate(results[:6]):
+                     outputs[i] = result
+                     visibilities[i] = True
+
+             return outputs + visibilities + [status_msg]
+
+         # Dynamically show the selected number of samples
+         def update_visibility(sample_nums):
+             sample_nums = int(sample_nums)
+             return [gr.update(visible=(i < sample_nums)) for i in range(6)]
+
+         # Wire up events
+         sample_nums.change(
+             fn=update_visibility,
+             inputs=[sample_nums],
+             outputs=audio_outputs
+         )
+
+         generate_btn.click(
+             fn=process_with_api,
+             inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
+             outputs=audio_outputs + [gr.update(visible=(i < 6)) for i in range(6)] + [status_output]
+         )
+
+         # Footer
+         gr.HTML("""
+             <div style="text-align: center; padding: 2rem; color: #666; border-top: 1px solid #eee; margin-top: 2rem;">
+                 <p><strong>📡 API-calling version</strong> - calls the real model over the network for inference</p>
+                 <p>🔗 Official Space: <a href="https://huggingface.co/spaces/tencent/HunyuanVideo-Foley" target="_blank">tencent/HunyuanVideo-Foley</a></p>
+                 <p>⚠️ Requires: <code>pip install gradio_client</code></p>
+             </div>
+         """)
+
+     return app
+
+ if __name__ == "__main__":
+     # Set up logging
+     logger.remove()
+     logger.add(lambda msg: print(msg, end=''), level="INFO")
+
+     logger.info("Starting the HunyuanVideo-Foley API client...")
+
+     # Check dependencies
+     try:
+         import gradio_client
+         logger.info("✅ gradio_client is installed")
+     except ImportError:
+         logger.warning("⚠️ gradio_client is not installed; API calling may be limited")
+
+     # Create and launch the app
+     app = create_real_api_interface()
+
+     logger.info("API client ready; prepared to call the real model...")
+
+     app.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         debug=False,
+         show_error=True
+     )
requirements.txt CHANGED
@@ -1,7 +1,10 @@
- # Minimal requirements for working demo version
- torch>=2.0.0
- torchaudio>=2.0.0
- numpy>=1.21.0
  gradio>=4.0.0
  loguru>=0.6.0
- requests>=2.25.0

+ # Dependencies for the API-calling version
  gradio>=4.0.0
+ gradio_client>=0.8.0
+ requests>=2.25.0
  loguru>=0.6.0
+ numpy>=1.21.0
+
+ # Optional dependencies (for fallback features)
+ torch>=2.0.0
+ torchaudio>=2.0.0
requirements_api.txt ADDED
@@ -0,0 +1,10 @@
+ # Dependencies for the API-calling version
+ gradio>=4.0.0
+ gradio_client>=0.8.0
+ requests>=2.25.0
+ loguru>=0.6.0
+ numpy>=1.21.0
+
+ # Optional dependencies (for fallback features)
+ torch>=2.0.0
+ torchaudio>=2.0.0