Spaces:
Running
Running
| import os | |
| import tempfile | |
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from loguru import logger | |
| from typing import Optional, Tuple | |
| import requests | |
| import json | |
def create_realistic_demo_audio(video_file, text_prompt: str, duration: float = 5.0) -> str:
    """Synthesize a placeholder sound effect and write it to a WAV file.

    The waveform is picked by plain substring match on ``text_prompt``
    (English keywords are matched case-insensitively, Chinese ones as-is);
    the first matching rule wins:

    * ``footsteps`` / ``步``: a 2 Hz sine whose amplitude decays
      exponentially and resets every 0.5 s.
    * ``rain`` / ``雨``: Gaussian white noise.
    * ``wind`` / ``风``: a 0.5 Hz sine mixed with white noise.
    * ``car`` / ``车``: 80 Hz and 120 Hz sines mixed together.
    * anything else: a tone at ``220 + 5 * len(text_prompt)`` Hz plus its
      second and third harmonics.

    A linear fade-in and fade-out of up to 0.1 s each is applied. The fade
    is capped at half the clip length so clips shorter than 0.2 s no longer
    fail with a shape mismatch or get overlapping ramps.

    Args:
        video_file: Not read by this function.
        text_prompt: Description used to select the waveform.
        duration: Clip length in seconds.

    Returns:
        Path of ``enhanced_demo_audio.wav`` inside a newly created temporary
        directory. The directory is not removed here; the returned path has
        to stay valid after this call.
    """
    import math  # function-scope import: the module's import block is left untouched

    sample_rate = 48000
    duration_samples = int(duration * sample_rate)
    t = torch.linspace(0, duration, duration_samples)

    two_pi = 2 * math.pi
    prompt_lower = text_prompt.lower()

    if "footsteps" in prompt_lower or "步" in text_prompt:
        # Low-frequency beat: the decay term restarts every half second.
        audio = 0.4 * torch.sin(two_pi * 2 * t) * torch.exp(-3 * (t % 0.5))
    elif "rain" in prompt_lower or "雨" in text_prompt:
        audio = 0.3 * torch.randn(duration_samples)
    elif "wind" in prompt_lower or "风" in text_prompt:
        audio = 0.3 * torch.sin(two_pi * 0.5 * t) + 0.2 * torch.randn(duration_samples)
    elif "car" in prompt_lower or "车" in text_prompt:
        audio = 0.3 * torch.sin(two_pi * 80 * t) + 0.2 * torch.sin(two_pi * 120 * t)
    else:
        # The prompt length shifts the pitch so different prompts sound different.
        base_freq = 220 + len(text_prompt) * 5
        audio = 0.3 * torch.sin(two_pi * base_freq * t)
        audio += 0.1 * torch.sin(two_pi * base_freq * 2 * t)
        audio += 0.05 * torch.sin(two_pi * base_freq * 3 * t)

    # Fade in/out to avoid clicks at the clip edges. Capping at half the clip
    # keeps both ramps inside the signal; a zero-length fade is skipped
    # because ``envelope[-0:]`` would address the whole tensor.
    fade_samples = min(int(0.1 * sample_rate), duration_samples // 2)
    if fade_samples > 0:
        envelope = torch.ones_like(audio)
        envelope[:fade_samples] = torch.linspace(0, 1, fade_samples)
        envelope[-fade_samples:] = torch.linspace(1, 0, fade_samples)
        audio = audio * envelope

    temp_dir = tempfile.mkdtemp()
    audio_path = os.path.join(temp_dir, "enhanced_demo_audio.wav")
    # torchaudio expects (channels, samples); add the mono channel axis.
    torchaudio.save(audio_path, audio.unsqueeze(0), sample_rate)
    return audio_path
def check_real_api_availability():
    """Report which real inference backends look usable right now.

    Every probe is best-effort: a failure leaves the corresponding flag
    ``False`` instead of raising.

    Returns:
        A dict with boolean values under these keys:

        * ``"gradio_client"``: ``gradio_client`` imports and a ``Client``
          for the ``tencent/HunyuanVideo-Foley`` Space could be constructed.
        * ``"hf_inference"``: ``HF_TOKEN`` or ``HUGGING_FACE_HUB_TOKEN`` is
          set to a non-empty value. The token itself is not validated.
        * ``"replicate"``: ``replicate`` imports and ``REPLICATE_API_TOKEN``
          is set to a non-empty value.
    """
    api_status = {
        "gradio_client": False,
        "hf_inference": False,
        "replicate": False
    }

    # gradio_client: constructing the client is the connectivity check.
    try:
        from gradio_client import Client

        # NOTE(review): ``timeout`` may not be a keyword accepted by the
        # installed gradio_client ``Client``; if it is rejected, the TypeError
        # is logged below and this probe reports False. Confirm against the
        # installed version.
        Client("tencent/HunyuanVideo-Foley", timeout=5)
        api_status["gradio_client"] = True
    except ImportError:
        # Optional dependency not installed: the backend is simply unavailable.
        pass
    except Exception as exc:
        # Log instead of silently swallowing so misconfiguration is visible.
        logger.warning(f"gradio_client 检查失败: {exc!r}")

    # Hugging Face: only the presence of a token is checked.
    hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGING_FACE_HUB_TOKEN')
    if hf_token:
        api_status["hf_inference"] = True

    # Replicate: needs both the package and an API token.
    try:
        import replicate  # noqa: F401 -- imported only to test availability

        if os.environ.get('REPLICATE_API_TOKEN'):
            api_status["replicate"] = True
    except ImportError:
        pass
    except Exception as exc:
        logger.warning(f"replicate 检查失败: {exc!r}")

    return api_status
def process_video_smart(video_file, text_prompt: str, guidance_scale: float, inference_steps: int, sample_nums: int) -> Tuple[list, str]:
    """Generate demo audio samples for an uploaded video and build a status report.

    The availability of real backends is probed and reported, but the audio
    itself always comes from ``create_realistic_demo_audio``.

    Args:
        video_file: Uploaded video (a path, or an object exposing ``name``);
            ``None`` means nothing was uploaded.
        text_prompt: Sound description. ``None``, empty, or whitespace-only
            falls back to a generic default prompt.
        guidance_scale: Echoed in the status message only.
        inference_steps: Echoed in the status message only.
        sample_nums: Requested number of samples; at most 3 are generated.

    Returns:
        ``(audio_paths, status_message)``. On a missing video or any
        processing error the list is empty and the message describes the
        problem; exceptions are not propagated.
    """
    if video_file is None:
        return [], "❌ 请上传视频文件!"

    # A cleared text field typically arrives as "" rather than None, so a
    # blank prompt is treated as missing too.
    if text_prompt is None or not text_prompt.strip():
        text_prompt = "audio sound effects for this video"

    api_status = check_real_api_availability()
    logger.info(f"API可用性检查: {api_status}")

    # A real backend call would go here; for now only the demo synthesizer runs.
    try:
        logger.info(f"处理视频: {video_file}")
        logger.info(f"文本提示: {text_prompt}")

        # Sliders may deliver floats; range() needs an int.
        sample_count = min(int(sample_nums), 3)
        audio_outputs = [
            # The suffix gives every sample its own prompt text.
            create_realistic_demo_audio(video_file, f"{text_prompt}_variation_{index + 1}")
            for index in range(sample_count)
        ]

        # Show the file name for both path-like uploads and file-like objects;
        # the original check only ever produced the generic label for paths.
        if isinstance(video_file, (str, os.PathLike)):
            video_label = os.path.basename(video_file)
        elif hasattr(video_file, 'name'):
            video_label = os.path.basename(str(video_file.name))
        else:
            video_label = '已上传'

        def _mark(available: bool) -> str:
            return '✅' if available else '❌'

        status_msg = "\n".join([
            "✅ 增强演示版本处理完成!",
            f"📹 **视频**: {video_label}",
            f"📝 **提示**: \"{text_prompt}\"",
            f"⚙️ **设置**: CFG={guidance_scale}, 步数={inference_steps}, 样本={sample_nums}",
            f"🎵 **生成**: {len(audio_outputs)} 个音频样本",
            "🧠 **智能特性**:",
            "• 根据文本内容选择音频类型",
            "• 脚步声/雨声/风声/车辆声等不同效果",
            "• 48kHz高质量输出",
            "• 自动淡入淡出和包络处理",
            "📊 **API状态检查**:",
            f"• Gradio Client: {_mark(api_status['gradio_client'])}",
            f"• HF Inference: {_mark(api_status['hf_inference'])}",
            f"• Replicate: {_mark(api_status['replicate'])}",
            "💡 **这是增强演示版本,展示真实AI音频的工作流程**",
            "🚀 **完整版本**: https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley",
        ])
        return audio_outputs, status_msg
    except Exception as e:
        # Keep the traceback in the log; the UI only gets the short message.
        logger.exception(f"处理失败: {str(e)}")
        return [], f"❌ 处理失败: {str(e)}"
# Static markup for the demo page, kept out of the builder so the function
# below reads as layout and wiring only.
_SMART_CSS = """
.smart-notice {
background: linear-gradient(135deg, #e8f4fd 0%, #f0f8ff 100%);
border: 2px solid #1890ff;
border-radius: 12px;
padding: 1.5rem;
margin: 1rem 0;
color: #0050b3;
}
.api-status {
background: #f6ffed;
border: 1px solid #52c41a;
border-radius: 8px;
padding: 1rem;
margin: 1rem 0;
color: #389e0d;
}
"""

_HEADER_HTML = """
<div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 20px; margin-bottom: 2rem; color: white;">
<h1>🎵 HunyuanVideo-Foley</h1>
<p>智能演示版 - 真实工作流程体验</p>
</div>
"""

_NOTICE_HTML = """
<div class="smart-notice">
<strong>🧠 智能演示模式:</strong>
<br>• 自动检测可用API服务
<br>• 根据文本内容生成对应音效类型
<br>• 完整展示AI音频生成工作流程
<br>• <strong>支持</strong>: 脚步声、雨声、风声、车辆声等多种音效
</div>
"""

_EXAMPLES_HTML = """
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin: 1rem 0;">
<div style="padding: 1rem; background: #f8fafc; border-radius: 8px;">
<strong>脚步声:</strong> footsteps on wooden floor<br>
<strong>自然音:</strong> rain drops on leaves<br>
<strong>环境音:</strong> wind through the trees
</div>
<div style="padding: 1rem; background: #f8fafc; border-radius: 8px;">
<strong>机械音:</strong> car engine running<br>
<strong>动作音:</strong> door opening and closing<br>
<strong>水声:</strong> water flowing in stream
</div>
</div>
"""

_FOOTER_HTML = """
<div style="text-align: center; padding: 2rem; color: #666; border-top: 1px solid #eee; margin-top: 2rem;">
<p><strong>🧠 智能演示版</strong> - 展示完整的AI音频生成工作流程</p>
<p>💡 根据不同描述词生成对应类型的音效</p>
<p>🔗 完整版本: <a href="https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley" target="_blank">GitHub Repository</a></p>
</div>
"""

# Initial value of the sample-count slider; the audio players' initial
# visibility is derived from it so the two cannot drift apart.
_DEFAULT_SAMPLE_COUNT = 2


def create_smart_interface():
    """Build the Gradio Blocks UI for the smart demo.

    Layout: a header and notice banner, then a two-column row with the
    inputs (video, prompt, three sliders, generate button) on the left and
    the outputs (three audio players, status box) on the right, followed by
    prompt suggestions and a footer.

    Wiring: moving the sample-count slider shows or hides audio players 2
    and 3; the generate button calls ``process_video_smart`` and spreads up
    to three resulting files over the players.

    Returns:
        The constructed ``gr.Blocks`` app; it is not launched here.
    """
    with gr.Blocks(css=_SMART_CSS, title="HunyuanVideo-Foley Smart Demo") as app:
        gr.HTML(_HEADER_HTML)
        gr.HTML(_NOTICE_HTML)

        with gr.Row():
            # Input column
            with gr.Column(scale=1):
                gr.Markdown("### 📹 视频输入")
                video_input = gr.Video(
                    label="上传视频文件"
                )
                text_input = gr.Textbox(
                    label="🎯 音频描述",
                    placeholder="例如:footsteps on wood floor, rain on leaves, wind through trees, car engine",
                    lines=3,
                    value="footsteps on the ground"
                )
                # NOTE(review): the original indentation was not available;
                # placing all three sliders in one row is an assumption.
                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale"
                    )
                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ 推理步数"
                    )
                    sample_nums = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=_DEFAULT_SAMPLE_COUNT,
                        step=1,
                        label="🎲 样本数量"
                    )
                generate_btn = gr.Button(
                    "🎵 智能生成音频",
                    variant="primary"
                )

            # Output column
            with gr.Column(scale=1):
                gr.Markdown("### 🎵 生成结果")
                # Initial visibility follows the slider default, so every
                # sample produced by the first click has a visible player
                # (player 2 used to start hidden although the default is 2).
                audio_output_1 = gr.Audio(label="样本 1", visible=True)
                audio_output_2 = gr.Audio(label="样本 2", visible=_DEFAULT_SAMPLE_COUNT >= 2)
                audio_output_3 = gr.Audio(label="样本 3", visible=_DEFAULT_SAMPLE_COUNT >= 3)
                status_output = gr.Textbox(
                    label="处理状态",
                    interactive=False,
                    lines=12,
                    placeholder="等待处理..."
                )

        gr.Markdown("### 🌟 推荐提示词")
        gr.HTML(_EXAMPLES_HTML)

        def process_smart(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            """Run generation and pad the result to exactly three audio slots."""
            audio_files, status_msg = process_video_smart(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            # Players without a sample receive None, which clears them.
            slots = (list(audio_files[:3]) + [None, None, None])[:3]
            return slots[0], slots[1], slots[2], status_msg

        def update_visibility(sample_nums):
            """Show only as many audio players as samples were requested."""
            requested = int(sample_nums)
            return [
                gr.update(visible=True),  # player 1 is always shown
                gr.update(visible=requested >= 2),
                gr.update(visible=requested >= 3)
            ]

        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=[audio_output_1, audio_output_2, audio_output_3]
        )
        generate_btn.click(
            fn=process_smart,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=[audio_output_1, audio_output_2, audio_output_3, status_output]
        )

        gr.HTML(_FOOTER_HTML)

    return app
if __name__ == "__main__":
    # Replace loguru's default sink with one that prints records at INFO
    # level and above; records already end with a newline.
    logger.remove()
    logger.add(lambda record: print(record, end=''), level="INFO")

    logger.info("启动 HunyuanVideo-Foley 智能演示版...")
    app = create_smart_interface()
    logger.info("智能演示版就绪 - 支持多种音效类型")

    # Listen on all interfaces, port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
    }
    app.launch(**launch_options)