"""Gradio front end that forwards video-to-audio requests to remote HunyuanVideo-Foley APIs.

The module tries several remote back ends in order (the official Hugging Face
Space through ``gradio_client``, then the Hugging Face Inference API, then a
placeholder for alternative services) and shows the collected status messages
next to any generated audio.
"""

import base64
import json
import os
import tempfile
import time
from typing import Any, List, Optional, Tuple

import gradio as gr
import requests
from loguru import logger

# Number of audio slots created in the UI; also the upper bound of the
# "sample count" slider.
MAX_SAMPLES = 6


def call_gradio_client_api(
    video_file: str,
    text_prompt: str,
    guidance_scale: float,
    inference_steps: int,
    sample_nums: int,
) -> Tuple[Optional[Any], str]:
    """Call the official Hugging Face Space through ``gradio_client``.

    Args:
        video_file: Path of the input video.
        text_prompt: Text description of the desired audio.
        guidance_scale: CFG scale forwarded to the Space.
        inference_steps: Number of inference steps forwarded to the Space.
        sample_nums: Number of samples requested from the Space.

    Returns:
        ``(result, status_message)``. ``result`` is whatever ``Client.predict``
        returned, or ``None`` when the call failed or ``gradio_client`` is not
        installed. This function never raises.
    """
    # gradio_client is an optional dependency, so it is imported lazily and a
    # missing install is reported with its own message instead of being
    # folded into the generic "API error" branch below.
    try:
        from gradio_client import Client
    except ImportError:
        logger.warning("gradio_client 未安装,跳过官方API调用")
        return None, "⚠️ gradio_client未安装,跳过官方API调用"

    try:
        logger.info("连接到官方 HunyuanVideo-Foley Space...")
        client = Client("tencent/HunyuanVideo-Foley")

        logger.info("发送推理请求...")
        # NOTE(review): newer gradio_client releases may require file inputs
        # to be wrapped with handle_file(); confirm against the installed
        # version and the Space's API page.
        result = client.predict(
            video_file,
            text_prompt,
            guidance_scale,
            inference_steps,
            sample_nums,
            api_name="/infer_single_video",
        )
        return result, "✅ 成功通过官方API生成音频!"
    except Exception as e:  # boundary: convert any failure into a status message
        error_msg = str(e)
        logger.error(f"Gradio Client API 调用失败: {error_msg}")

        lowered = error_msg.lower()
        if "not found" in lowered:
            return None, "❌ 官方Space的API端点未找到,可能接口已更改"
        if "connection" in lowered:
            return None, "❌ 无法连接到官方Space,请检查网络"
        if "queue" in lowered:
            return None, "⏳ 官方Space繁忙,请稍后重试"
        return None, f"❌ API调用错误: {error_msg}"


def call_huggingface_inference_api(
    video_file: str, text_prompt: str
) -> Tuple[Optional[List[str]], str]:
    """Call the Hugging Face Inference API with the video encoded as base64.

    Args:
        video_file: Path of the input video.
        text_prompt: Text description of the desired audio.

    Returns:
        ``([audio_path], status_message)`` when the API answers with HTTP 200,
        where ``audio_path`` is a freshly written file holding the response
        body; otherwise ``(None, status_message)``. This function never raises.
    """
    try:
        logger.info("尝试Hugging Face Inference API...")

        API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"

        with open(video_file, "rb") as f:
            video_data = f.read()

        # Only send the Authorization header when a token is configured; an
        # empty "Bearer " value is a malformed credential.
        token = os.environ.get("HF_TOKEN", "")
        headers = {"Authorization": f"Bearer {token}"} if token else {}

        response = requests.post(
            API_URL,
            headers=headers,
            json={
                "inputs": {
                    "video": base64.b64encode(video_data).decode(),
                    "text": text_prompt,
                }
            },
            timeout=300,
        )

        if response.status_code != 200:
            logger.error(f"HF API错误: {response.status_code}")
            return None, f"❌ Hugging Face API返回错误: {response.status_code}"

        # The directory is intentionally left on disk: the returned path is
        # handed back to the caller after this function returns.
        temp_dir = tempfile.mkdtemp()
        audio_path = os.path.join(temp_dir, "generated_audio.wav")
        with open(audio_path, "wb") as f:
            f.write(response.content)

        return [audio_path], "✅ 通过Hugging Face API生成成功!"
    except Exception as e:  # boundary: convert any failure into a status message
        logger.error(f"HF API调用失败: {str(e)}")
        return None, f"❌ Hugging Face API调用失败: {str(e)}"


def try_alternative_apis(video_file: str, text_prompt: str) -> Tuple[None, str]:
    """Placeholder for other API services (e.g. Replicate, RunPod).

    No alternative service is implemented, so this always reports that none
    is available.

    Args:
        video_file: Path of the input video (currently unused).
        text_prompt: Text description of the desired audio (currently unused).

    Returns:
        ``(None, status_message)``.
    """
    logger.info("尝试demo接口...")
    return None, "❌ 暂无可用的替代API服务"


def smart_api_inference(
    video_file: Optional[str],
    text_prompt: Optional[str],
    guidance_scale: float = 4.5,
    inference_steps: int = 50,
    sample_nums: int = 1,
) -> Tuple[Any, str]:
    """Try each remote back end in turn and return the first successful result.

    Args:
        video_file: Path of the input video, or ``None`` if nothing was uploaded.
        text_prompt: Audio description; a generic prompt is used when empty.
        guidance_scale: CFG scale forwarded to the official Space.
        inference_steps: Number of inference steps forwarded to the official Space.
        sample_nums: Number of samples requested from the official Space.

    Returns:
        ``(result, status_text)``. ``result`` is the raw value of the first
        back end that produced one, or ``[]`` when every back end failed.
        ``status_text`` is the newline-joined log of every attempt.
    """
    if video_file is None:
        return [], "❌ 请上传视频文件!"

    if not text_prompt:
        text_prompt = "audio for this video"

    logger.info(f"开始API推理: {video_file}")
    logger.info(f"文本提示: {text_prompt}")

    status_updates: List[str] = []

    # Method 1: the official Space through gradio_client (most likely to work).
    status_updates.append("🔄 尝试连接官方Space API...")
    result, status = call_gradio_client_api(
        video_file, text_prompt, guidance_scale, inference_steps, sample_nums
    )
    if result:
        return result, "\n".join(status_updates + [status])
    status_updates.append(status)

    # Method 2: the Hugging Face Inference API.
    status_updates.append("🔄 尝试Hugging Face Inference API...")
    result, status = call_huggingface_inference_api(video_file, text_prompt)
    if result:
        return result, "\n".join(status_updates + [status])
    status_updates.append(status)

    # Method 3: any other service (currently a placeholder that never succeeds).
    status_updates.append("🔄 尝试替代API服务...")
    result, status = try_alternative_apis(video_file, text_prompt)
    status_updates.append(status)

    # Every method failed: append troubleshooting hints to the attempt log.
    final_message = "\n".join(status_updates + [
        "",
        "💡 **解决方案建议:**",
        "• 安装 gradio_client: pip install gradio_client",
        "• 配置 HF_TOKEN 环境变量",
        "• 等待官方Space负载降低",
        "• 本地运行完整模型(需24GB+ RAM)",
        "",
        "🔗 **官方Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley",
    ])

    return [], final_message


def _collect_audio_paths(result: Any) -> List[str]:
    """Flatten an API result into a list of existing local file paths.

    ``Client.predict`` may return a bare value, a tuple or a nested structure
    depending on the endpoint, and the payload shape of the remote Space is
    not visible from this file. Strings, path-like objects, lists, tuples and
    dict values are therefore walked recursively, and only entries that name
    an existing file are kept. Anything else (status text, ``None``, numbers)
    is ignored.
    """
    if isinstance(result, (str, os.PathLike)):
        return [os.fspath(result)] if os.path.isfile(result) else []
    if isinstance(result, dict):
        result = list(result.values())
    if isinstance(result, (list, tuple)):
        paths: List[str] = []
        for item in result:
            paths.extend(_collect_audio_paths(item))
        return paths
    return []


def create_real_api_interface() -> gr.Blocks:
    """Build the Gradio Blocks UI that drives ``smart_api_inference``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    css = """
    .api-status {
        background: #f0f8ff;
        border: 2px solid #4169e1;
        border-radius: 10px;
        padding: 1rem;
        margin: 1rem 0;
        color: #191970;
    }
    """

    with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app:

        # Header
        gr.HTML("""

🎵 HunyuanVideo-Foley

API客户端 - 调用真实模型推理

""")

        # API status notice
        gr.HTML("""
🌐 真实API调用模式: 这个版本会通过API调用真实的HunyuanVideo-Foley模型进行推理。
优点: 真实AI音频生成,无需本地大内存
缺点: 依赖外部服务可用性,可能需要等待队列
""")

        with gr.Row():
            # Input column
            with gr.Column(scale=1):
                gr.Markdown("### 📹 视频输入")

                # NOTE(review): some Gradio releases may not accept `info` on
                # gr.Video; confirm against the installed version.
                video_input = gr.Video(
                    label="上传视频",
                    info="支持MP4、AVI、MOV等格式"
                )

                text_input = gr.Textbox(
                    label="🎯 音频描述",
                    placeholder="描述你想要的音频效果,例如:脚步声、雨声、车辆行驶等",
                    lines=3,
                    value="audio sound effects for this video"
                )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale"
                    )

                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ 推理步数"
                    )

                    sample_nums = gr.Slider(
                        minimum=1,
                        maximum=MAX_SAMPLES,
                        value=1,
                        step=1,
                        label="🎲 样本数量"
                    )

                generate_btn = gr.Button(
                    "🎵 调用API生成音频",
                    variant="primary",
                    size="lg"
                )

            # Output column
            with gr.Column(scale=1):
                gr.Markdown("### 🎵 生成结果")

                # One audio slot per possible sample; only the first is
                # visible until the sample-count slider changes.
                audio_outputs = [
                    gr.Audio(label=f"样本 {i+1}", visible=(i == 0))
                    for i in range(MAX_SAMPLES)
                ]

                status_output = gr.Textbox(
                    label="API状态",
                    interactive=False,
                    lines=10,
                    placeholder="等待API调用..."
                )

        def process_with_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            """Run inference and return one update per audio slot plus the status text."""
            results, status_msg = smart_api_inference(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )

            audio_paths = _collect_audio_paths(results)[:MAX_SAMPLES]
            # With results, show exactly the filled slots; without any, keep
            # the slots selected by the slider visible (but empty) so the
            # layout does not collapse after a failed call.
            shown = len(audio_paths) if audio_paths else int(sample_nums)

            audio_updates = [
                gr.update(
                    value=audio_paths[i] if i < len(audio_paths) else None,
                    visible=(i < shown),
                )
                for i in range(MAX_SAMPLES)
            ]
            # Exactly one return value per component listed in `outputs`.
            return audio_updates + [status_msg]

        def update_visibility(sample_nums):
            """Show as many audio slots as the slider requests."""
            sample_nums = int(sample_nums)
            return [gr.update(visible=(i < sample_nums)) for i in range(MAX_SAMPLES)]

        # Event wiring
        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=audio_outputs
        )

        # `outputs` must list components only; visibility changes travel
        # inside the gr.update values returned by process_with_api.
        generate_btn.click(
            fn=process_with_api,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=audio_outputs + [status_output]
        )

        # Footer
        gr.HTML("""

📡 API调用版本 - 通过网络调用真实模型进行推理

🔗 官方Space: tencent/HunyuanVideo-Foley

⚠️ 需要安装: pip install gradio_client

""")

    return app


def main() -> None:
    """Configure logging, report optional dependencies and launch the app."""
    # Replace loguru's default sink with a plain stdout printer at INFO level.
    logger.remove()
    logger.add(lambda msg: print(msg, end=''), level="INFO")

    logger.info("启动 HunyuanVideo-Foley API 客户端...")

    # Dependency check: gradio_client is optional but needed for the
    # official Space back end.
    try:
        import gradio_client
        logger.info("✅ gradio_client 已安装")
    except ImportError:
        logger.warning("⚠️ gradio_client 未安装,API调用功能可能受限")

    app = create_real_api_interface()

    logger.info("API客户端就绪,准备调用真实模型...")

    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True
    )


if __name__ == "__main__":
    main()