# hunyuanvideo-foley / app_real_api.py
# Uploaded by wzy013 - commit 21d1989: "Deploy real API calling version" (11.6 kB)
import os
import tempfile
import gradio as gr
import requests
import json
from loguru import logger
from typing import Optional, Tuple
import base64
import time
def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
    """Run inference on the official Hugging Face Space through ``gradio_client``.

    Args:
        video_file: Video input, forwarded as the first positional argument.
        text_prompt: Text prompt, forwarded as the second positional argument.
        guidance_scale: CFG scale forwarded to the endpoint.
        inference_steps: Number of inference steps forwarded to the endpoint.
        sample_nums: Number of samples forwarded to the endpoint.

    Returns:
        A ``(result, status)`` tuple. On success ``result`` is whatever
        ``Client.predict`` returned; on any failure it is ``None``.
        ``status`` is always a user-facing message. This function never
        raises for a missing ``gradio_client`` package or a failed call.
    """
    # The dependency is optional, so it is imported lazily. It is handled
    # apart from the API call so that a missing package is reported as such
    # rather than being swallowed by the generic handler below.
    try:
        from gradio_client import Client
    except ImportError:
        status = "⚠️ gradio_client未安装,跳过官方API调用"
        logger.warning(status)
        return None, status

    try:
        logger.info("连接到官方 HunyuanVideo-Foley Space...")
        # Connect to the official Space.
        client = Client("tencent/HunyuanVideo-Foley")
        logger.info("发送推理请求...")
        # NOTE(review): newer gradio_client releases may require file inputs
        # to be wrapped (e.g. with handle_file) - confirm against the
        # installed version and the Space's API page.
        result = client.predict(
            video_file,       # video file
            text_prompt,      # text prompt
            guidance_scale,   # CFG scale
            inference_steps,  # inference steps
            sample_nums,      # number of samples
            api_name="/infer_single_video",  # endpoint name
        )
        return result, "✅ 成功通过官方API生成音频!"
    except Exception as e:
        error_msg = str(e)
        logger.error(f"Gradio Client API 调用失败: {error_msg}")

        # Classify the failure by keywords in the exception text.
        lowered = error_msg.lower()
        if "not found" in lowered:
            return None, "❌ 官方Space的API端点未找到,可能接口已更改"
        if "connection" in lowered:
            return None, "❌ 无法连接到官方Space,请检查网络"
        if "queue" in lowered:
            return None, "⏳ 官方Space繁忙,请稍后重试"
        return None, f"❌ API调用错误: {error_msg}"
def call_huggingface_inference_api(video_file, text_prompt):
    """Request audio generation from the Hugging Face Inference API.

    The video is read from disk, base64-encoded and posted as JSON together
    with the text prompt. On an HTTP 200 response the raw response body is
    written to ``generated_audio.wav`` inside a fresh temporary directory.

    Args:
        video_file: Path of the video file to read and upload.
        text_prompt: Text prompt sent alongside the video.

    Returns:
        A ``(result, status)`` tuple. ``result`` is a one-element list with
        the path of the saved audio file on success and ``None`` on failure.
        ``status`` is a user-facing message. Exceptions are caught and
        reported through ``status``.
    """
    try:
        logger.info("尝试Hugging Face Inference API...")
        api_url = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"

        # Read the video file.
        with open(video_file, "rb") as f:
            video_data = f.read()

        # Attach credentials only when a token is configured. An
        # "Authorization: Bearer " header with an empty credential is
        # malformed, so anonymous requests omit the header entirely.
        headers = {}
        token = os.environ.get("HF_TOKEN", "").strip()
        if token:
            headers["Authorization"] = f"Bearer {token}"

        # Send the request.
        response = requests.post(
            api_url,
            headers=headers,
            json={"inputs": {"video": base64.b64encode(video_data).decode(), "text": text_prompt}},
            timeout=300,
        )

        if response.status_code != 200:
            logger.error(f"HF API错误: {response.status_code}")
            return None, f"❌ Hugging Face API返回错误: {response.status_code}"

        # Save the result. The directory is intentionally not removed here
        # because the returned path must outlive this call.
        temp_dir = tempfile.mkdtemp()
        audio_path = os.path.join(temp_dir, "generated_audio.wav")
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return [audio_path], "✅ 通过Hugging Face API生成成功!"
    except Exception as e:
        logger.error(f"HF API调用失败: {str(e)}")
        return None, f"❌ Hugging Face API调用失败: {str(e)}"
def try_alternative_apis(video_file, text_prompt):
    """Placeholder for additional API back ends; currently always reports none.

    Args:
        video_file: Unused by the current stub.
        text_prompt: Unused by the current stub.

    Returns:
        ``(None, status)`` where ``status`` says that no alternative service
        is available, or describes the exception if the attempt raised one.
    """
    # 1. Try a public demo endpoint.
    try:
        logger.info("尝试demo接口...")
        # Other public API services (e.g. Replicate, RunPod) could be
        # attempted here.
    except Exception as exc:
        outcome = f"❌ 替代API调用失败: {exc!s}"
    else:
        outcome = "❌ 暂无可用的替代API服务"
    return None, outcome
def smart_api_inference(video_file, text_prompt, guidance_scale=4.5, inference_steps=50, sample_nums=1):
    """Try each available API back end in turn and return the first success.

    Order of attempts: the official Space through ``call_gradio_client_api``,
    then ``call_huggingface_inference_api``, then ``try_alternative_apis``
    (whose result is not used, only its status line).

    Args:
        video_file: Video to process; ``None`` short-circuits with an error.
        text_prompt: Audio description; a falsy value is replaced by a
            default prompt.
        guidance_scale: CFG scale passed to the official Space call.
        inference_steps: Inference steps passed to the official Space call.
        sample_nums: Sample count passed to the official Space call.

    Returns:
        ``(results, message)``. ``results`` is the successful back end's
        result, or ``[]`` when the input is missing or every attempt failed.
        ``message`` is the newline-joined log of attempts, followed by
        troubleshooting hints when every attempt failed.
    """
    if video_file is None:
        return [], "❌ 请上传视频文件!"

    prompt = text_prompt if text_prompt else "audio for this video"
    logger.info(f"开始API推理: {video_file}")
    logger.info(f"文本提示: {prompt}")

    # Attempt 1: Gradio client against the official Space (most likely to work).
    log_lines = ["🔄 尝试连接官方Space API..."]
    try:
        audio, note = call_gradio_client_api(
            video_file, prompt, guidance_scale, inference_steps, sample_nums
        )
    except ImportError:
        log_lines.append("⚠️ gradio_client未安装,跳过官方API调用")
    else:
        if audio:
            return audio, "\n".join([*log_lines, note])
        log_lines.append(note)

    # Attempt 2: Hugging Face Inference API.
    log_lines.append("🔄 尝试Hugging Face Inference API...")
    audio, note = call_huggingface_inference_api(video_file, prompt)
    if audio:
        return audio, "\n".join([*log_lines, note])
    log_lines.append(note)

    # Attempt 3: other services; only the status line is kept.
    log_lines.append("🔄 尝试替代API服务...")
    _, note = try_alternative_apis(video_file, prompt)
    log_lines.append(note)

    # Every attempt failed: append troubleshooting hints.
    log_lines.extend(
        [
            "",
            "💡 **解决方案建议:**",
            "• 安装 gradio_client: pip install gradio_client",
            "• 配置 HF_TOKEN 环境变量",
            "• 等待官方Space负载降低",
            "• 本地运行完整模型(需24GB+ RAM)",
            "",
            "🔗 **官方Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley",
        ]
    )
    return [], "\n".join(log_lines)
def create_real_api_interface():
    """Build the Gradio Blocks UI for the API-client version of the app.

    The layout has a video input, a prompt textbox, three sliders (CFG
    scale, inference steps, sample count), a generate button, six audio
    output slots and a status textbox. Clicking the button runs
    ``smart_api_inference`` and fills the audio slots; moving the
    sample-count slider changes how many audio slots are visible.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # Number of audio output slots; also the upper bound of the sample slider.
    max_samples = 6

    # The multi-line CSS/HTML literals below are kept flush-left on purpose
    # so the markup sent to the browser is unchanged.
    css = """
.api-status {
background: #f0f8ff;
border: 2px solid #4169e1;
border-radius: 10px;
padding: 1rem;
margin: 1rem 0;
color: #191970;
}
"""
    with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app:
        # Header
        gr.HTML("""
<div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 20px; margin-bottom: 2rem; color: white;">
<h1>🎵 HunyuanVideo-Foley</h1>
<p>API客户端 - 调用真实模型推理</p>
</div>
""")
        # API status notice
        gr.HTML("""
<div class="api-status">
<strong>🌐 真实API调用模式:</strong> 这个版本会通过API调用真实的HunyuanVideo-Foley模型进行推理。
<br><strong>优点:</strong> 真实AI音频生成,无需本地大内存
<br><strong>缺点:</strong> 依赖外部服务可用性,可能需要等待队列
</div>
""")
        with gr.Row():
            # Input column
            with gr.Column(scale=1):
                gr.Markdown("### 📹 视频输入")
                # NOTE(review): confirm the installed Gradio version accepts
                # `info` on gr.Video.
                video_input = gr.Video(
                    label="上传视频",
                    info="支持MP4、AVI、MOV等格式"
                )
                text_input = gr.Textbox(
                    label="🎯 音频描述",
                    placeholder="描述你想要的音频效果,例如:脚步声、雨声、车辆行驶等",
                    lines=3,
                    value="audio sound effects for this video"
                )
                # NOTE(review): which sliders sit inside this inner row is
                # assumed; the original nesting could not be recovered.
                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale"
                    )
                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ 推理步数"
                    )
                sample_nums = gr.Slider(
                    minimum=1,
                    maximum=max_samples,
                    value=1,
                    step=1,
                    label="🎲 样本数量"
                )
                generate_btn = gr.Button(
                    "🎵 调用API生成音频",
                    variant="primary",
                    size="lg"
                )
            # Output column
            with gr.Column(scale=1):
                gr.Markdown("### 🎵 生成结果")
                # Only the first slot is visible initially.
                audio_outputs = [
                    gr.Audio(label=f"样本 {i+1}", visible=(i == 0))
                    for i in range(max_samples)
                ]
                status_output = gr.Textbox(
                    label="API状态",
                    interactive=False,
                    lines=10,
                    placeholder="等待API调用..."
                )

        # Event handlers
        def process_with_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            """Run inference and return one update per audio slot plus the status text."""
            results, status_msg = smart_api_inference(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            generated = results[:max_samples] if isinstance(results, list) else []
            # Value and visibility travel in a single update per component,
            # so the return list lines up one-to-one with `outputs` below.
            # Slots without a result are cleared and hidden.
            audio_updates = [
                gr.update(value=generated[i], visible=True)
                if i < len(generated)
                else gr.update(value=None, visible=False)
                for i in range(max_samples)
            ]
            return audio_updates + [status_msg]

        def update_visibility(sample_nums):
            """Show as many audio slots as the slider value requests."""
            sample_nums = int(sample_nums)
            return [gr.update(visible=(i < sample_nums)) for i in range(max_samples)]

        # Wire up events
        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=audio_outputs
        )
        # `outputs` must contain components only; visibility changes are
        # delivered through the gr.update objects the handler returns.
        generate_btn.click(
            fn=process_with_api,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=audio_outputs + [status_output]
        )
        # Footer
        gr.HTML("""
<div style="text-align: center; padding: 2rem; color: #666; border-top: 1px solid #eee; margin-top: 2rem;">
<p><strong>📡 API调用版本</strong> - 通过网络调用真实模型进行推理</p>
<p>🔗 官方Space: <a href="https://huggingface.co/spaces/tencent/HunyuanVideo-Foley" target="_blank">tencent/HunyuanVideo-Foley</a></p>
<p>⚠️ 需要安装: <code>pip install gradio_client</code></p>
</div>
""")
    return app
if __name__ == "__main__":
    # Logging: drop the existing handlers and print INFO-and-above records.
    logger.remove()
    logger.add(lambda record: print(record, end=''), level="INFO")
    logger.info("启动 HunyuanVideo-Foley API 客户端...")

    # Dependency probe: the import is only used to report availability.
    try:
        import gradio_client
    except ImportError:
        logger.warning("⚠️ gradio_client 未安装,API调用功能可能受限")
    else:
        logger.info("✅ gradio_client 已安装")

    # Build the UI, then serve it on all interfaces at port 7860.
    app = create_real_api_interface()
    logger.info("API客户端就绪,准备调用真实模型...")
    _launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
    }
    app.launch(**_launch_kwargs)