"""3-step Gradio demo: generate a start frame (Qwen-Image-Fast), edit it into a
last frame (Qwen-Image-Edit-Fast), then interpolate a video between the two
frames (wan-2-2-first-last-frame). All heavy lifting happens on remote Hugging
Face Spaces via ``gradio_client``; this app only forwards the user's OAuth
token and wires the three steps together."""

import gradio as gr
from gradio_client import Client, handle_file


def _require_token(oauth_token) -> str:
    """Return the Hugging Face access token, or raise a user-visible error.

    Raises:
        gr.Error: if the user has not signed in via the LoginButton
            (Gradio injects ``None`` when the handler declares a
            ``gr.OAuthToken | None = None`` parameter and no session exists).
    """
    if oauth_token is None:
        raise gr.Error("Please sign in with Hugging Face first.")
    return oauth_token.token


# Step 1: Generate first image
def call_image_gen(prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Generate the start frame from a text prompt via Qwen-Image-Fast.

    Args:
        prompt: text prompt for the image generator.
        oauth_token: injected by Gradio from the user's login session;
            ``None`` when logged out.

    Returns:
        Filepath of the generated image (first element of the Space's result).
    """
    gr.Info("Calling multimodalart/Qwen-Image-Fast...")
    client = Client("multimodalart/Qwen-Image-Fast", hf_token=_require_token(oauth_token))
    result = client.predict(
        prompt=prompt,
        seed=0,
        randomize_seed=True,  # seed above is ignored; each call is fresh
        aspect_ratio="16:9",
        guidance_scale=1,
        num_inference_steps=8,  # "fast" distilled variant: few steps, low guidance
        prompt_enhance=True,
        api_name="/infer",
    )
    print(result)
    # Result tuple is (image_path, seed, ...); only the image is needed.
    return result[0]


# Step 2: Edit image
def call_edit(input_image, prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Edit the start frame into the last frame via Qwen-Image-Edit-Fast.

    Args:
        input_image: filepath of the image to edit (output of step 1).
        prompt: edit instruction.
        oauth_token: injected by Gradio; ``None`` when logged out.

    Returns:
        Filepath of the edited image.
    """
    gr.Info("Calling multimodalart/Qwen-Image-Edit-Fast...")
    client = Client("multimodalart/Qwen-Image-Edit-Fast", hf_token=_require_token(oauth_token))
    result = client.predict(
        image=handle_file(input_image),  # upload the local file to the Space
        prompt=prompt,
        seed=0,
        randomize_seed=True,
        true_guidance_scale=1,
        num_inference_steps=8,
        rewrite_prompt=True,
        api_name="/infer",
    )
    print(result)
    return result[0]


# Step 3: Generate video from First/Last frames
def call_video_gen(start_image_in, end_image_in, action_prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Interpolate a short video between two frames via wan-2-2-first-last-frame.

    Args:
        start_image_in: filepath of the first frame (step 1 output).
        end_image_in: filepath of the last frame (step 2 output).
        action_prompt: text describing the motion between the frames.
        oauth_token: injected by Gradio; ``None`` when logged out.

    Returns:
        Filepath of the generated video.
    """
    gr.Info("Calling multimodalart/wan-2-2-first-last-frame...")
    client = Client("multimodalart/wan-2-2-first-last-frame", hf_token=_require_token(oauth_token))
    result = client.predict(
        start_image_pil=handle_file(start_image_in),
        end_image_pil=handle_file(end_image_in),
        prompt=action_prompt,
        # Stock Wan negative prompt (quality/artifact suppression terms);
        # kept verbatim — it is part of the model's expected conditioning.
        negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
        duration_seconds=2.1,
        steps=8,
        guidance_scale=1,
        guidance_scale_2=1,
        seed=42,
        randomize_seed=True,
        api_name="/generate_video",
    )
    print(result)
    # The Space returns a dict for the video component; extract the file path.
    return result[0]["video"]


with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# 3-Steps Qwen+Wan Fast Combo")
        # LoginButton enables OAuth; handlers receive the session token
        # through their gr.OAuthToken-annotated parameter.
        hf_login = gr.LoginButton()
        with gr.Row():
            with gr.Column():
                with gr.Group():
                    first_gen_prompt = gr.Textbox(label="First Image prompt")
                    first_image = gr.Image(label="First Image", type="filepath", interactive=False)
                    first_step_submit_btn = gr.Button("1. Get Start frame")
            with gr.Column():
                with gr.Group():
                    edit_gen_prompt = gr.Textbox(label="Edit prompt")
                    last_image = gr.Image(label="Edited Last Frame", type="filepath", interactive=False)
                    second_step_submit_btn = gr.Button("2. Get Edited Last frame")
            with gr.Column():
                with gr.Group():
                    video_gen_prompt = gr.Textbox(label="Action prompt")
                    video_result = gr.Video(label="Video Result")
                    video_gen_submit_btn = gr.Button("3. Gen First/Last video")

    # Note: the oauth_token parameters are deliberately absent from `inputs`;
    # Gradio injects them automatically based on the type annotation.
    first_step_submit_btn.click(
        fn=call_image_gen,
        inputs=[first_gen_prompt],
        outputs=[first_image],
        show_api=False,
        queue=False,
    )
    second_step_submit_btn.click(
        fn=call_edit,
        inputs=[first_image, edit_gen_prompt],
        outputs=[last_image],
        show_api=False,
        queue=False,
    )
    video_gen_submit_btn.click(
        fn=call_video_gen,
        inputs=[first_image, last_image, video_gen_prompt],
        outputs=[video_result],
        show_api=False,
        queue=False,
    )

demo.queue().launch(show_error=True, show_api=False)