"""3-step Gradio demo: generate a start frame (Qwen-Image-Fast), edit it into a
last frame (Qwen-Image-Edit-Fast), then interpolate a video between the two
frames (wan-2-2-first-last-frame). All heavy lifting happens on remote Hugging
Face Spaces via ``gradio_client``; this app only forwards the user's OAuth
token and wires the three steps together."""

import gradio as gr
from gradio_client import Client, handle_file


def _require_token(oauth_token) -> str:
    """Return the Hugging Face access token, or raise a user-visible error.

    Raises:
        gr.Error: if the user has not signed in via the LoginButton
            (Gradio injects ``None`` when the handler declares a
            ``gr.OAuthToken | None = None`` parameter and no session exists).
    """
    if oauth_token is None:
        raise gr.Error("Please sign in with Hugging Face first.")
    return oauth_token.token


# Step 1: Generate first image
def call_image_gen(prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Generate the start frame from a text prompt via Qwen-Image-Fast.

    Args:
        prompt: text prompt for the image generator.
        oauth_token: injected by Gradio from the user's login session;
            ``None`` when logged out.

    Returns:
        Filepath of the generated image (first element of the Space's result).
    """
    gr.Info("Calling multimodalart/Qwen-Image-Fast...")
    client = Client("multimodalart/Qwen-Image-Fast", hf_token=_require_token(oauth_token))
    result = client.predict(
        prompt=prompt,
        seed=0,
        randomize_seed=True,  # seed above is ignored; each call is fresh
        aspect_ratio="16:9",
        guidance_scale=1,
        num_inference_steps=8,  # "fast" distilled variant: few steps, low guidance
        prompt_enhance=True,
        api_name="/infer",
    )
    print(result)
    # Result tuple is (image_path, seed, ...); only the image is needed.
    return result[0]


# Step 2: Edit image
def call_edit(input_image, prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Edit the start frame into the last frame via Qwen-Image-Edit-Fast.

    Args:
        input_image: filepath of the image to edit (output of step 1).
        prompt: edit instruction.
        oauth_token: injected by Gradio; ``None`` when logged out.

    Returns:
        Filepath of the edited image.
    """
    gr.Info("Calling multimodalart/Qwen-Image-Edit-Fast...")
    client = Client("multimodalart/Qwen-Image-Edit-Fast", hf_token=_require_token(oauth_token))
    result = client.predict(
        image=handle_file(input_image),  # upload the local file to the Space
        prompt=prompt,
        seed=0,
        randomize_seed=True,
        true_guidance_scale=1,
        num_inference_steps=8,
        rewrite_prompt=True,
        api_name="/infer",
    )
    print(result)
    return result[0]


# Step 3: Generate video from First/Last frames
def call_video_gen(start_image_in, end_image_in, action_prompt, oauth_token: "gr.OAuthToken | None" = None):
    """Interpolate a short video between two frames via wan-2-2-first-last-frame.

    Args:
        start_image_in: filepath of the first frame (step 1 output).
        end_image_in: filepath of the last frame (step 2 output).
        action_prompt: text describing the motion between the frames.
        oauth_token: injected by Gradio; ``None`` when logged out.

    Returns:
        Filepath of the generated video.
    """
    gr.Info("Calling multimodalart/wan-2-2-first-last-frame...")
    client = Client("multimodalart/wan-2-2-first-last-frame", hf_token=_require_token(oauth_token))
    result = client.predict(
        start_image_pil=handle_file(start_image_in),
        end_image_pil=handle_file(end_image_in),
        prompt=action_prompt,
        # Stock Wan negative prompt (quality/artifact suppression terms);
        # kept verbatim — it is part of the model's expected conditioning.
        negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
        duration_seconds=2.1,
        steps=8,
        guidance_scale=1,
        guidance_scale_2=1,
        seed=42,
        randomize_seed=True,
        api_name="/generate_video",
    )
    print(result)
    # The Space returns a dict for the video component; extract the file path.
    return result[0]["video"]


with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# 3-Steps Qwen+Wan Fast Combo")
        # LoginButton enables OAuth; handlers receive the session token
        # through their gr.OAuthToken-annotated parameter.
        hf_login = gr.LoginButton()
        with gr.Row():
            with gr.Column():
                with gr.Group():
                    first_gen_prompt = gr.Textbox(label="First Image prompt")
                    first_image = gr.Image(label="First Image", type="filepath", interactive=False)
                    first_step_submit_btn = gr.Button("1. Get Start frame")
            with gr.Column():
                with gr.Group():
                    edit_gen_prompt = gr.Textbox(label="Edit prompt")
                    last_image = gr.Image(label="Edited Last Frame", type="filepath", interactive=False)
                    second_step_submit_btn = gr.Button("2. Get Edited Last frame")
            with gr.Column():
                with gr.Group():
                    video_gen_prompt = gr.Textbox(label="Action prompt")
                    video_result = gr.Video(label="Video Result")
                    video_gen_submit_btn = gr.Button("3. Gen First/Last video")

    # Note: the oauth_token parameters are deliberately absent from `inputs`;
    # Gradio injects them automatically based on the type annotation.
    first_step_submit_btn.click(
        fn=call_image_gen,
        inputs=[first_gen_prompt],
        outputs=[first_image],
        show_api=False,
        queue=False,
    )
    second_step_submit_btn.click(
        fn=call_edit,
        inputs=[first_image, edit_gen_prompt],
        outputs=[last_image],
        show_api=False,
        queue=False,
    )
    video_gen_submit_btn.click(
        fn=call_video_gen,
        inputs=[first_image, last_image, video_gen_prompt],
        outputs=[video_result],
        show_api=False,
        queue=False,
    )

demo.queue().launch(show_error=True, show_api=False)