import os
# import spaces

import ast
import numpy as np
from functools import partial

import torch
import torch.utils.checkpoint

from PIL import Image
import xml.etree.ElementTree as ET  # cElementTree is deprecated and removed in Python 3.9
from io import BytesIO
import base64
import json

import gradio as gr
import requests
import time
import re

from transformers import (
    AutoTokenizer,
    set_seed
)
from typing import List

os.environ["TOKENIZERS_PARALLELISM"] = "false"
from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList, \
    STOPPING_CRITERIA_INPUTS_DOCSTRING, add_start_docstrings
class StopAtSpecificTokenCriteria(StoppingCriteria):
    def __init__(self, token_id_list: List[int] = None):
        self.token_id_list = token_id_list
    @add_start_docstrings(STOPPING_CRITERIA_INPUTS_DOCSTRING)
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].detach().cpu().numpy() in self.token_id_list
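# A minimal usage sketch (this mirrors how the class is wired up in evaluate_v1 below):
#   stopping_criteria = StoppingCriteriaList([StopAtSpecificTokenCriteria(token_id_list=[128000])])
#   model.lm.generate(**inputs, stopping_criteria=stopping_criteria)
# Generation halts as soon as the most recently sampled token id appears in token_id_list.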

def ensure_space_after_period(input_string):
    # Normalize spacing so every period is followed by exactly one space
    output_string = re.sub(r'\.\s*', '. ', input_string)
    return output_string
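# For example:
#   ensure_space_after_period("First.Second.  Third.")  ->  "First. Second. Third. "
# (each period plus any following whitespace collapses to ". ", including at the end)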

def generate_unique_filename():
    # Build a unique filename from a millisecond-resolution timestamp
    timestamp = int(time.time() * 1000)  # timestamp in milliseconds
    # random_num = random.randint(1000, 9999)  # optional random suffix (unused)
    unique_filename = f"{timestamp}"
    return unique_filename
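# e.g. generate_unique_filename() -> "1718000000000" (the millisecond timestamp at call time)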

git_token = os.environ.get("GIT_TOKEN")
def upload_to_github(file_path, 
                     repo='WYBar/gradiodemo_svg', 
                     branch='main', 
                     token=git_token):
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        return None, None
    with open(file_path, 'rb') as file:
        content = file.read()
    encoded_content = base64.b64encode(content).decode('utf-8')
    unique_filename = generate_unique_filename()
    url = f"https://api.github.com/repos/{repo}/contents/{unique_filename}.svg"
    headers = {
        "Authorization": f"token {token}"
    }
    response = requests.get(url, headers=headers)
    
    sha = None
    if response.status_code == 200:
        sha = response.json()['sha']
    elif response.status_code == 404:
        # The file does not exist yet, so no SHA is needed
        pass
    else:
        print(f"Failed to get file status: {response.status_code}")
        # print(response.text)
        return None, None
    
    headers = {
        "Authorization": f"token {token}",
        "Content-Type": "application/json"
    }
    data = {
        "message": "upload svg file",
        "content": encoded_content,
        "branch": branch
    }
    
    if sha:
        # The file already exists: update it in place
        # print('sha exists, update the old one')
        data["sha"] = sha
        response = requests.put(url, headers=headers, json=data)
    else:
        # The file does not exist: create a new one
        print("sha not exist, need to create a new one")
        response = requests.put(url, headers=headers, json=data)
        
    # print(response.status_code)
    # print(response.text)
    if response.status_code in [200, 201]:
        # print(response.json()['content']['download_url'])
        return response.json()['content']['download_url'], unique_filename
    else:
        print("upload_to_github: upload failed")
        return None, None
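# A usage sketch (assumes GIT_TOKEN is set and the repo/branch above exist):
#   url, name = upload_to_github('./image.svg')
#   if url is not None:
#       print(f"raw download URL: {url}, stored as {name}.svg")
# The function always returns a (url, filename) pair; both are None on failure.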
    
def calculate_iou(box1, box2):
    # Intersection of the two boxes
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    intersection_area = max(0, x2 - x1) * max(0, y2 - y1)
    
    # Union of the two boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union_area = box1_area + box2_area - intersection_area
    
    # IoU, guarding against degenerate zero-area boxes
    iou = intersection_area / union_area if union_area > 0 else 0.0
    return iou
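# Hand-checked example:
#   calculate_iou((0, 0, 10, 10), (5, 5, 15, 15))
#   intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175, IoU = 25 / 175 ≈ 0.143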

def adjust_coordinates(box):
    size = 32
    (x1, y1, x2, y2) = box
    if x1 % size != 0:
        x1 = (x1 // size) * size
    if x2 % size != 0:
        x2 = (x2 // size + 1) * size
    
    if y1 % size != 0:
        y1 = (y1 // size) * size
    if y2 % size != 0:
        y2 = (y2 // size + 1) * size
    return (x1, y1, x2, y2)

def adjust_validation_box(validation_box):
    return [adjust_coordinates(box) for box in validation_box]
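# Boxes are snapped outward to multiples of 32, e.g.:
#   adjust_coordinates((10, 20, 100, 130)) -> (0, 0, 128, 160)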

def get_list_layer_box(list_png_images):
    list_layer_box = []
    for img in list_png_images:
        img_np = np.array(img)
        alpha_channel = img_np[:, :, -1]

        # Step 1: Find the non-zero indices
        rows, cols = np.nonzero(alpha_channel)

        if (len(rows) == 0) or (len(cols) == 0):
            # If there are no non-zero indices, we can skip this layer
            list_layer_box.append((0, 0, 0, 0))
            continue

        # Step 2: Get the minimum and maximum indices for rows and columns
        min_row, max_row = rows.min().item(), rows.max().item()
        min_col, max_col = cols.min().item(), cols.max().item()

        # Step 3: Quantize the minimum values down to the nearest multiple of 8
        quantized_min_row = (min_row // 8) * 8
        quantized_min_col = (min_col // 8) * 8

        # Step 4: Quantize the maximum values up to the nearest multiple of 8 outside of the max
        quantized_max_row = ((max_row // 8) + 1) * 8
        quantized_max_col = ((max_col // 8) + 1) * 8
        list_layer_box.append(
            (quantized_min_col, quantized_min_row, quantized_max_col, quantized_max_row)
        )
    return list_layer_box
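# Each returned box is the alpha-channel bounding box snapped outward to multiples of 8,
# as (x_min, y_min, x_max, y_max); fully transparent layers yield (0, 0, 0, 0).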

def pngs_to_svg(list_png_images):
    list_layer_box = get_list_layer_box(list_png_images)
    assert(len(list_png_images) == len(list_layer_box))
    width, height = list_png_images[0].width, list_png_images[0].height
    img_svg = ET.Element(
        'svg',
        {
            "width": str(width),
            "height": str(height),
            "xmlns": "http://www.w3.org/2000/svg",
            "xmlns:svg": "http://www.w3.org/2000/svg",
            "xmlns:xlink": "http://www.w3.org/1999/xlink"
        }
    )
    for img, box in zip(list_png_images, list_layer_box):
        x, y, w, h = box[0], box[1], box[2]-box[0], box[3]-box[1]
        if (w == 0 or h == 0):
            continue
        img = img.crop((x, y, x+w, y+h))
        buffer = BytesIO()
        img.save(buffer, format='PNG')
        img_str = base64.b64encode(buffer.getvalue())
        ET.SubElement(
            img_svg,
            "image",
            {
                "x": str(x),
                "y": str(y),
                "width": str(w),
                "height": str(h),
                "xlink:href": "data:image/png;base64,"+img_str.decode('utf-8')
            }
        )
    return ET.tostring(img_svg, encoding='utf-8').decode('utf-8')
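# A usage sketch: stack same-size PIL RGBA layers into one SVG document string
# (background_rgba and foreground_rgba are hypothetical placeholder images):
#   svg_str = pngs_to_svg([background_rgba, foreground_rgba])
#   with open('out.svg', 'w', encoding='utf-8') as f:
#       f.write(svg_str)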

# @spaces.GPU(enable_queue=True, duration=60)
def buildmodel(**kwargs):
    from modeling_crello import CrelloModel, CrelloModelConfig
    from quantizer import get_quantizer
    # seed / input model / resume
    resume = kwargs.get('resume', None)
    seed = kwargs.get('seed', None)
    input_model = kwargs.get('input_model', None)
    quantizer_version = kwargs.get('quantizer_version', 'v4')
    
    set_seed(seed)
    # old_tokenizer = AutoTokenizer.from_pretrained(input_model, trust_remote_code=True)
    old_tokenizer = AutoTokenizer.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",  # repo path
        subfolder="Meta-Llama-3-8B",       # subfolder holding the base model
        trust_remote_code=True,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    )
    old_vocab_size = len(old_tokenizer)
    # tokenizer = AutoTokenizer.from_pretrained(resume, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",
        subfolder="checkpoint-26000",     # subfolder holding the checkpoint
        trust_remote_code=True,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    )
    
    quantizer = get_quantizer(
                    quantizer_version, 
                    update_vocab = False,
                    decimal_quantize_types = kwargs.get('decimal_quantize_types'),
                    mask_values = kwargs['mask_values'],
                    width = kwargs['width'],
                    height = kwargs['height'],
                    simplify_json = False,
                    num_mask_tokens = 0, 
                    mask_type = kwargs.get('mask_type'),
                )
    quantizer.setup_tokenizer(tokenizer)  
      
    model_args = CrelloModelConfig(
        old_vocab_size = old_vocab_size,
        vocab_size=len(tokenizer),
        pad_token_id=tokenizer.pad_token_id,
        ignore_ids=tokenizer.convert_tokens_to_ids(quantizer.ignore_tokens), 
    )
    model_args.freeze_lm = False
    model_args.opt_version = input_model
    model_args.use_lora = False
    model_args.load_in_4bit = kwargs.get('load_in_4bit', False)
    # model = CrelloModel.from_pretrained(
    #     resume,
    #     config=model_args
    # ).to(device)
    
    model = CrelloModel.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",
        subfolder="checkpoint-26000",      # load from the checkpoint directory
        config=model_args,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    ).to("cuda")
    # model = CrelloModel(config=model_args)
    
    tokenizer.add_special_tokens({"mask_token": "<mask>"}) 
    quantizer.additional_special_tokens.add("<mask>")
    added_special_tokens_list = ["<layout>", "<position>", "<wholecaption>"] 
    tokenizer.add_special_tokens({"additional_special_tokens": added_special_tokens_list}, replace_additional_special_tokens=False)
    for token in added_special_tokens_list:
        quantizer.additional_special_tokens.add(token)
        
    return model, quantizer, tokenizer

def construction_layout():
    params_dict = {
        # Update these local paths as needed
        "input_model": "/openseg_blob/v-sirui/temporary/2024-02-21/Layout_train/COLEv2/Design_LLM/checkpoint/Meta-Llama-3-8B", 
        "resume": "/openseg_blob/v-sirui/temporary/2024-02-21/SVD/Int2lay_1016/checkpoint/int2lay_1031/1031_test/checkpoint-26000/",
        
        "seed": 0,  
        "mask_values": False,  
        "quantizer_version": 'v4',  
        "mask_type": 'cm3',  
        "decimal_quantize_types": [],  
        "num_mask_tokens": 0,  
        "width": 512,
        "height": 512,
        "device": 0,
    }  
    device = "cuda"
    # Init model
    model, quantizer, tokenizer = buildmodel(**params_dict)
    
    # print('resize token embeddings to match the tokenizer', 129423)
    # model.lm.resize_token_embeddings(129423)
    # model.input_embeddings = model.lm.get_input_embeddings()
    # print('after token embeddings to match the tokenizer', 129423)
    
    print("before .to(device)")
    model = model.to("cuda")
    print("after .to(device)")
    model = model.bfloat16()
    model.eval()
    # quantizer = quantizer.to("cuda")
    # tokenizer = tokenizer.to("cuda")
    model.lm = model.lm.to("cuda")
    return model, quantizer, tokenizer, params_dict["width"], params_dict["height"], device

@torch.no_grad()   
# @spaces.GPU(enable_queue=True, duration=60) 
def evaluate_v1(inputs, model, quantizer, tokenizer, width, height, device, do_sample=False, temperature=1.0, top_p=1.0, top_k=50):
    json_example = inputs
    input_intension = '{"wholecaption":"' + json_example["wholecaption"] + '","layout":[{"layer":'
    print("tokenizer1")
    inputs = tokenizer(
        input_intension, return_tensors="pt"
    ).to(model.lm.device)
    print("tokenizer2")
    
    stopping_criteria = StoppingCriteriaList()
    stopping_criteria.append(StopAtSpecificTokenCriteria(token_id_list=[128000]))

    print("lm1")
    outputs = model.lm.generate(**inputs, use_cache=True, max_length=8000, stopping_criteria=stopping_criteria, do_sample=do_sample, temperature=temperature, top_p=top_p, top_k=top_k)
    print("lm2")
    inputs_length = inputs['input_ids'].shape[1] 
    outputs = outputs[:, inputs_length:]
    
    outputs_word = tokenizer.batch_decode(outputs)[0]
    split_word = outputs_word.split('}]}')[0]+"}]}"
    split_word = '{"wholecaption":"' + json_example["wholecaption"].replace('\n', '\\n').replace('"', '\\"') + '","layout":[{"layer":' + split_word
    map_dict = quantizer.construct_map_dict()
    
    for key, value in map_dict.items():
        split_word = split_word.replace(key, value)
    try:
        pred_json_example = json.loads(split_word)
        for layer in pred_json_example["layout"]:
            layer['x'] = round(int(width)*layer['x'])
            layer['y'] = round(int(height)*layer['y'])
            layer['width'] = round(int(width)*layer['width'])
            layer['height'] = round(int(height)*layer['height'])
    except Exception as e:
        print(e)
        pred_json_example = None
    return pred_json_example
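# On success the parsed prediction has the shape consumed by process_preddate below,
# with coordinates already scaled to pixels. Illustratively (values are made up):
#   {"wholecaption": "...", "layout": [{"layer": 0, "x": 256, "y": 256, "width": 512, "height": 512}, ...]}
# The exact layer keys depend on the quantizer's vocabulary; on any parse failure it returns None.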

def inference(generate_method, intention, model, quantizer, tokenizer, width, height, device, do_sample=True, temperature=1.0, top_p=1.0, top_k=50):
    def FormulateInput(intension: str):
        resdict = {}
        resdict["wholecaption"] = intension
        resdict["layout"] = []
        return resdict
    
    rawdata = FormulateInput(intention)
    
    if generate_method == 'v1':
        max_try_time = 5
        preddata = None
        while preddata is None and max_try_time > 0:
            preddata = evaluate_v1(rawdata, model, quantizer, tokenizer, width, height, device, do_sample=do_sample, temperature=temperature, top_p=top_p, top_k=top_k)
            max_try_time -= 1
    else:
        print("Unknown generate method; expected 'v1'.")
        preddata = None

    return preddata

# @spaces.GPU(enable_queue=True, duration=60)
def construction():
    from custom_model_mmdit import CustomFluxTransformer2DModel
    from custom_model_transp_vae import AutoencoderKLTransformerTraining as CustomVAE
    from custom_pipeline import CustomFluxPipelineCfg

    transformer = CustomFluxTransformer2DModel.from_pretrained(
        "WYBar/ART_test_weights",
        subfolder="fused_transformer", 
        torch_dtype=torch.bfloat16,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    )
    
    transp_vae = CustomVAE.from_pretrained(
        "WYBar/ART_test_weights",
        subfolder="custom_vae", 
        torch_dtype=torch.float32,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    )
    
    token = os.environ.get("HF_TOKEN")
    pipeline = CustomFluxPipelineCfg.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        transformer=transformer,
        torch_dtype=torch.bfloat16,
        token=token,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    ).to("cuda")
    pipeline.enable_model_cpu_offload(gpu_id=0) # Save GPU memory
    
    return pipeline, transp_vae

# @spaces.GPU(enable_queue=True, duration=60)
def test_one_sample(validation_box, validation_prompt, true_gs, inference_steps, pipeline, generator, transp_vae):
    print(validation_box)
    output, rgba_output, _, _ = pipeline(
        prompt=validation_prompt,
        validation_box=validation_box,
        generator=generator,
        height=512,
        width=512,
        num_layers=len(validation_box),
        guidance_scale=4.0,
        num_inference_steps=inference_steps,
        transparent_decoder=transp_vae,
        true_gs=true_gs
    )
    images = output.images   # list of PIL, len=layers
    rgba_images = [Image.fromarray(arr, 'RGBA') for arr in rgba_output]

    output_gradio = []
    merged_pil = images[1].convert('RGBA')
    for frame_idx, frame_pil in enumerate(rgba_images):
        if frame_idx < 2:
            frame_pil = images[frame_idx].convert('RGBA') # merged and background
        else:
            merged_pil = Image.alpha_composite(merged_pil, frame_pil)
        output_gradio.append(frame_pil)
    
    return output_gradio

def svg_test_one_sample(validation_prompt, validation_box_str, seed, true_gs, inference_steps, pipeline, transp_vae):
    generator = torch.Generator().manual_seed(seed)
    try:
        validation_box = ast.literal_eval(validation_box_str)
    except Exception as e:
        # Return a (gallery, file) pair so the caller's two-value unpacking still works
        return [f"Error parsing validation_box: {e}"], None
    if not isinstance(validation_box, list) or not all(isinstance(t, tuple) and len(t) == 4 for t in validation_box):
        return ["validation_box must be a list of tuples, each of length 4."], None

    validation_box = adjust_validation_box(validation_box)
    
    result_images = test_one_sample(validation_box, validation_prompt, true_gs, inference_steps, pipeline, generator, transp_vae)
    
    svg_img = pngs_to_svg(result_images[1:])
    
    svg_file_path = './image.svg'
    os.makedirs(os.path.dirname(svg_file_path), exist_ok=True)
    with open(svg_file_path, 'w', encoding='utf-8') as f:
        f.write(svg_img)       
    
    return result_images, svg_file_path
    
def main():
    model, quantizer, tokenizer, width, height, device = construction_layout()
    
    inference_partial = partial(
        inference,
        model=model,
        quantizer=quantizer,
        tokenizer=tokenizer,
        width=width,
        height=height,
        device=device
    )
    
    def process_preddate(intention, temperature, top_p, generate_method='v1'):
        intention = intention.replace('\n', '').replace('\r', '').replace('\\', '')
        intention = ensure_space_after_period(intention)
        if temperature == 0.0:
            # print("looking for greedy decoding strategies, set `do_sample=False`.")
            preddata = inference_partial(generate_method, intention, do_sample=False)
        else:
            preddata = inference_partial(generate_method, intention, temperature=temperature, top_p=top_p)
        # wholecaption = preddata["wholecaption"]
        if preddata is None:
            raise gr.Error("Layout generation failed after several retries; please try again.")
        layouts = preddata["layout"]
        list_box = []
        for i, layout in enumerate(layouts):
            x, y = layout["x"], layout["y"]
            width, height = layout["width"], layout["height"]
            if i == 0:
                list_box.append((0, 0, width, height))
                list_box.append((0, 0, width, height))
            else:
                left = x - width // 2
                top = y - height // 2
                right = x + width // 2
                bottom = y + height // 2
                list_box.append((left, top, right, bottom))
                
        # print(list_box)
        filtered_boxes = list_box[:2]
        for i in range(2, len(list_box)):
            keep = True
            for j in range(1, len(filtered_boxes)):
                iou = calculate_iou(list_box[i], filtered_boxes[j])
                if iou > 0.65:
                    print(list_box[i], filtered_boxes[j])
                    keep = False
                    break
            if keep:
                filtered_boxes.append(list_box[i])
            
        return str(filtered_boxes), intention, str(filtered_boxes)
    
    # def process_preddate(intention, generate_method='v1'):
    #     list_box = [(0, 0, 512, 512), (0, 0, 512, 512), (136, 184, 512, 512), (144, 0, 512, 512), (0, 0, 328, 136), (160, 112, 512, 360), (168, 112, 512, 360), (40, 232, 112, 296), (32, 88, 248, 176), (48, 424, 144, 448), (48, 464, 144, 488), (240, 464, 352, 488), (384, 464, 488, 488), (48, 480, 144, 504), (240, 480, 360, 504), (456, 0, 512, 56), (0, 0, 56, 40), (440, 0, 512, 40), (0, 24, 48, 88), (48, 168, 168, 240)]
    #     wholecaption = "Design an engaging and vibrant recruitment advertisement for our company. The image should feature three animated characters in a modern cityscape, depicting a dynamic and collaborative work environment. Incorporate a light bulb graphic with a question mark, symbolizing innovation, creativity, and problem-solving. Use bold text to announce \"WE ARE RECRUITING\" and provide the company's social media handle \"@reallygreatsite\" and a contact phone number \"+123-456-7890\" for interested individuals. The overall design should be playful and youthful, attracting potential recruits who are innovative and eager to contribute to a lively team."
    #     json_file = "/home/wyb/openseg_blob/v-yanbin/GradioDemo/LLM-For-Layout-Planning/inference_test.json"
    #     return wholecaption, str(list_box), json_file

    pipeline, transp_vae = construction()

    gradio_test_one_sample_partial = partial(
        svg_test_one_sample,
        pipeline=pipeline,
        transp_vae=transp_vae,
    )
    
    def process_svg(text_input, tuple_input, seed, true_gs, inference_steps):
        result_images, svg_file_path = gradio_test_one_sample_partial(text_input, tuple_input, seed, true_gs, inference_steps)
        if svg_file_path is None:
            # Generation failed; surface the error message in the gallery and skip the upload
            return result_images, None, ""

        url, unique_filename = upload_to_github(file_path=svg_file_path)
        
        if url is not None:
            print(f"File uploaded to: {url}")
            svg_editor = f"""
                <iframe src="https://svgedit.netlify.app/editor/index.html?storagePrompt=false&url={url}"
                width="100%" height="800px"></iframe>
            """
        else:
            print('upload_to_github FAILED!')
            svg_editor = """
                <iframe src="https://svgedit.netlify.app/editor/index.html"
                width="100%" height="800px"></iframe>
            """
        
        return result_images, svg_file_path, svg_editor
    
    def one_click_generate(intention_input, temperature, top_p, seed, true_gs, inference_steps):
        # First run process_preddate to plan the layout boxes
        list_box_output, intention_input, list_box_output = process_preddate(intention_input, temperature, top_p)
        
        # Then feed its output into process_svg to render the layers
        result_images, svg_file, svg_editor = process_svg(intention_input, list_box_output, seed, true_gs, inference_steps)
        
        # Return the outputs of both stages
        return list_box_output, result_images, svg_file, svg_editor, intention_input, list_box_output

    def clear_inputs1():
        return "", ""
    
    def clear_inputs2():
        return "", ""
    
    def transfer_inputs(intention, list_box):
        return intention, list_box
    
    theme = gr.themes.Soft(
        radius_size="lg",
    ).set(
        block_background_fill='*primary_50',
        block_border_color='*primary_200',
        block_border_width='1px',
        block_border_width_dark='100px',
        block_info_text_color='*primary_950',
        block_label_border_color='*primary_200',
        block_radius='*radius_lg'
    )
        
    with gr.Blocks(theme=theme) as demo:
        gr.HTML("<h1 style='text-align: center;'>ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation</h1>")
        gr.HTML("<h2>Anonymous Region Layout Planner</h2>")
    
        with gr.Row():
            with gr.Column():
                intention_input = gr.Textbox(lines=15, placeholder="Enter intention", label="Prompt")
                with gr.Row():
                    temperature_input=gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=0.0)
                    top_p_input=gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Top P", value=0.0)
                with gr.Row():
                    clear_btn1 = gr.Button("Clear")
                    model_btn1 = gr.Button("Commit", variant='primary')
                    transfer_btn1 = gr.Button("Export to below")
                    
                one_click_btn = gr.Button("One Click Generate ALL", variant='primary')
        
            with gr.Column():
                list_box_output = gr.Textbox(lines=10, placeholder="Validation Box", label="Validation Box")
           
        examples = gr.Examples(
            examples=[
                ['The image is a graphic design with a celebratory theme. At the top, there is a banner with the text \"Happy Anniversary\" in a bold, sans-serif font. Below this banner, there is a circular frame containing a photograph of a couple. The man has short, dark hair and is wearing a light-colored sweater, while the woman has long blonde hair and is also wearing a light-colored sweater. They are both smiling and appear to be embracing each other.Surrounding the circular frame are decorative elements such as pink flowers and green leaves, which add a festive touch to the design. Below the circular frame, there is a text that reads "Isabel & Morgan" in a cursive, elegant font, suggesting that the couple\'s names are Isabel and Morgan.At the bottom of the image, there is a banner with a message that says "Happy Anniversary! Cheers to another year of love, laughter, and cherished memories together.\" This text is in a smaller, sans-serif font and is placed against a solid background, providing a clear message of celebration and well-wishes for the couple.The overall style of the image is warm and celebratory, with a color scheme that includes shades of pink, green, and white, which contribute to a joyful and romantic atmosphere.'],
                ['The image is a digital illustration with a light blue background. At the top, there is a logo consisting of a snake wrapped around a staff, which is a common symbol in healthcare. Below the logo, the text "International Nurses Day" is prominently displayed in white, with the date "12 May 20xx" in smaller font size.The central part of the image features two stylized characters. On the left, there is a female character with dark hair, wearing a white nurse\'s uniform with a cap. She is holding a clipboard and appears to be speaking or gesturing, as indicated by a speech bubble with the word "OK" in it. On the right, there is a male character with light brown hair, wearing a light blue shirt with a white collar and a white apron. He is holding a stethoscope to his ear, suggesting he is a doctor or a healthcare professional.The characters are depicted in a friendly and approachable manner, with smiles on their faces. Around them, there are small blue plus signs, which are often associated with healthcare and medical services. The overall style of the image is clean, modern, and appears to be designed to celebrate International Nurses Day.'],
                ['The image features a graphic design with a festive theme. At the top, there is a decorative border with a wavy pattern. Below this border, the text "WINTER SEASON SPECIAL COOKIES" is prominently displayed in a bold, sans-serif font. The text is black with a slight shadow effect, giving it a three-dimensional appearance.In the center of the image, there are three illustrated gingerbread cookies. Each cookie has a smiling face with eyes, a nose, and a mouth, and they are colored in a warm, brown hue. The cookies are arranged in a staggered formation, with the middle cookie slightly higher than the others, creating a sense of depth.At the bottom of the image, there is a call to action that reads "ORDER.NOW" in a large, bold, sans-serif font. The text is colored in a darker shade of brown, contrasting with the lighter background. The overall style of the image suggests it is an advertisement or promotional graphic for a winter-themed cookie special.']
            ],
            inputs=[intention_input]
        )
        
        gr.HTML("<h2>Anonymous Region Transformer</h2>")
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(lines=10, placeholder="Enter prompt text", label="Prompt")
                tuple_input = gr.Textbox(lines=5, placeholder="Enter list of tuples, e.g., [(1, 2, 3, 4), (5, 6, 7, 8)]", label="Validation Box")
                with gr.Row():
                    true_gs_input=gr.Slider(minimum=3.0, maximum=5.0, step=0.1, label="true_gs", value=3.5)
                    inference_steps_input=gr.Slider(minimum=5, maximum=50, step=1, label="inference_steps", value=28)
                with gr.Row():
                    seed_input = gr.Number(label="Seed", value=42)
                with gr.Row():
                    transfer_btn2 = gr.Button("Import from above")
                with gr.Row():
                    clear_btn2 = gr.Button("Clear")
                    model_btn2 = gr.Button("Commit", variant='primary')
                
            with gr.Column():
                result_images = gr.Gallery(label="Result Images", columns=5, height='auto')
                
        gr.HTML("<h1>SVG Image</h1>")
        svg_file = gr.File(label="Download SVG Image")
        svg_editor = gr.HTML(label="Editable SVG Editor")
        
        model_btn1.click(
            fn=process_preddate, 
            inputs=[intention_input, temperature_input, top_p_input], 
            outputs=[list_box_output, text_input, tuple_input], 
            api_name="process_preddate"
        )
        clear_btn1.click(
            fn=clear_inputs1, 
            inputs=[], 
            outputs=[intention_input, list_box_output]
        )
        model_btn2.click(
            fn=process_svg, 
            inputs=[text_input, tuple_input, seed_input, true_gs_input, inference_steps_input], 
            outputs=[result_images, svg_file, svg_editor], 
            api_name="process_svg"
        )
        clear_btn2.click(
            fn=clear_inputs2, 
            inputs=[], 
            outputs=[text_input, tuple_input]
        )
        transfer_btn1.click(
            fn=transfer_inputs, 
            inputs=[intention_input, list_box_output], 
            outputs=[text_input, tuple_input]
        )
        transfer_btn2.click(
            fn=transfer_inputs, 
            inputs=[intention_input, list_box_output], 
            outputs=[text_input, tuple_input]
        )
        one_click_btn.click(
            fn=one_click_generate, 
            inputs=[intention_input, temperature_input, top_p_input, seed_input, true_gs_input, inference_steps_input], 
            outputs=[list_box_output, result_images, svg_file, svg_editor, text_input, tuple_input]
        )
    demo.launch(server_name='0.0.0.0', server_port=7860)

if __name__ == "__main__":
    main()