import os
# import spaces

import ast
import numpy as np
from functools import partial

import torch
import torch.utils.checkpoint

from PIL import Image
import xml.etree.ElementTree as ET  # cElementTree is deprecated and removed in Python 3.9
from io import BytesIO
import base64
import json

import gradio as gr
import requests
import time
import re

from transformers import (
    AutoTokenizer,
    set_seed
)
from typing import List

os.environ["TOKENIZERS_PARALLELISM"] = "false"
from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList, \
    STOPPING_CRITERIA_INPUTS_DOCSTRING, add_start_docstrings
class StopAtSpecificTokenCriteria(StoppingCriteria):
    def __init__(self, token_id_list: List[int] = None):
        self.token_id_list = token_id_list
    @add_start_docstrings(STOPPING_CRITERIA_INPUTS_DOCSTRING)
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].detach().cpu().numpy() in self.token_id_list
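# A minimal usage sketch (this mirrors how the class is wired up in evaluate_v1 below):
#   stopping_criteria = StoppingCriteriaList([StopAtSpecificTokenCriteria(token_id_list=[128000])])
#   model.lm.generate(**inputs, stopping_criteria=stopping_criteria)
# Generation halts as soon as the most recently sampled token id appears in token_id_list.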

def ensure_space_after_period(input_string):
    # Normalize spacing so every period is followed by exactly one space
    output_string = re.sub(r'\.\s*', '. ', input_string)
    return output_string
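# For example:
#   ensure_space_after_period("First.Second.  Third.")  ->  "First. Second. Third. "
# (each period plus any following whitespace collapses to ". ", including at the end)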

def generate_unique_filename():
    # Build a unique filename from a millisecond-resolution timestamp
    timestamp = int(time.time() * 1000)  # timestamp in milliseconds
    # random_num = random.randint(1000, 9999)  # optional random suffix (unused)
    unique_filename = f"{timestamp}"
    return unique_filename
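# e.g. generate_unique_filename() -> "1718000000000" (the millisecond timestamp at call time)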

git_token = os.environ.get("GIT_TOKEN")
def upload_to_github(file_path, 
                     repo='WYBar/gradiodemo_svg', 
                     branch='main', 
                     token=git_token):
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        return None, None
    with open(file_path, 'rb') as file:
        content = file.read()
    encoded_content = base64.b64encode(content).decode('utf-8')
    unique_filename = generate_unique_filename()
    url = f"https://api.github.com/repos/{repo}/contents/{unique_filename}.svg"
    headers = {
        "Authorization": f"token {token}"
    }
    response = requests.get(url, headers=headers)
    
    sha = None
    if response.status_code == 200:
        sha = response.json()['sha']
    elif response.status_code == 404:
        # The file does not exist yet, so no SHA is needed
        pass
    else:
        print(f"Failed to get file status: {response.status_code}")
        # print(response.text)
        return None, None
    
    headers = {
        "Authorization": f"token {token}",
        "Content-Type": "application/json"
    }
    data = {
        "message": "upload svg file",
        "content": encoded_content,
        "branch": branch
    }
    
    if sha:
        # The file already exists: update it in place
        # print('sha exists, update the old one')
        data["sha"] = sha
        response = requests.put(url, headers=headers, json=data)
    else:
        # The file does not exist: create a new one
        print("sha not exist, need to create a new one")
        response = requests.put(url, headers=headers, json=data)
        
    # print(response.status_code)
    # print(response.text)
    if response.status_code in [200, 201]:
        # print(response.json()['content']['download_url'])
        return response.json()['content']['download_url'], unique_filename
    else:
        print("upload_to_github: upload failed")
        return None, None
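# A usage sketch (assumes GIT_TOKEN is set and the repo/branch above exist):
#   url, name = upload_to_github('./image.svg')
#   if url is not None:
#       print(f"raw download URL: {url}, stored as {name}.svg")
# The function always returns a (url, filename) pair; both are None on failure.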
    
def calculate_iou(box1, box2):
    # Intersection of the two boxes
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    intersection_area = max(0, x2 - x1) * max(0, y2 - y1)
    
    # Union of the two boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union_area = box1_area + box2_area - intersection_area
    
    # IoU, guarding against degenerate zero-area boxes
    iou = intersection_area / union_area if union_area > 0 else 0.0
    return iou
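# Hand-checked example:
#   calculate_iou((0, 0, 10, 10), (5, 5, 15, 15))
#   intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175, IoU = 25 / 175 ≈ 0.143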

def adjust_coordinates(box):
    size = 32
    (x1, y1, x2, y2) = box
    if x1 % size != 0:
        x1 = (x1 // size) * size
    if x2 % size != 0:
        x2 = (x2 // size + 1) * size
    
    if y1 % size != 0:
        y1 = (y1 // size) * size
    if y2 % size != 0:
        y2 = (y2 // size + 1) * size
    return (x1, y1, x2, y2)

def adjust_validation_box(validation_box):
    return [adjust_coordinates(box) for box in validation_box]
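# Boxes are snapped outward to multiples of 32, e.g.:
#   adjust_coordinates((10, 20, 100, 130)) -> (0, 0, 128, 160)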

def get_list_layer_box(list_png_images):
    list_layer_box = []
    for img in list_png_images:
        img_np = np.array(img)
        alpha_channel = img_np[:, :, -1]

        # Step 1: Find the non-zero indices
        rows, cols = np.nonzero(alpha_channel)

        if (len(rows) == 0) or (len(cols) == 0):
            # If there are no non-zero indices, we can skip this layer
            list_layer_box.append((0, 0, 0, 0))
            continue

        # Step 2: Get the minimum and maximum indices for rows and columns
        min_row, max_row = rows.min().item(), rows.max().item()
        min_col, max_col = cols.min().item(), cols.max().item()

        # Step 3: Quantize the minimum values down to the nearest multiple of 8
        quantized_min_row = (min_row // 8) * 8
        quantized_min_col = (min_col // 8) * 8

        # Step 4: Quantize the maximum values up to the nearest multiple of 8 outside of the max
        quantized_max_row = ((max_row // 8) + 1) * 8
        quantized_max_col = ((max_col // 8) + 1) * 8
        list_layer_box.append(
            (quantized_min_col, quantized_min_row, quantized_max_col, quantized_max_row)
        )
    return list_layer_box
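# Each returned box is the alpha-channel bounding box snapped outward to multiples of 8,
# as (x_min, y_min, x_max, y_max); fully transparent layers yield (0, 0, 0, 0).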

def pngs_to_svg(list_png_images):
    list_layer_box = get_list_layer_box(list_png_images)
    assert(len(list_png_images) == len(list_layer_box))
    width, height = list_png_images[0].width, list_png_images[0].height
    img_svg = ET.Element(
        'svg',
        {
            "width": str(width),
            "height": str(height),
            "xmlns": "http://www.w3.org/2000/svg",
            "xmlns:svg": "http://www.w3.org/2000/svg",
            "xmlns:xlink": "http://www.w3.org/1999/xlink"
        }
    )
    for img, box in zip(list_png_images, list_layer_box):
        x, y, w, h = box[0], box[1], box[2]-box[0], box[3]-box[1]
        if (w == 0 or h == 0):
            continue
        img = img.crop((x, y, x+w, y+h))
        buffer = BytesIO()
        img.save(buffer, format='PNG')
        img_str = base64.b64encode(buffer.getvalue())
        ET.SubElement(
            img_svg,
            "image",
            {
                "x": str(x),
                "y": str(y),
                "width": str(w),
                "height": str(h),
                "xlink:href": "data:image/png;base64,"+img_str.decode('utf-8')
            }
        )
    return ET.tostring(img_svg, encoding='utf-8').decode('utf-8')
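# A usage sketch: stack same-size PIL RGBA layers into one SVG document string
# (background_rgba and foreground_rgba are hypothetical placeholder images):
#   svg_str = pngs_to_svg([background_rgba, foreground_rgba])
#   with open('out.svg', 'w', encoding='utf-8') as f:
#       f.write(svg_str)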

# @spaces.GPU(enable_queue=True, duration=60)
def buildmodel(**kwargs):
    from modeling_crello import CrelloModel, CrelloModelConfig
    from quantizer import get_quantizer
    # seed / input model / resume
    resume = kwargs.get('resume', None)
    seed = kwargs.get('seed', None)
    input_model = kwargs.get('input_model', None)
    quantizer_version = kwargs.get('quantizer_version', 'v4')
    
    set_seed(seed)
    # old_tokenizer = AutoTokenizer.from_pretrained(input_model, trust_remote_code=True)
    old_tokenizer = AutoTokenizer.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",  # repo path
        subfolder="Meta-Llama-3-8B",       # subfolder holding the base model
        trust_remote_code=True,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    )
    old_vocab_size = len(old_tokenizer)
    # tokenizer = AutoTokenizer.from_pretrained(resume, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",
        subfolder="checkpoint-26000",     # subfolder holding the checkpoint
        trust_remote_code=True,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    )
    
    quantizer = get_quantizer(
                    quantizer_version, 
                    update_vocab = False,
                    decimal_quantize_types = kwargs.get('decimal_quantize_types'),
                    mask_values = kwargs['mask_values'],
                    width = kwargs['width'],
                    height = kwargs['height'],
                    simplify_json = False,
                    num_mask_tokens = 0, 
                    mask_type = kwargs.get('mask_type'),
                )
    quantizer.setup_tokenizer(tokenizer)  
      
    model_args = CrelloModelConfig(
        old_vocab_size = old_vocab_size,
        vocab_size=len(tokenizer),
        pad_token_id=tokenizer.pad_token_id,
        ignore_ids=tokenizer.convert_tokens_to_ids(quantizer.ignore_tokens), 
    )
    model_args.freeze_lm = False
    model_args.opt_version = input_model
    model_args.use_lora = False
    model_args.load_in_4bit = kwargs.get('load_in_4bit', False)
    # model = CrelloModel.from_pretrained(
    #     resume,
    #     config=model_args
    # ).to(device)
    
    model = CrelloModel.from_pretrained(
        "WYBar/LLM_For_Layout_Planning",
        subfolder="checkpoint-26000",      # load from the checkpoint directory
        config=model_args,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir",
    ).to("cuda")
    # model = CrelloModel(config=model_args)
    
    tokenizer.add_special_tokens({"mask_token": "<mask>"}) 
    quantizer.additional_special_tokens.add("<mask>")
    added_special_tokens_list = ["<layout>", "<position>", "<wholecaption>"] 
    tokenizer.add_special_tokens({"additional_special_tokens": added_special_tokens_list}, replace_additional_special_tokens=False)
    for token in added_special_tokens_list:
        quantizer.additional_special_tokens.add(token)
        
    return model, quantizer, tokenizer

def construction_layout():
    params_dict = {
        # Update these local paths as needed
        "input_model": "/openseg_blob/v-sirui/temporary/2024-02-21/Layout_train/COLEv2/Design_LLM/checkpoint/Meta-Llama-3-8B", 
        "resume": "/openseg_blob/v-sirui/temporary/2024-02-21/SVD/Int2lay_1016/checkpoint/int2lay_1031/1031_test/checkpoint-26000/",
        
        "seed": 0,  
        "mask_values": False,  
        "quantizer_version": 'v4',  
        "mask_type": 'cm3',  
        "decimal_quantize_types": [],  
        "num_mask_tokens": 0,  
        "width": 512,
        "height": 512,
        "device": 0,
    }  
    device = "cuda"
    # Init model
    model, quantizer, tokenizer = buildmodel(**params_dict)
    
    # print('resize token embeddings to match the tokenizer', 129423)
    # model.lm.resize_token_embeddings(129423)
    # model.input_embeddings = model.lm.get_input_embeddings()
    # print('after token embeddings to match the tokenizer', 129423)
    
    print("before .to(device)")
    model = model.to("cuda")
    print("after .to(device)")
    model = model.bfloat16()
    model.eval()
    # quantizer = quantizer.to("cuda")
    # tokenizer = tokenizer.to("cuda")
    model.lm = model.lm.to("cuda")
    return model, quantizer, tokenizer, params_dict["width"], params_dict["height"], device

@torch.no_grad()   
# @spaces.GPU(enable_queue=True, duration=60) 
def evaluate_v1(inputs, model, quantizer, tokenizer, width, height, device, do_sample=False, temperature=1.0, top_p=1.0, top_k=50):
    json_example = inputs
    input_intension = '{"wholecaption":"' + json_example["wholecaption"] + '","layout":[{"layer":'
    print("tokenizer1")
    inputs = tokenizer(
        input_intension, return_tensors="pt"
    ).to(model.lm.device)
    print("tokenizer2")
    
    stopping_criteria = StoppingCriteriaList()
    stopping_criteria.append(StopAtSpecificTokenCriteria(token_id_list=[128000]))

    print("lm1")
    outputs = model.lm.generate(**inputs, use_cache=True, max_length=8000, stopping_criteria=stopping_criteria, do_sample=do_sample, temperature=temperature, top_p=top_p, top_k=top_k)
    print("lm2")
    inputs_length = inputs['input_ids'].shape[1] 
    outputs = outputs[:, inputs_length:]
    
    outputs_word = tokenizer.batch_decode(outputs)[0]
    split_word = outputs_word.split('}]}')[0]+"}]}"
    split_word = '{"wholecaption":"' + json_example["wholecaption"].replace('\n', '\\n').replace('"', '\\"') + '","layout":[{"layer":' + split_word
    map_dict = quantizer.construct_map_dict()
    
    for key, value in map_dict.items():
        split_word = split_word.replace(key, value)
    try:
        pred_json_example = json.loads(split_word)
        for layer in pred_json_example["layout"]:
            layer['x'] = round(int(width)*layer['x'])
            layer['y'] = round(int(height)*layer['y'])
            layer['width'] = round(int(width)*layer['width'])
            layer['height'] = round(int(height)*layer['height'])
    except Exception as e:
        print(e)
        pred_json_example = None
    return pred_json_example
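# On success the parsed prediction has the shape consumed by process_preddate below,
# with coordinates already scaled to pixels. Illustratively (values are made up):
#   {"wholecaption": "...", "layout": [{"layer": 0, "x": 256, "y": 256, "width": 512, "height": 512}, ...]}
# The exact layer keys depend on the quantizer's vocabulary; on any parse failure it returns None.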

def inference(generate_method, intention, model, quantizer, tokenizer, width, height, device, do_sample=True, temperature=1.0, top_p=1.0, top_k=50):
    def FormulateInput(intension: str):
        resdict = {}
        resdict["wholecaption"] = intension
        resdict["layout"] = []
        return resdict
    
    rawdata = FormulateInput(intention)
    
    if generate_method == 'v1':
        max_try_time = 5
        preddata = None
        while preddata is None and max_try_time > 0:
            preddata = evaluate_v1(rawdata, model, quantizer, tokenizer, width, height, device, do_sample=do_sample, temperature=temperature, top_p=top_p, top_k=top_k)
            max_try_time -= 1
    else:
        print("Unknown generate method; expected 'v1'.")
        preddata = None

    return preddata

# @spaces.GPU(enable_queue=True, duration=60)
def construction():
    from custom_model_mmdit import CustomFluxTransformer2DModel
    from custom_model_transp_vae import AutoencoderKLTransformerTraining as CustomVAE
    from custom_pipeline import CustomFluxPipelineCfg

    transformer = CustomFluxTransformer2DModel.from_pretrained(
        "WYBar/ART_test_weights",
        subfolder="fused_transformer", 
        torch_dtype=torch.bfloat16,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    )
    
    transp_vae = CustomVAE.from_pretrained(
        "WYBar/ART_test_weights",
        subfolder="custom_vae", 
        torch_dtype=torch.float32,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    )
    
    token = os.environ.get("HF_TOKEN")
    pipeline = CustomFluxPipelineCfg.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        transformer=transformer,
        torch_dtype=torch.bfloat16,
        token=token,
        cache_dir="/openseg_blob/v-yanbin/GradioDemo/cache_dir"
    ).to("cuda")
    pipeline.enable_model_cpu_offload(gpu_id=0) # Save GPU memory
    
    return pipeline, transp_vae

# @spaces.GPU(enable_queue=True, duration=60)
def test_one_sample(validation_box, validation_prompt, true_gs, inference_steps, pipeline, generator, transp_vae):
    print(validation_box)
    output, rgba_output, _, _ = pipeline(
        prompt=validation_prompt,
        validation_box=validation_box,
        generator=generator,
        height=512,
        width=512,
        num_layers=len(validation_box),
        guidance_scale=4.0,
        num_inference_steps=inference_steps,
        transparent_decoder=transp_vae,
        true_gs=true_gs
    )
    images = output.images   # list of PIL, len=layers
    rgba_images = [Image.fromarray(arr, 'RGBA') for arr in rgba_output]

    output_gradio = []
    merged_pil = images[1].convert('RGBA')
    for frame_idx, frame_pil in enumerate(rgba_images):
        if frame_idx < 2:
            frame_pil = images[frame_idx].convert('RGBA') # merged and background
        else:
            merged_pil = Image.alpha_composite(merged_pil, frame_pil)
        output_gradio.append(frame_pil)
    
    return output_gradio

def svg_test_one_sample(validation_prompt, validation_box_str, seed, true_gs, inference_steps, pipeline, transp_vae):
    generator = torch.Generator().manual_seed(seed)
    try:
        validation_box = ast.literal_eval(validation_box_str)
    except Exception as e:
        # Return a (gallery, file) pair so the caller's two-value unpacking still works
        return [f"Error parsing validation_box: {e}"], None
    if not isinstance(validation_box, list) or not all(isinstance(t, tuple) and len(t) == 4 for t in validation_box):
        return ["validation_box must be a list of tuples, each of length 4."], None

    validation_box = adjust_validation_box(validation_box)
    
    result_images = test_one_sample(validation_box, validation_prompt, true_gs, inference_steps, pipeline, generator, transp_vae)
    
    svg_img = pngs_to_svg(result_images[1:])
    
    svg_file_path = './image.svg'
    os.makedirs(os.path.dirname(svg_file_path), exist_ok=True)
    with open(svg_file_path, 'w', encoding='utf-8') as f:
        f.write(svg_img)       
    
    return result_images, svg_file_path
    
def main():
    model, quantizer, tokenizer, width, height, device = construction_layout()
    
    inference_partial = partial(
        inference,
        model=model,
        quantizer=quantizer,
        tokenizer=tokenizer,
        width=width,
        height=height,
        device=device
    )
    
    def process_preddate(intention, temperature, top_p, generate_method='v1'):
        intention = intention.replace('\n', '').replace('\r', '').replace('\\', '')
        intention = ensure_space_after_period(intention)
        if temperature == 0.0:
            # print("looking for greedy decoding strategies, set `do_sample=False`.")
            preddata = inference_partial(generate_method, intention, do_sample=False)
        else:
            preddata = inference_partial(generate_method, intention, temperature=temperature, top_p=top_p)
        # wholecaption = preddata["wholecaption"]
        if preddata is None:
            raise gr.Error("Layout generation failed after several retries; please try again.")
        layouts = preddata["layout"]
        list_box = []
        for i, layout in enumerate(layouts):
            x, y = layout["x"], layout["y"]
            width, height = layout["width"], layout["height"]
            if i == 0:
                list_box.append((0, 0, width, height))
                list_box.append((0, 0, width, height))
            else:
                left = x - width // 2
                top = y - height // 2
                right = x + width // 2
                bottom = y + height // 2
                list_box.append((left, top, right, bottom))
                
        # print(list_box)
        filtered_boxes = list_box[:2]
        for i in range(2, len(list_box)):
            keep = True
            for j in range(1, len(filtered_boxes)):
                iou = calculate_iou(list_box[i], filtered_boxes[j])
                if iou > 0.65:
                    print(list_box[i], filtered_boxes[j])
                    keep = False
                    break
            if keep:
                filtered_boxes.append(list_box[i])
            
        return str(filtered_boxes), intention, str(filtered_boxes)
    
    # def process_preddate(intention, generate_method='v1'):
    #     list_box = [(0, 0, 512, 512), (0, 0, 512, 512), (136, 184, 512, 512), (144, 0, 512, 512), (0, 0, 328, 136), (160, 112, 512, 360), (168, 112, 512, 360), (40, 232, 112, 296), (32, 88, 248, 176), (48, 424, 144, 448), (48, 464, 144, 488), (240, 464, 352, 488), (384, 464, 488, 488), (48, 480, 144, 504), (240, 480, 360, 504), (456, 0, 512, 56), (0, 0, 56, 40), (440, 0, 512, 40), (0, 24, 48, 88), (48, 168, 168, 240)]
    #     wholecaption = "Design an engaging and vibrant recruitment advertisement for our company. The image should feature three animated characters in a modern cityscape, depicting a dynamic and collaborative work environment. Incorporate a light bulb graphic with a question mark, symbolizing innovation, creativity, and problem-solving. Use bold text to announce \"WE ARE RECRUITING\" and provide the company's social media handle \"@reallygreatsite\" and a contact phone number \"+123-456-7890\" for interested individuals. The overall design should be playful and youthful, attracting potential recruits who are innovative and eager to contribute to a lively team."
    #     json_file = "/home/wyb/openseg_blob/v-yanbin/GradioDemo/LLM-For-Layout-Planning/inference_test.json"
    #     return wholecaption, str(list_box), json_file

    pipeline, transp_vae = construction()

    gradio_test_one_sample_partial = partial(
        svg_test_one_sample,
        pipeline=pipeline,
        transp_vae=transp_vae,
    )
    
    def process_svg(text_input, tuple_input, seed, true_gs, inference_steps):
        result_images, svg_file_path = gradio_test_one_sample_partial(text_input, tuple_input, seed, true_gs, inference_steps)
        if svg_file_path is None:
            # Generation failed; surface the error message in the gallery and skip the upload
            return result_images, None, ""

        url, unique_filename = upload_to_github(file_path=svg_file_path)
        
        if url is not None:
            print(f"File uploaded to: {url}")
            svg_editor = f"""
                <iframe src="https://svgedit.netlify.app/editor/index.html?storagePrompt=false&url={url}"
                width="100%" height="800px"></iframe>
            """
        else:
            print('upload_to_github FAILED!')
            svg_editor = """
                <iframe src="https://svgedit.netlify.app/editor/index.html"
                width="100%" height="800px"></iframe>
            """
        
        return result_images, svg_file_path, svg_editor
    
    def one_click_generate(intention_input, temperature, top_p, seed, true_gs, inference_steps):
        # First run process_preddate to plan the layout boxes
        list_box_output, intention_input, list_box_output = process_preddate(intention_input, temperature, top_p)
        
        # Then feed its output into process_svg to render the layers
        result_images, svg_file, svg_editor = process_svg(intention_input, list_box_output, seed, true_gs, inference_steps)
        
        # Return the outputs of both stages
        return list_box_output, result_images, svg_file, svg_editor, intention_input, list_box_output

    def clear_inputs1():
        return "", ""
    
    def clear_inputs2():
        return "", ""
    
    def transfer_inputs(intention, list_box):
        return intention, list_box
    
    theme = gr.themes.Soft(
        radius_size="lg",
    ).set(
        block_background_fill='*primary_50',
        block_border_color='*primary_200',
        block_border_width='1px',
        block_border_width_dark='100px',
        block_info_text_color='*primary_950',
        block_label_border_color='*primary_200',
        block_radius='*radius_lg'
    )
        
    with gr.Blocks(theme=theme) as demo:
        gr.HTML("<h1 style='text-align: center;'>ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation</h1>")
        gr.HTML("<h2>Anonymous Region Layout Planner</h2>")
    
        with gr.Row():
            with gr.Column():
                intention_input = gr.Textbox(lines=15, placeholder="Enter intention", label="Prompt")
                with gr.Row():
                    temperature_input=gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=0.0)
                    top_p_input=gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Top P", value=0.0)
                with gr.Row():
                    clear_btn1 = gr.Button("Clear")
                    model_btn1 = gr.Button("Commit", variant='primary')
                    transfer_btn1 = gr.Button("Export to below")
                    
                one_click_btn = gr.Button("One Click Generate ALL", variant='primary')
        
            with gr.Column():
                list_box_output = gr.Textbox(lines=10, placeholder="Validation Box", label="Validation Box")
           
        examples = gr.Examples(
            examples=[
                ['The image is a graphic design with a celebratory theme. At the top, there is a banner with the text \"Happy Anniversary\" in a bold, sans-serif font. Below this banner, there is a circular frame containing a photograph of a couple. The man has short, dark hair and is wearing a light-colored sweater, while the woman has long blonde hair and is also wearing a light-colored sweater. They are both smiling and appear to be embracing each other.Surrounding the circular frame are decorative elements such as pink flowers and green leaves, which add a festive touch to the design. Below the circular frame, there is a text that reads "Isabel & Morgan" in a cursive, elegant font, suggesting that the couple\'s names are Isabel and Morgan.At the bottom of the image, there is a banner with a message that says "Happy Anniversary! Cheers to another year of love, laughter, and cherished memories together.\" This text is in a smaller, sans-serif font and is placed against a solid background, providing a clear message of celebration and well-wishes for the couple.The overall style of the image is warm and celebratory, with a color scheme that includes shades of pink, green, and white, which contribute to a joyful and romantic atmosphere.'],
                ['The image is a digital illustration with a light blue background. At the top, there is a logo consisting of a snake wrapped around a staff, which is a common symbol in healthcare. Below the logo, the text "International Nurses Day" is prominently displayed in white, with the date "12 May 20xx" in smaller font size.The central part of the image features two stylized characters. On the left, there is a female character with dark hair, wearing a white nurse\'s uniform with a cap. She is holding a clipboard and appears to be speaking or gesturing, as indicated by a speech bubble with the word "OK" in it. On the right, there is a male character with light brown hair, wearing a light blue shirt with a white collar and a white apron. He is holding a stethoscope to his ear, suggesting he is a doctor or a healthcare professional.The characters are depicted in a friendly and approachable manner, with smiles on their faces. Around them, there are small blue plus signs, which are often associated with healthcare and medical services. The overall style of the image is clean, modern, and appears to be designed to celebrate International Nurses Day.'],
                ['The image features a graphic design with a festive theme. At the top, there is a decorative border with a wavy pattern. Below this border, the text "WINTER SEASON SPECIAL COOKIES" is prominently displayed in a bold, sans-serif font. The text is black with a slight shadow effect, giving it a three-dimensional appearance.In the center of the image, there are three illustrated gingerbread cookies. Each cookie has a smiling face with eyes, a nose, and a mouth, and they are colored in a warm, brown hue. The cookies are arranged in a staggered formation, with the middle cookie slightly higher than the others, creating a sense of depth.At the bottom of the image, there is a call to action that reads "ORDER.NOW" in a large, bold, sans-serif font. The text is colored in a darker shade of brown, contrasting with the lighter background. The overall style of the image suggests it is an advertisement or promotional graphic for a winter-themed cookie special.']
            ],
            inputs=[intention_input]
        )
        
        gr.HTML("<h2>Anonymous Region Transformer</h2>")
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(lines=10, placeholder="Enter prompt text", label="Prompt")
                tuple_input = gr.Textbox(lines=5, placeholder="Enter list of tuples, e.g., [(1, 2, 3, 4), (5, 6, 7, 8)]", label="Validation Box")
                with gr.Row():
                    true_gs_input=gr.Slider(minimum=3.0, maximum=5.0, step=0.1, label="true_gs", value=3.5)
                    inference_steps_input=gr.Slider(minimum=5, maximum=50, step=1, label="inference_steps", value=28)
                with gr.Row():
                    seed_input = gr.Number(label="Seed", value=42)
                with gr.Row():
                    transfer_btn2 = gr.Button("Import from above")
                with gr.Row():
                    clear_btn2 = gr.Button("Clear")
                    model_btn2 = gr.Button("Commit", variant='primary')
                
            with gr.Column():
                result_images = gr.Gallery(label="Result Images", columns=5, height='auto')
                
        gr.HTML("<h1>SVG Image</h1>")
        svg_file = gr.File(label="Download SVG Image")
        svg_editor = gr.HTML(label="Editable SVG Editor")
        
        model_btn1.click(
            fn=process_preddate, 
            inputs=[intention_input, temperature_input, top_p_input], 
            outputs=[list_box_output, text_input, tuple_input], 
            api_name="process_preddate"
        )
        clear_btn1.click(
            fn=clear_inputs1, 
            inputs=[], 
            outputs=[intention_input, list_box_output]
        )
        model_btn2.click(
            fn=process_svg, 
            inputs=[text_input, tuple_input, seed_input, true_gs_input, inference_steps_input], 
            outputs=[result_images, svg_file, svg_editor], 
            api_name="process_svg"
        )
        clear_btn2.click(
            fn=clear_inputs2, 
            inputs=[], 
            outputs=[text_input, tuple_input]
        )
        transfer_btn1.click(
            fn=transfer_inputs, 
            inputs=[intention_input, list_box_output], 
            outputs=[text_input, tuple_input]
        )
        transfer_btn2.click(
            fn=transfer_inputs, 
            inputs=[intention_input, list_box_output], 
            outputs=[text_input, tuple_input]
        )
        one_click_btn.click(
            fn=one_click_generate, 
            inputs=[intention_input, temperature_input, top_p_input, seed_input, true_gs_input, inference_steps_input], 
            outputs=[list_box_output, result_images, svg_file, svg_editor, text_input, tuple_input]
        )
    demo.launch(server_name='0.0.0.0', server_port=7860)

if __name__ == "__main__":
    main()