import argparse
import os

import gradio as gr
import numpy as np
import torch
import yaml
from huggingface_hub import hf_hub_download
from PIL import Image
# from gradio_imageslider import ImageSlider  # optional before/after slider output (currently unused)

## local code
from models import instructir
from text.models import LanguageModel, LMHead


def dict2namespace(config):
    """Recursively convert a nested dict (e.g. parsed YAML) into an argparse.Namespace."""
    namespace = argparse.Namespace()
    for key, value in config.items():
        if isinstance(value, dict):
            new_value = dict2namespace(value)
        else:
            new_value = value
        setattr(namespace, key, new_value)
    return namespace
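
# e.g. dict2namespace({"model": {"width": 32}}).model.width -> 32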


hf_hub_download(repo_id="marcosv/InstructIR", filename="im_instructir-7d.pt", local_dir="./")
hf_hub_download(repo_id="marcosv/InstructIR", filename="lm_instructir-7d.pt", local_dir="./")

CONFIG     = "configs/eval5d.yml"
LM_MODEL   = "lm_instructir-7d.pt"
MODEL_NAME = "im_instructir-7d.pt"

# parse the evaluation config file
with open(CONFIG, "r") as f:
    config = yaml.safe_load(f)

cfg = dict2namespace(config)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = instructir.create_model(
    input_channels=cfg.model.in_ch,
    width=cfg.model.width,
    enc_blks=cfg.model.enc_blks,
    middle_blk_num=cfg.model.middle_blk_num,
    dec_blks=cfg.model.dec_blks,
    txtdim=cfg.model.textdim,
)
model = model.to(device)
print("IMAGE MODEL CKPT:", MODEL_NAME)
model.load_state_dict(torch.load(MODEL_NAME, map_location="cpu"), strict=True)

os.environ["TOKENIZERS_PARALLELISM"] = "false"
LMODEL = cfg.llm.model
language_model = LanguageModel(model=LMODEL)
lm_head = LMHead(embedding_dim=cfg.llm.model_dim, hidden_dim=cfg.llm.embd_dim, num_classes=cfg.llm.nclasses)
lm_head = lm_head.to(device)

print("LMHEAD MODEL CKPT:", LM_MODEL)
lm_head.load_state_dict(torch.load(LM_MODEL, map_location="cpu"), strict=True)
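
# inference-only demo: put both networks in eval mode (disables any train-time behavior such as dropout)
model.eval()
lm_head.eval()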


def load_img(filename, norm=True):
    """Load an image from disk as an RGB numpy array, optionally normalized to [0, 1] float32."""
    img = np.array(Image.open(filename).convert("RGB"))
    if norm:
        img = img / 255.
        img = img.astype(np.float32)
    return img


def process_img(image, prompt):
    """Restore `image` (PIL, RGB) following the natural-language `prompt`; returns a PIL image."""
    # normalize to [0, 1] float32 and reshape HWC -> 1CHW for the network
    img = np.array(image)
    img = img / 255.
    img = img.astype(np.float32)
    y = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(device)

    # embed the instruction with the language model, then project it with the LM head
    lm_embd = language_model(prompt)
    lm_embd = lm_embd.to(device)

    with torch.no_grad():
        text_embd, deg_pred = lm_head(lm_embd)
        x_hat = model(y, text_embd)

    restored_img = x_hat.squeeze().permute(1, 2, 0).clamp_(0, 1).cpu().detach().numpy()
    restored_img = np.clip(restored_img, 0., 1.)

    restored_img = (restored_img * 255.0).round().astype(np.uint8)  # float32 [0, 1] -> uint8
    return Image.fromarray(restored_img)  # with ImageSlider: return (image, Image.fromarray(restored_img))
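
# Standalone usage sketch (hypothetical output path; assumes the weights loaded above):
#
#   inp = Image.open("images/a4960.jpg").convert("RGB")
#   out = process_img(inp, "my colors are too off, make it pop")
#   out.save("restored.png")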



title = "Digital Image Processing Project"
description = ''' ## High-Quality Image Restoration Following Human Instructions

Arindham Srinivasan (21BCE1262), Arvind Venkat Ramanan (21BCE1160)

Vellore Institute of Technology


<details>
<summary> <b> Abstract</b> (click me to read)</summary>
<p>
This project explores high-quality image restoration guided by human-written instructions. Image restoration remains a crucial task for enhancing visual quality, yet traditional methods rely on predefined algorithms tailored to specific degradation types. This project instead harnesses natural-language instructions to inform the restoration model's decisions.
Given a natural-language prompt, the model restores high-quality images from their degraded counterparts, handling degradations such as noise, blur, rain, haze, and low-light conditions. This makes image enhancement more intuitive and user-friendly than conventional restoration pipelines.
The model achieves state-of-the-art performance across a range of restoration tasks, outperforming previous all-in-one restoration methods by an average of +1 dB.
The project also introduces a comprehensive dataset tailored to text-guided image restoration and enhancement, providing a benchmark for future research in this area.
By integrating human input directly into the restoration process, the project enables a more collaborative, user-centric approach to visual content enhancement.
</p>
</details>

'''

#### Image,Prompts examples
examples = [['images/a4960.jpg', "my colors are too off, make it pop so I can use it in instagram"],
            ['images/rain-020.png', "I love this photo, could you remove the raindrops? please keep the content intact"],
            ['images/gradio_demo_images/city.jpg', "I took this photo during a foggy day, can you improve it?"], 
            ['images/gradio_demo_images/frog.png', "can you remove the tiny dots in the image? it is very unpleasant"], 
            ["images/lol_748.png", "my image is too dark, I cannot see anything, can you fix it?"], 
            ["images/lol_22.png", "Increase the brightness of my photo please, I want to see totoro"], 
            ["images/gopro.png", "I took this photo while I was running, can you stabilize the image? it is too blurry"],
            ["images/GOPR0871_11_00-000075-min.png", "Correct the motion blur in this image so it is more clear"],
            ["images/a0010.jpg", "please I want this image for my photo album, can you edit it as a photographer"],
            ["images/real_fog.png", "How can I remove the fog and mist from this photo?"]
            ]

css = """
    .image-frame img, .image-container img {
        width: auto;
        height: auto;
        max-width: none;
    }
"""

demo = gr.Interface(
    fn=process_img,
    inputs=[
            gr.Image(type="pil", label="Input", value="images/a4960.jpg"),
            gr.Text(label="Prompt", value="my colors are too off, make it pop so I can use it in instagram")
    ],
    outputs=[gr.Image(type="pil", label="Output")],
    title=title,
    description=description,
    examples=examples,
    css=css,
)
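
# Launch note: `demo.launch()` serves the app locally; `demo.launch(share=True)` would
# additionally create a temporary public Gradio link (standard Gradio options).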

if __name__ == "__main__":
    demo.launch()