shahad-b committed on
Commit
242f316
·
verified ·
1 Parent(s): 92506d5

Create app.py

Files changed (1)
  app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ from transformers import pipeline
+ from diffusers import StableDiffusionPipeline
+ import torch
+ import wget
+
+ # Define the device to use (either "cuda" for GPU or "cpu" for CPU)
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load the models
+ # Image captioning model to generate captions from uploaded images
+ caption_image = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device=device)
+ # Stable Diffusion model for generating new images based on captions
+ sd_pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
+
+ # Load the translation model (English to Arabic)
+ translator = pipeline(
+     task="translation",
+     model="facebook/nllb-200-distilled-600M",
+     torch_dtype=torch.bfloat16,
+     device=device
+ )
+
+ # Download the example images used in the interface examples
+ url1 = "https://github.com/Shahad-b/Image-database/blob/main/sea.jpg?raw=true"
+ sea = wget.download(url1)
+
+ url2 = "https://github.com/Shahad-b/Image-database/blob/main/Cat.jpeg?raw=true"
+ Cat = wget.download(url2)
+
+ url3 = "https://github.com/Shahad-b/Image-database/blob/main/Car.jpeg?raw=true"
+ Car = wget.download(url3)
+
+ # Caption the uploaded image, translate the caption, and generate new images from it
+ def generate_image_and_translate(image, num_images=1):
+     # Generate a caption in English from the uploaded image
+     caption_en = caption_image(image)[0]['generated_text']
+
+     # Translate the English caption to Arabic
+     caption_ar = translator(caption_en, src_lang="eng_Latn", tgt_lang="arb_Arab")[0]['translation_text']
+
+     generated_images = []
+
+     # Generate the specified number of images based on the English caption
+     for _ in range(num_images):
+         generated_image = sd_pipeline(prompt=caption_en).images[0]
+         generated_images.append(generated_image)
+
+     # Return the generated images along with both captions
+     return generated_images, caption_en, caption_ar
+
+ # Set up the Gradio interface
+ interface = gr.Interface(
+     fn=generate_image_and_translate,  # Function to call when processing input
+     inputs=[
+         gr.Image(type="pil", label="📤 Upload Image"),  # Input for image upload
+         gr.Slider(minimum=1, maximum=10, label="🔢 Number of Images", value=1, step=1)  # Slider to select the number of images to generate
+     ],
+     outputs=[
+         gr.Gallery(label="🖼️ Generated Images"),
+         gr.Textbox(label="📝 Generated Caption (English)", interactive=False),
+         gr.Textbox(label="🌍 Translated Caption (Arabic)", interactive=False)
+     ],
+     title="Image Generation and Captioning",  # Title of the interface
+     description="Upload an image to extract a caption and display it in both English and Arabic. New images will then be generated based on that caption.",  # Description
+     examples=[  # Example inputs
+         ["sea.jpg", 3],
+         ["Cat.jpeg", 4],
+         ["Car.jpeg", 2]
+     ],
+     theme='freddyaboulton/dracula_revamped'  # Interface theme
+ )
+
+ # Launch the Gradio application
+ interface.launch()
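
For a quick sanity check without the web UI, the handler can be exercised directly. The snippet below is a minimal sketch, assuming the definitions from app.py above (everything except the final interface.launch() call) have already been executed in the current Python session, that the dependencies (gradio, transformers, diffusers, torch, wget, Pillow) are installed, and that sea.jpg was downloaded by the script; the output filename generated_0.png is arbitrary.

from PIL import Image

# Assumes caption_image, translator, sd_pipeline, and generate_image_and_translate
# from app.py are already defined in the current session (sketch, not part of the commit).
image = Image.open("sea.jpg")  # one of the example images downloaded above
images, caption_en, caption_ar = generate_image_and_translate(image, num_images=1)

print("English caption:", caption_en)
print("Arabic caption:", caption_ar)
images[0].save("generated_0.png")  # save the first generated image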