Update app.py
app.py CHANGED
@@ -1,24 +1,115 @@
Removed (previous Gradio interface definition):
-     title="AIC-transformer-2023",  # Title for your interface
-     description="Description",  # Description for users
- )
New version of app.py:

import gradio as gr
from gradio.themes.base import Base
from PIL import Image
import torch
import torchvision.transforms as transforms
from transformers import VisionEncoderDecoderModel, AutoTokenizer, AutoModelForSeq2SeqLM

# Load the models
caption_model = VisionEncoderDecoderModel.from_pretrained('Mayada/AIC-transformer')  # Your model on Hugging Face
caption_tokenizer = AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabertv02')
question_model = AutoModelForSeq2SeqLM.from_pretrained("Mihakram/AraT5-base-question-generation")
question_tokenizer = AutoTokenizer.from_pretrained("Mihakram/AraT5-base-question-generation")

# Define the normalization and transformations
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],  # ImageNet mean
    std=[0.229, 0.224, 0.225]    # ImageNet standard deviation
)

inference_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

# Load the dictionary (use it from your Hugging Face Space or include in the repo)
dictionary = {
    "caption": "alternative_caption"  # Replace with your actual dictionary
}

# Function to correct words in the caption using the dictionary
def correct_caption(caption):
    corrected_words = [dictionary.get(word, word) for word in caption.split()]
    corrected_caption = " ".join(corrected_words)
    return corrected_caption

# Function to generate captions for an image
def generate_captions(image):
    img_tensor = inference_transforms(image).unsqueeze(0)
    generated = caption_model.generate(
        img_tensor,
        num_beams=3,
        max_length=10,
        early_stopping=True,
        do_sample=True,
        top_k=1000,
        num_return_sequences=1,
    )
    captions = [caption_tokenizer.decode(g, skip_special_tokens=True).strip() for g in generated]
    return captions

# Function to generate questions given a context and answer
def generate_questions(context, answer):
    text = "context: " + context + " " + "answer: " + answer + " </s>"
    text_encoding = question_tokenizer.encode_plus(
        text, return_tensors="pt"
    )
    question_model.eval()
    generated_ids = question_model.generate(
        input_ids=text_encoding['input_ids'],
        attention_mask=text_encoding['attention_mask'],
        max_length=64,
        num_beams=5,
        num_return_sequences=1
    )
    questions = [question_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True).replace(
        'question: ', ' ') for g in generated_ids]
    return questions

# Gradio Interface Function
def caption_question_interface(image):
    captions = generate_captions(image)
    corrected_captions = [correct_caption(caption) for caption in captions]
    questions_with_answers = []

    for caption in corrected_captions:
        words = caption.split()
        if len(words) > 0:
            answer = words[0]
            question = generate_questions(caption, answer)
            questions_with_answers.extend([(q, answer) for q in question])
        if len(words) > 1:
            answer = words[1]
            question = generate_questions(caption, answer)
            questions_with_answers.extend([(q, answer) for q in question])
        if len(words) > 1:
            answer = " ".join(words[:2])
            question = generate_questions(caption, answer)
            questions_with_answers.extend([(q, answer) for q in question])
        if len(words) > 2:
            answer = words[2]
            question = generate_questions(caption, answer)
            questions_with_answers.extend([(q, answer) for q in question])
        if len(words) > 3:
            answer = words[3]
            question = generate_questions(caption, answer)
            questions_with_answers.extend([(q, answer) for q in question])

    formatted_questions = [f"Question: {q}\nAnswer: {a}" for q, a in questions_with_answers]
    formatted_questions = "\n".join(formatted_questions)

    return "\n".join(corrected_captions), formatted_questions

gr_interface = gr.Interface(
    fn=caption_question_interface,
    inputs=gr.Image(type="pil", label="Input Image"),  # gr.inputs.Image was removed in Gradio 4; use the top-level component
    outputs=[
        gr.Textbox(label="Generated Captions"),  # likewise gr.outputs.Textbox -> gr.Textbox
        gr.Textbox(label="Generated Questions and Answers")
    ],
    title="Image Captioning and Question Generation",
    description="Generate captions and questions for images using pre-trained models."
)
114 |
|
115 |
+
gr_interface.launch()
|
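
The functions above can also be exercised without the web UI. A minimal sketch, assuming the model and function definitions from app.py are already loaded in the current Python session (before gr_interface.launch() is called); "example.jpg" is a placeholder path and not a file shipped with the Space:

from PIL import Image

image = Image.open("example.jpg").convert("RGB")   # placeholder test image, force 3 channels
captions = generate_captions(image)                # caption(s) from the vision encoder-decoder
print("Captions:", captions)

for cap in captions:
    words = cap.split()
    if words:
        # ask the question generator for a question whose answer is the caption's first word
        print("Q/A:", generate_questions(cap, words[0]), words[0])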