|
import os
import re

import fitz
import gradio as gr
import requests
|
|
|
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file_path: Value handed unchanged to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output joined with no
        separator; an empty string when the document yields no pages.
    """
    # Open via the context manager so the document handle is released even
    # when extracting a page raises.
    with fitz.open(pdf_file_path) as doc:
        return "".join(page.get_text() for page in doc)
|
|
|
# Hugging Face Inference API endpoint for the model named in the URL.
API_URL = "https://api-inference.huggingface.co/models/potsawee/t5-large-generation-squad-QuestionAnswer"

# SECURITY(review): an API token is committed in source below. Supply the
# token through the HF_TOKEN environment variable instead; the literal is kept
# only as a backward-compatible fallback and should be revoked and removed.
_HF_API_TOKEN = os.environ.get("HF_TOKEN") or "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"

# Headers sent with every request to API_URL.
headers = {"Authorization": "Bearer " + _HF_API_TOKEN}
|
|
|
def query(payload, timeout=60):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        Whatever ``response.json()`` decodes. The HTTP status code is not
        checked, so an error reply with a JSON body is returned like any
        other result.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
|
|
|
def generate_question_answer_pairs(input_file):
    """Run the question/answer model over each sentence of an uploaded PDF.

    Args:
        input_file: The uploaded file as delivered by the UI, or ``None``
            when nothing was uploaded. It is forwarded to
            ``extract_text_from_pdf`` unchanged.

    Returns:
        The string ``"Please upload a file"`` when *input_file* is ``None``;
        otherwise a list holding one ``query`` result per non-blank text
        fragment, in document order.
    """
    # Guard clause: nothing to process without an upload.
    if input_file is None:
        return "Please upload a file"

    document_text = extract_text_from_pdf(input_file)

    # Zero-width split right after '.', '!' or '?', so each fragment keeps its
    # terminating punctuation (and any whitespace that follows it).
    fragments = re.split(r'(?<=[.!?])', document_text)

    # Whitespace-only fragments are skipped; the others are sent as-is.
    return [
        query({"inputs": fragment})
        for fragment in fragments
        if fragment.strip()
    ]
|
|
|
# Static pieces of the Gradio UI.
title = "Question-Answer Pairs Generation"
input_file = gr.File(label="Upload a PDF file")
output_text = gr.Textbox()

# Wire the upload widget to the generator and show its result in the textbox.
interface = gr.Interface(
    fn=generate_question_answer_pairs,
    inputs=input_file,
    outputs=output_text,
    title=title,
)

# Launch only when executed as a script, so importing this module (for reuse
# or testing) does not start the app as a side effect.
if __name__ == "__main__":
    interface.launch()