Spaces:
Sleeping
Sleeping
import os | |
import pdfplumber | |
import gradio as gr | |
from transformers import pipeline | |
import torch | |
import spaces | |
# Pick the inference device first: transformers pipelines take a CUDA device
# index (0 = first GPU) or -1 to run on CPU.
device = 0 if torch.cuda.is_available() else -1

# Small probe tensor used only to report which device is in use. It is moved
# to the GPU only when CUDA is available, so importing this module no longer
# raises on CPU-only hosts (the original called .cuda() unconditionally, which
# contradicted the CPU fallback above).
zero = torch.Tensor([0])
if device >= 0:
    zero = zero.cuda()
print(zero.device)

# Load LLM model for classification
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)
# Define request types and subtypes | |
# Top-level request categories offered to the zero-shot classifier as
# candidate labels.
request_types = [
    "Adjustment",
    "AU Transfer",
    "Closing Notice",
    "Commitment Change",
    "Fee Payment",
    "Money Movement - Inbound",
    "Money Movement - Outbound",
]

# Candidate sub-categories, keyed by top-level request type. Request types
# without an entry here ("Adjustment", "AU Transfer") have no sub types.
sub_request_types = {
    "Closing Notice": [
        "Reallocation Fees",
        "Amendment Fees",
        "Reallocation Principal",
    ],
    "Commitment Change": [
        "Cashless Roll",
        "Decrease",
        "Increase",
    ],
    "Fee Payment": [
        "Ongoing Fee",
        "Letter of Credit Fee",
    ],
    "Money Movement - Inbound": [
        "Principal",
        "Interest",
        "Principal + Interest",
        "Principal + Interest + Fee",
    ],
    "Money Movement - Outbound": [
        "Timebound",
        "Foreign Currency",
    ],
}
# Function to extract text from PDFs | |
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, joined with newlines.

    Pages for which no text can be extracted (``extract_text()`` returns
    ``None`` or an empty string) are skipped.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``pdfplumber.open``.

    Returns:
        The concatenated page texts as a single string; an empty string when
        no page yields any text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once. The original called extract_text()
        # twice per page (once to filter, once for the value), doubling the
        # extraction work.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)
# Function to classify emails | |
def classify_text(text):
    """Classify text into a request type and, where defined, a sub request type.

    The text is first scored against ``request_types``. If the winning type
    has an entry in ``sub_request_types``, the text is scored again against
    that type's sub types.

    Args:
        text: The text to classify (e.g. text extracted from a PDF).

    Returns:
        A three-line string with the request type, the sub request type
        ("Unknown" when the request type has no sub types) and the confidence
        score of the request type, formatted to two decimals. The score
        refers to the top-level request type only, not the sub type.
        Empty or whitespace-only input yields "Unknown" for both types and a
        score of 0.00 without invoking the model.
    """
    # Nothing to classify, e.g. a scanned PDF without a text layer. Return a
    # well-formed "Unknown" result instead of handing the model an empty
    # sequence (the zero-shot pipeline is expected to reject that).
    if not text or not text.strip():
        return "Request Type: Unknown\nSub Request Type: Unknown\nConfidence Score: 0.00"

    result = classifier(text, request_types)
    # The pipeline returns labels and scores sorted best-first.
    main_category = result["labels"][0]
    confidence = result["scores"][0]

    sub_category = "Unknown"
    # Single lookup replaces the original `in .keys()` check plus `.get()`.
    candidate_sub_types = sub_request_types.get(main_category)
    if candidate_sub_types:
        sub_type_result = classifier(text, candidate_sub_types)
        if sub_type_result["labels"]:
            sub_category = sub_type_result["labels"][0]

    return f"Request Type: {main_category}\nSub Request Type: {sub_category}\nConfidence Score: {confidence:.2f}"
# Gradio UI | |
def process_pdf(file):
    """Gradio callback: extract the text of an uploaded PDF and classify it.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input. With
            ``type="filepath"`` this is a plain path string; an object
            exposing the path as ``.name`` is accepted as well. ``None``
            means nothing was uploaded.

    Returns:
        The classification summary produced by ``classify_text``, or a short
        prompt asking for a file when nothing was uploaded.
    """
    if file is None:
        return "No file uploaded. Please upload a PDF file."

    # The interface uses gr.File(type="filepath"), which passes a str path, so
    # the original `file.name` raised AttributeError. Fall back to `.name`
    # only for file-like wrapper objects.
    pdf_path = file if isinstance(file, (str, os.PathLike)) else file.name

    text = extract_text_from_pdf(pdf_path)
    return classify_text(text)
# Single-input, single-output Gradio app: a PDF upload goes in, the
# classification summary comes out as plain text.
iface = gr.Interface(
    title="Email Request Type Classification",
    description="Upload a PDF file containing loan servicing requests, and the model will classify its request type.",
    fn=process_pdf,
    # Use 'filepath' instead of 'file'; uploads are restricted to .pdf files.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs="text",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()