Libroru committed on
Commit 612b7f5
1 Parent(s): b2cb9a7

Upload 15 files

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ img/icon-dark.png filter=lfs diff=lfs merge=lfs -text
+ img/icon-light.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,25 +1,35 @@
- import os
- import gradio
- import time, asyncio
  from theme import CustomTheme
- from llama_index.llms import OpenAI
  from llama_index import (
-     ServiceContext,
      SimpleDirectoryReader,
-     VectorStoreIndex,
-     load_index_from_storage,
      StorageContext,
-     set_global_service_context,
  )

  bot_examples = [
      "Wie kannst du mir helfen?",
      "Welche Sprachen sprichst du?",
      "Wie trainiere ich meinen Bizeps?",
      "Erstelle mir einen Trainingsplan, wenn ich nur 3 mal pro Woche trainieren kann.",
-     "Berechne meinen BMI, wenn ich 75kg bei 175cm Körpergröße wiege.",
-     "Berechne mir meinen Kaloriendefizit, wenn ich in der Woche 0,2kg abnehmen möchte.",
-     "Berechne mir nochmal das Kaloriendefizit, wenn ich Männlich 19 bin.",
      "Wie wechsle ich meine Reifen?"
  ]
 
@@ -50,6 +60,61 @@ context_str = (

  chat_engine = None

  def setup_ai():
      """
      Set up the AI for use with querying questions to OpenAI.
@@ -59,31 +124,29 @@ def setup_ai():
      assigns the context_template and system_prompt used for manipulating
      the AI responses.
      """
-     global chat_engine, context_str, system_prompt
-
-     # Check if storage index exists
-     if not os.path.isdir("storage"):
-         print("Directory does not exist")
-         print("Building Index")
-         documents = SimpleDirectoryReader("data").load_data()
-         index = VectorStoreIndex.from_documents(documents)
-         index.storage_context.persist(persist_dir="storage")
-     else:
-         print("Directory does already exist")
-         print("Reusing index")
-         storage_context = StorageContext.from_defaults(persist_dir="storage")
-         index = load_index_from_storage(storage_context)
-
      api_key = os.environ["OPENAI_API_KEY"]

-     llm = OpenAI(temperature=0.1, model="gpt-4")

-     chat_engine = index.as_chat_engine(chat_mode="context", system_prompt=system_prompt, context_template=context_str)

-     service_context = ServiceContext.from_defaults(
-         llm=llm
      )
-     set_global_service_context(service_context)

  def response(message, history):
      """
@@ -99,18 +162,21 @@ def response(message, history):
      # If we don't assign an empty list if nothing is present,
      # then the program will, in the worst case, crash.
      chat_history = chat_engine.chat_history if chat_engine.chat_history is not None else []
-     print("Sending request to ChatGPT")
-     response = chat_engine.stream_chat(message, chat_history)

-     output_text = ""

-     for token in response.response_gen:
-         time.sleep(0.05)
          output_text += token
          yield output_text

  # For debugging, just to check if the UI looks good.
- def response_no_api(message, history):
      """
      Returns a default message.
      """
@@ -131,20 +197,30 @@ def main():
          elem_classes=["ask-button"],
      )

-     chat_interface = gradio.ChatInterface(
-         fn=response,
-         title="A.R.N.O.L.D.",
-         theme=CustomTheme(),
-         submit_btn=submit_button,
-         chatbot=chatbot,
-         examples=bot_examples,
-         css="style.css",
-     )

      chat_interface.queue()
      chat_interface.launch(
-         inbrowser=True
      )

  if __name__ == "__main__":
      main()
 
+ import os, gradio, torch, openai, fitz, asyncio, qdrant_client, time, math
  from theme import CustomTheme
  from llama_index import (
      SimpleDirectoryReader,
      StorageContext,
  )
+ from llama_index.multi_modal_llms import OpenAIMultiModal
+ from llama_index.vector_stores.qdrant import QdrantVectorStore
+ from llama_index.indices.multi_modal.base import MultiModalVectorStoreIndex
+ from PIL import Image
+ from microsofttt import detect_and_crop_save_table
+
+ from torchvision import transforms
+
+ from transformers import AutoModelForObjectDetection
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ openai.api_key = os.environ["OPENAI_API_KEY"]
+
+ image_documents = None
+ openai_mm_llm = None

  bot_examples = [
      "Wie kannst du mir helfen?",
      "Welche Sprachen sprichst du?",
      "Wie trainiere ich meinen Bizeps?",
      "Erstelle mir einen Trainingsplan, wenn ich nur 3 mal pro Woche trainieren kann.",
+     "Berechne meinen BMI, wenn ich männlich bin und 75kg bei 175cm Körpergröße wiege.",
+     "Berechne mir meinen Kaloriendefizit, wenn ich in der Woche 0,1kg abnehmen möchte.",
+     "Berechne mir nochmal das Kaloriendefizit, wenn ich Männlich 18 bin.",
      "Wie wechsle ich meine Reifen?"
  ]
 
  chat_engine = None

+ def setup_db():
+     """
+     Set up the qdrant store and convert PDFs with tables into images,
+     which are then used with the Microsoft Table Transformer to extract table information.
+     """
+     if not os.path.exists("./qdrant_db"):
+
+         if not os.path.exists("./table_images"):
+             os.mkdir("./table_images/")
+         # Convert PDFs to images
+         for file in os.listdir("./pdf_with_tables"):
+             pdf_document = fitz.open("./pdf_with_tables/" + file)
+
+             for page_number in range(pdf_document.page_count):
+                 # Get the page
+                 page = pdf_document[page_number]
+
+                 # Convert the page to an image
+                 pix = page.get_pixmap()
+
+                 # Create a Pillow Image object from the pixmap
+                 image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+
+                 # Save the image
+                 image.save(f"./table_images/page_{page_number + 1}_{math.floor(time.time())}.png")
+
+             pdf_document.close()
+
+         # Crop images to tables
+         for image in os.listdir("./table_images"):
+             detect_and_crop_save_table("./table_images/" + image)
+             # Delete old uncropped image
+             os.remove("./table_images/" + image)
+
+     # Read text documents and images
+     text_documents = SimpleDirectoryReader("./data/").load_data()
+     image_documents = SimpleDirectoryReader("./table_images/").load_data()
+
+     # Create the text and image databases
+     client = qdrant_client.QdrantClient(path="qdrant_db")
+
+     text_store = QdrantVectorStore(
+         client=client, collection_name="text_collection"
+     )
+     image_store = QdrantVectorStore(
+         client=client, collection_name="image_collection"
+     )
+
+     # Create a storage_context for the chatbot from the databases
+     storage_context = StorageContext.from_defaults(
+         vector_store=text_store, image_store=image_store
+     )
+
+     return (text_documents, image_documents, storage_context)
+
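A minimal sketch for sanity-checking what setup_db() writes into the local qdrant store after a first run; it assumes only the stock qdrant_client API and the collection names used above, and is not part of this commit:

import qdrant_client

# Open the same on-disk store setup_db() creates and report how many
# points each collection holds (text chunks vs. cropped table images).
client = qdrant_client.QdrantClient(path="qdrant_db")
for name in ("text_collection", "image_collection"):
    print(name, client.count(collection_name=name).count)
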
  def setup_ai():
      """
      Set up the AI for use with querying questions to OpenAI.

      assigns the context_template and system_prompt used for manipulating
      the AI responses.
      """
+     global openai_mm_llm, context_str, system_prompt, chat_engine
+
+     # Set up the database
+     text_documents, image_documents, storage_context = setup_db()
+
      api_key = os.environ["OPENAI_API_KEY"]

+     # Define the model used
+     openai_mm_llm = OpenAIMultiModal(
+         model="gpt-4-vision-preview", api_key=api_key, max_new_tokens=1500
+     )

+     # Give the model the storage_context
+     index = MultiModalVectorStoreIndex.from_documents(
+         documents=text_documents + image_documents,
+         storage_context=storage_context
+     )

+     # Create a chat engine from the index
+     chat_engine = index.as_chat_engine(
+         system_prompt=system_prompt,
+         context_str=context_str
      )
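For comparison, the llama-index multi-modal example credited in microsofttt.py queries this kind of index through a query engine rather than a chat engine. A minimal sketch under llama-index 0.9.x, reusing the index and openai_mm_llm objects built in setup_ai(); the prompt template text is illustrative, not from this repo:

from llama_index.prompts import PromptTemplate

# Illustrative QA template; {context_str} and {query_str} are filled in
# by the query engine with the retrieved nodes and the user query.
qa_tmpl = PromptTemplate(
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)

# Query the multimodal index directly, as in the linked example.
query_engine = index.as_query_engine(
    multi_modal_llm=openai_mm_llm, text_qa_template=qa_tmpl
)
print(query_engine.query("Wie trainiere ich meinen Bizeps?"))
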
 
  def response(message, history):
      """
 
      # If we don't assign an empty list if nothing is present,
      # then the program will, in the worst case, crash.
      chat_history = chat_engine.chat_history if chat_engine.chat_history is not None else []

+     # Send query
+     _response = chat_engine.stream_chat(message, chat_history)

+     # Stream chat answer
+     output_text: str = ""
+     for token in _response.response_gen:
+         time.sleep(0.02)
          output_text += token
          yield output_text

+
+
  # For debugging, just to check if the UI looks good.
+ def response_no_api(message, history) -> str:
      """
      Returns a default message.
      """
 
          elem_classes=["ask-button"],
      )

+     with gradio.Blocks(theme=CustomTheme(), css="style.css") as chat_interface:
+         gradio.Markdown(
+             """<div style='display: flex; justify-content: center; align-items: center; margin-right: 12px;'>
+             <img width='48px' style='margin-right: 12px;' src='/file/img/icon-light.png'/>
+             ARNOLD
+             </div>""",
+             elem_classes=["arnold-title"]
+         )
+         gradio.ChatInterface(
+             fn=response,
+             theme=CustomTheme(),
+             submit_btn=submit_button,
+             chatbot=chatbot,
+             examples=bot_examples,
+             css="style.css",
+         )
+

      chat_interface.queue()
      chat_interface.launch(
+         inbrowser=True,
+         allowed_paths=["./img/"]
      )

+
  if __name__ == "__main__":
      main()
img/.DS_Store ADDED
Binary file (6.15 kB).
 
img/icon-dark.png ADDED

Git LFS Details

  • SHA256: cfaded4ad39679788929c6d2d532415d8f4593ccbc222802a9e8bfec1f5ae7fd
  • Pointer size: 132 Bytes
  • Size of remote file: 1.2 MB
img/icon-light.png ADDED

Git LFS Details

  • SHA256: 49ce2a7a23376ce538931edb8209cf1553e6322523f6019a89c4e9bc7cc094ee
  • Pointer size: 132 Bytes
  • Size of remote file: 1.17 MB
img/test_img.png ADDED
microsofttt.py ADDED
@@ -0,0 +1,154 @@
+ """
+ Microsoft Table Transformer Extension
+ By Niels:
+ https://docs.llamaindex.ai/en/stable/examples/multi_modal/multi_modal_pdf_tables.html#experiment-3-let-s-use-microsoft-table-transformer-to-crop-tables-from-the-images-and-see-if-it-gives-the-correct-answer
+ """
+ import matplotlib.pyplot as plt
+ import matplotlib.patches as patches
+ from matplotlib.patches import Patch
+ import io
+ from PIL import Image, ImageDraw
+ import numpy as np
+ import csv
+ import pandas as pd
+
+ from torchvision import transforms
+
+ from transformers import AutoModelForObjectDetection
+ import torch
+ import openai
+ import os
+ import fitz
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ class MaxResize(object):
+     def __init__(self, max_size=800):
+         self.max_size = max_size
+
+     def __call__(self, image):
+         width, height = image.size
+         current_max_size = max(width, height)
+         scale = self.max_size / current_max_size
+         resized_image = image.resize(
+             (int(round(scale * width)), int(round(scale * height)))
+         )
+
+         return resized_image
+
+
+ detection_transform = transforms.Compose(
+     [
+         MaxResize(800),
+         transforms.ToTensor(),
+         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+     ]
+ )
+
+ structure_transform = transforms.Compose(
+     [
+         MaxResize(1000),
+         transforms.ToTensor(),
+         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+     ]
+ )
+
+ # load table detection model
+ # processor = TableTransformerImageProcessor(max_size=800)
+ model = AutoModelForObjectDetection.from_pretrained(
+     "microsoft/table-transformer-detection", revision="no_timm"
+ ).to(device)
+
+ # load table structure recognition model
+ # structure_processor = TableTransformerImageProcessor(max_size=1000)
+ structure_model = AutoModelForObjectDetection.from_pretrained(
+     "microsoft/table-transformer-structure-recognition-v1.1-all"
+ ).to(device)
+
+
+ # for output bounding box post-processing
+ def box_cxcywh_to_xyxy(x):
+     x_c, y_c, w, h = x.unbind(-1)
+     b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
+     return torch.stack(b, dim=1)
+
+
+ def rescale_bboxes(out_bbox, size):
+     width, height = size
+     boxes = box_cxcywh_to_xyxy(out_bbox)
+     boxes = boxes * torch.tensor(
+         [width, height, width, height], dtype=torch.float32
+     )
+     return boxes
+
+
+ def outputs_to_objects(outputs, img_size, id2label):
+     m = outputs.logits.softmax(-1).max(-1)
+     pred_labels = list(m.indices.detach().cpu().numpy())[0]
+     pred_scores = list(m.values.detach().cpu().numpy())[0]
+     pred_bboxes = outputs["pred_boxes"].detach().cpu()[0]
+     pred_bboxes = [
+         elem.tolist() for elem in rescale_bboxes(pred_bboxes, img_size)
+     ]
+
+     objects = []
+     for label, score, bbox in zip(pred_labels, pred_scores, pred_bboxes):
+         class_label = id2label[int(label)]
+         if not class_label == "no object":
+             objects.append(
+                 {
+                     "label": class_label,
+                     "score": float(score),
+                     "bbox": [float(elem) for elem in bbox],
+                 }
+             )
+
+     return objects
+
+
+ def detect_and_crop_save_table(
+     file_path, cropped_table_directory="./table_images/"
+ ):
+     image = Image.open(file_path)
+
+     filename, _ = os.path.splitext(file_path.split("/")[-1])
+
+     if not os.path.exists(cropped_table_directory):
+         os.makedirs(cropped_table_directory)
+
+     # prepare image for the model
+     # pixel_values = processor(image, return_tensors="pt").pixel_values
+     pixel_values = detection_transform(image).unsqueeze(0).to(device)
+
+     # forward pass
+     with torch.no_grad():
+         outputs = model(pixel_values)
+
+     # postprocess to get detected tables
+     id2label = model.config.id2label
+     id2label[len(model.config.id2label)] = "no object"
+     detected_tables = outputs_to_objects(outputs, image.size, id2label)
+
+     print(f"number of tables detected {len(detected_tables)}")
+
+     for idx in range(len(detected_tables)):
+         # crop detected table out of image
+         cropped_table = image.crop(detected_tables[idx]["bbox"])
+         cropped_table.save(f"./{cropped_table_directory}/{filename}_{idx}.png")
+
+
+ def plot_images(image_paths):
+     images_shown = 0
+     plt.figure(figsize=(16, 9))
+     for img_path in image_paths:
+         if os.path.isfile(img_path):
+             image = Image.open(img_path)
+
+             plt.subplot(2, 3, images_shown + 1)
+             plt.imshow(image)
+             plt.xticks([])
+             plt.yticks([])
+
+             images_shown += 1
+             if images_shown >= 9:
+                 break
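
A minimal usage sketch for the helpers above: render the first page of the bundled PDF with PyMuPDF, crop any detected tables, and preview the crops. Paths mirror app.py; the page filename is illustrative and not from this repo:

import os
import fitz
import matplotlib.pyplot as plt
from PIL import Image
from microsofttt import detect_and_crop_save_table, plot_images

os.makedirs("./table_images", exist_ok=True)

# Render the first PDF page to a PNG, the same way setup_db() does.
pdf = fitz.open("./pdf_with_tables/test.pdf")
pix = pdf[0].get_pixmap()
Image.frombytes("RGB", [pix.width, pix.height], pix.samples).save(
    "./table_images/page_1.png"
)
pdf.close()

# Crops are written as ./table_images/page_1_<idx>.png
detect_and_crop_save_table("./table_images/page_1.png")

plot_images(
    [f"./table_images/{f}" for f in sorted(os.listdir("./table_images"))]
)
plt.show()
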
pdf_with_tables/test.pdf ADDED
Binary file (39.7 kB).
 
requirements.txt CHANGED
@@ -1,4 +1,10 @@
 
+ clip @ git+https://github.com/openai/CLIP.git
  gradio==4.4.1
  openai==1.1.1
  llama-index==0.9.15
- pypdf==3.17.1
+ pypdf==3.17.1
+ qdrant_client
+ pyMuPDF
+ tools
+ frontend
+ easyocr
style.css CHANGED
@@ -12,19 +12,15 @@
      padding: 0 !important;
  }

- div.gap>.stretch {
+ /*div.gap>.stretch {
      display: none !important;
- }
+ }*/

  div.gap.panel>div.gr-group {
      position: absolute;
      bottom: 0;
  }

- h1 {
-     font-size: 48px !important;
- }
-
  .ask-button {
      background-color: var(--color-accent);
      font-weight: bold;
@@ -39,6 +35,8 @@ div.message-wrap {
      margin-bottom: 32px !important;
  }

- .message, .gallery-item, .ask-button, textarea {
-     font-family: "Arial" !important;
+ .arnold-title {
+     font-family: "Saira";
+     font-size: 48px !important;
+     text-align: center;
  }
theme.py CHANGED
@@ -7,7 +7,7 @@ class CustomTheme(Base):

      def __init__(self):
          super().__init__(
-             font=fonts.GoogleFont("Bruno Ace SC")
+             font=(fonts.GoogleFont("Inter"), fonts.GoogleFont("Saira"))
          )

      off_white = "#F0F0F0"
@@ -58,6 +58,6 @@ class CustomTheme(Base):
          color_accent_soft_dark=accent_soft_dark,
          border_color_accent_subdued_dark=accent_soft_dark,

-         block_radius="15px",
+         block_radius="16px",
          container_radius="32px",
      )