Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Ankur Goyal
		
	committed on
		
		
					Commit 
							
							·
						
						bc12901
	
1
								Parent(s):
							
							9c27f12
								
Initial Commit
Browse files
- .gitignore +4 -0
- README.md +5 -6
- app.py +50 -0
- requirements.txt +3 -0
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            venv
         | 
| 2 | 
            +
            *.swo
         | 
| 3 | 
            +
            *.swp
         | 
| 4 | 
            +
            *.pyc
         | 
    	
        README.md
    CHANGED
    
    | @@ -1,13 +1,12 @@ | |
| 1 | 
             
            ---
         | 
| 2 | 
            -
            title:  | 
| 3 | 
            -
            emoji:  | 
| 4 | 
            -
            colorFrom:  | 
| 5 | 
            -
            colorTo:  | 
| 6 | 
             
            sdk: streamlit
         | 
| 7 | 
             
            sdk_version: 1.10.0
         | 
| 8 | 
             
            app_file: app.py
         | 
| 9 | 
            -
            pinned:  | 
| 10 | 
            -
            license: mit
         | 
| 11 | 
             
            ---
         | 
| 12 |  | 
| 13 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
|  | |
| 1 | 
             
            ---
         | 
| 2 | 
            +
            title: DocQuery
         | 
| 3 | 
            +
            emoji: 🦉
         | 
| 4 | 
            +
            colorFrom: gray
         | 
| 5 | 
            +
            colorTo: pink
         | 
| 6 | 
             
            sdk: streamlit
         | 
| 7 | 
             
            sdk_version: 1.10.0
         | 
| 8 | 
             
            app_file: app.py
         | 
| 9 | 
            +
            pinned: true
         | 
|  | |
| 10 | 
             
            ---
         | 
| 11 |  | 
| 12 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,50 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            os.environ["TOKENIZERS_PARALLELISM"] = "false"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            import streamlit as st
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            import torch
         | 
| 8 | 
            +
            from docquery.pipeline import get_pipeline
         | 
| 9 | 
            +
            from docquery.document import load_bytes
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            device = "cuda" if torch.cuda.is_available() else "cpu"
         | 
| 12 | 
            +
            pipeline = get_pipeline(device=device)
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            def process_document(file, question):
         | 
| 16 | 
            +
                # prepare encoder inputs
         | 
| 17 | 
            +
                document = load_document(file.name)
         | 
| 18 | 
            +
                return pipeline(question=question, **document.context)
         | 
| 19 | 
            +
             | 
| 20 | 
            +
             | 
| 21 | 
            +
            def ensure_list(x):
         | 
| 22 | 
            +
                if isinstance(x, list):
         | 
| 23 | 
            +
                    return x
         | 
| 24 | 
            +
                else:
         | 
| 25 | 
            +
                    return [x]
         | 
| 26 | 
            +
             | 
| 27 | 
            +
             | 
| 28 | 
            +
            st.title("DocQuery: Query Documents Using NLP")
         | 
| 29 | 
            +
            file = st.file_uploader("Upload a PDF or Image document")
         | 
| 30 | 
            +
            question = st.text_input("QUESTION", "")
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            document = None
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            if file is not None:
         | 
| 35 | 
            +
                col1, col2 = st.columns(2)
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                document = load_bytes(file, file.name)
         | 
| 38 | 
            +
                col1.image(document.preview, use_column_width=True)
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            if document is not None and question is not None and len(question) > 0:
         | 
| 41 | 
            +
                predictions = pipeline(question=question, **document.context)
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                col2.header("Probabilities")
         | 
| 44 | 
            +
                for p in ensure_list(predictions):
         | 
| 45 | 
            +
                    col2.subheader(f"{ p['answer'] }: { round(p['score'] * 100, 1)}%")
         | 
| 46 | 
            +
             | 
| 47 | 
            +
             | 
| 48 | 
            +
            "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            "[Github Repo](https://github.com/impira/docquery)"
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            torch
         | 
| 2 | 
            +
            git+https://github.com/huggingface/transformers.git
         | 
| 3 | 
            +
            docquery
         | 
