Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	Commit 
							
							·
						
						8d717c1
	
1
								Parent(s):
							
							81be58e
								
cleanup
Browse files
- app/VectorStore/index/id_to_uuid_3c194f90-478a-4f8e-a5ac-67776218c783.pkl +2 -2
- app/VectorStore/index/index_3c194f90-478a-4f8e-a5ac-67776218c783.bin +2 -2
- app/VectorStore/index/index_metadata_3c194f90-478a-4f8e-a5ac-67776218c783.pkl +1 -1
- app/VectorStore/index/uuid_to_id_3c194f90-478a-4f8e-a5ac-67776218c783.pkl +2 -2
- app/app.py +3 -2
- app/load_model.py +2 -5
- app/load_vectors.py +11 -25
- app/run.py +1 -1
    	
        app/VectorStore/index/id_to_uuid_3c194f90-478a-4f8e-a5ac-67776218c783.pkl
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b3fd923d38dbc7773fa8ddd035a3a12b35b36c0596120795d5441fa2631aa500
         | 
| 3 | 
            +
            size 7657
         | 
    	
        app/VectorStore/index/index_3c194f90-478a-4f8e-a5ac-67776218c783.bin
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:e8012c468a836e45dec5264f07e79a82dd9b0cfbd57b7db82ab3e5f87659e004
         | 
| 3 | 
            +
            size 779728
         | 
    	
        app/VectorStore/index/index_metadata_3c194f90-478a-4f8e-a5ac-67776218c783.pkl
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 73
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:fe883ac5dc1e9c3d5b56fe942e1fef13b990df4e9b32e59c5eb7b12bba00e7c0
         | 
| 3 | 
             
            size 73
         | 
    	
        app/VectorStore/index/uuid_to_id_3c194f90-478a-4f8e-a5ac-67776218c783.pkl
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d94d83b22ad6a388ffd24e1151e31ff2b22aaee250d0a8e442f0744bc00cffda
         | 
| 3 | 
            +
            size 8970
         | 
    	
        app/app.py
    CHANGED
    
    | @@ -21,7 +21,7 @@ else: | |
| 21 |  | 
| 22 | 
             
                model_type = st.selectbox(
         | 
| 23 | 
             
                    'Select the Documents to be used to answer your question',
         | 
| 24 | 
            -
                    ('OpenAI', ' | 
| 25 |  | 
| 26 | 
             
                if model_type=='OpenAI':
         | 
| 27 | 
             
                    if 'openai_key' not in st.session_state:
         | 
| @@ -33,6 +33,7 @@ else: | |
| 33 | 
             
                        os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
         | 
| 34 | 
             
                    llm= load_model.load_openai_model()
         | 
| 35 | 
             
                else:
         | 
|  | |
| 36 | 
             
                    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
         | 
| 37 |  | 
| 38 |  | 
| @@ -43,7 +44,7 @@ else: | |
| 43 |  | 
| 44 | 
             
                st.write('You selected:', option)
         | 
| 45 |  | 
| 46 | 
            -
                chain = load_model.create_chain(llm, collection=option)
         | 
| 47 | 
             
                try:
         | 
| 48 | 
             
                    query = st.text_area('Ask a question:', 'Hallo how are you today?')
         | 
| 49 | 
             
                    result = chain({"query": query})
         | 
|  | |
| 21 |  | 
| 22 | 
             
                model_type = st.selectbox(
         | 
| 23 | 
             
                    'Select the Documents to be used to answer your question',
         | 
| 24 | 
            +
                    ('OpenAI', 'Load local model') ) 
         | 
| 25 |  | 
| 26 | 
             
                if model_type=='OpenAI':
         | 
| 27 | 
             
                    if 'openai_key' not in st.session_state:
         | 
|  | |
| 33 | 
             
                        os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
         | 
| 34 | 
             
                    llm= load_model.load_openai_model()
         | 
| 35 | 
             
                else:
         | 
| 36 | 
            +
                    # Add more models here
         | 
| 37 | 
             
                    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
         | 
| 38 |  | 
| 39 |  | 
|  | |
| 44 |  | 
| 45 | 
             
                st.write('You selected:', option)
         | 
| 46 |  | 
| 47 | 
            +
                chain = load_model.create_chain(llm, collection=option, model_name="hkunlp/instructor-large ")
         | 
| 48 | 
             
                try:
         | 
| 49 | 
             
                    query = st.text_area('Ask a question:', 'Hallo how are you today?')
         | 
| 50 | 
             
                    result = chain({"query": query})
         | 
    	
        app/load_model.py
    CHANGED
    
    | @@ -44,7 +44,7 @@ def load_cpu_model(): | |
| 44 | 
             
                return llm
         | 
| 45 |  | 
| 46 | 
             
            @st.cache_resource(max_entries =1)
         | 
| 47 | 
            -
            def load_gpu_model(used_model | 
| 48 | 
             
                torch.cuda.empty_cache()
         | 
| 49 | 
             
                tokenizer = LlamaTokenizer.from_pretrained(used_model)
         | 
| 50 |  | 
| @@ -113,10 +113,7 @@ def load_vectorstore(model_name, collection): | |
| 113 | 
             
                    )
         | 
| 114 | 
             
                    return vectorstore
         | 
| 115 |  | 
| 116 | 
            -
            def  | 
| 117 | 
            -
                pass
         | 
| 118 | 
            -
             | 
| 119 | 
            -
            def create_chain(_llm, collection, model_name = "hkunlp/instructor-large"):
         | 
| 120 | 
             
                vectorstore = load_vectorstore(model_name, collection)
         | 
| 121 | 
             
                retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
         | 
| 122 | 
             
                chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
         | 
|  | |
| 44 | 
             
                return llm
         | 
| 45 |  | 
| 46 | 
             
            @st.cache_resource(max_entries =1)
         | 
| 47 | 
            +
            def load_gpu_model(used_model):
         | 
| 48 | 
             
                torch.cuda.empty_cache()
         | 
| 49 | 
             
                tokenizer = LlamaTokenizer.from_pretrained(used_model)
         | 
| 50 |  | 
|  | |
| 113 | 
             
                    )
         | 
| 114 | 
             
                    return vectorstore
         | 
| 115 |  | 
| 116 | 
            +
            def create_chain(_llm, collection, model_name):
         | 
|  | |
|  | |
|  | |
| 117 | 
             
                vectorstore = load_vectorstore(model_name, collection)
         | 
| 118 | 
             
                retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
         | 
| 119 | 
             
                chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
         | 
    	
        app/load_vectors.py
    CHANGED
    
    | @@ -12,7 +12,7 @@ from hashlib import sha256 | |
| 12 | 
             
            import cloudpickle
         | 
| 13 | 
             
            import logging
         | 
| 14 | 
             
            import os
         | 
| 15 | 
            -
            from load_model import load_embedding
         | 
| 16 | 
             
            import torch
         | 
| 17 | 
             
            import re
         | 
| 18 | 
             
            import pathlib
         | 
| @@ -42,34 +42,20 @@ def create_collection(collection_name, model_name, client): | |
| 42 | 
             
                return True
         | 
| 43 |  | 
| 44 | 
             
            def create_and_add(collection_name, sub_docs, model_name):
         | 
| 45 | 
            -
                client_settings = chromadb.config.Settings(
         | 
| 46 | 
            -
                    chroma_db_impl="duckdb+parquet",
         | 
| 47 | 
            -
                    persist_directory=persist_directory,
         | 
| 48 | 
            -
                    anonymized_telemetry=False
         | 
| 49 | 
            -
                )
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                client = chromadb.Client(client_settings)
         | 
| 52 | 
            -
                collection_name = collection_name # + "_" + re.sub('[^A-Za-z0-9]+', '', model_name)
         | 
| 53 | 
            -
             | 
| 54 | 
            -
                embeddings = load_embedding(model_name) 
         | 
| 55 | 
             
                logging.info(f"Adding documents to {collection_name}")
         | 
| 56 | 
            -
                 | 
| 57 | 
            -
             | 
| 58 | 
            -
                    embedding_function=embeddings,
         | 
| 59 | 
            -
                    client_settings=client_settings,
         | 
| 60 | 
            -
                    persist_directory=persist_directory,
         | 
| 61 | 
            -
                )
         | 
| 62 | 
             
                vectorstore.add_documents(documents=sub_docs, embedding=embeddings)
         | 
| 63 | 
             
                vectorstore.persist()
         | 
| 64 |  | 
| 65 | 
             
                # Test Vectorstore
         | 
| 66 | 
            -
                vectorstore2 = Chroma(
         | 
| 67 | 
            -
                collection_name=collection_name,
         | 
| 68 | 
            -
                embedding_function=embeddings,
         | 
| 69 | 
            -
                client_settings=client_settings,
         | 
| 70 | 
            -
                persist_directory=persist_directory,
         | 
| 71 | 
            -
                )
         | 
| 72 | 
            -
                print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
         | 
| 73 |  | 
| 74 | 
             
                return vectorstore
         | 
| 75 |  | 
| @@ -113,7 +99,7 @@ def load_from_web(urls, cache=True): | |
| 113 | 
             
                #update metadata
         | 
| 114 | 
             
                i=0
         | 
| 115 | 
             
                for doc in docs:
         | 
| 116 | 
            -
                    doc.metadata = {'source': docs_list[i], 'url': docs_list[i], ' | 
| 117 | 
             
                    i=i+1
         | 
| 118 | 
             
                return docs
         | 
| 119 |  | 
|  | |
| 12 | 
             
            import cloudpickle
         | 
| 13 | 
             
            import logging
         | 
| 14 | 
             
            import os
         | 
| 15 | 
            +
            from load_model import load_embedding, load_vectorstore
         | 
| 16 | 
             
            import torch
         | 
| 17 | 
             
            import re
         | 
| 18 | 
             
            import pathlib
         | 
|  | |
| 42 | 
             
                return True
         | 
| 43 |  | 
| 44 | 
             
            def create_and_add(collection_name, sub_docs, model_name):
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 45 | 
             
                logging.info(f"Adding documents to {collection_name}")
         | 
| 46 | 
            +
                embeddings = load_embedding(model_name) 
         | 
| 47 | 
            +
                vectorstore = load_vectorstore(model_name, collection_name)  
         | 
|  | |
|  | |
|  | |
|  | |
| 48 | 
             
                vectorstore.add_documents(documents=sub_docs, embedding=embeddings)
         | 
| 49 | 
             
                vectorstore.persist()
         | 
| 50 |  | 
| 51 | 
             
                # Test Vectorstore
         | 
| 52 | 
            +
                #vectorstore2 = Chroma(
         | 
| 53 | 
            +
                #collection_name=collection_name,
         | 
| 54 | 
            +
                #embedding_function=embeddings,
         | 
| 55 | 
            +
                #client_settings=client_settings,
         | 
| 56 | 
            +
                #persist_directory=persist_directory,
         | 
| 57 | 
            +
                #)
         | 
| 58 | 
            +
                #print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
         | 
| 59 |  | 
| 60 | 
             
                return vectorstore
         | 
| 61 |  | 
|  | |
| 99 | 
             
                #update metadata
         | 
| 100 | 
             
                i=0
         | 
| 101 | 
             
                for doc in docs:
         | 
| 102 | 
            +
                    doc.metadata = {'source': docs_list[i], 'url': docs_list[i], 'owner':'Heiko Wagner'}
         | 
| 103 | 
             
                    i=i+1
         | 
| 104 | 
             
                return docs
         | 
| 105 |  | 
    	
        app/run.py
    CHANGED
    
    | @@ -12,6 +12,6 @@ import cloudpickle | |
| 12 | 
             
            llm= load_model.load_openai_model()
         | 
| 13 |  | 
| 14 | 
             
            # %%
         | 
| 15 | 
            -
            chain = load_model.create_chain(llm, collection="heikospaper")
         | 
| 16 | 
             
            result = chain({"query": "What are AXAs green Goals?"})
         | 
| 17 | 
             
            print(result)
         | 
|  | |
| 12 | 
             
            llm= load_model.load_openai_model()
         | 
| 13 |  | 
| 14 | 
             
            # %%
         | 
| 15 | 
            +
            chain = load_model.create_chain(llm, collection="heikospaper", model_name="hkunlp/instructor-large")
         | 
| 16 | 
             
            result = chain({"query": "What are AXAs green Goals?"})
         | 
| 17 | 
             
            print(result)
         |