"""Gradio entry point for the "Capstone Project Group 10" RAG demo.

At import time this module logs in to the Hugging Face Hub, loads the
extracted dataset, creates the Milvus collection via ``CreateMilvusDbSchema``
and defines the sample query that is pushed through the search / rerank /
generate / score helpers further down the file.
"""

import os

import gradio as gr
from huggingface_hub import dataset_info, login, whoami
from sentence_transformers import SentenceTransformer

from loaddataset import ExtractRagBenchData
from createmilvusschema import CreateMilvusDbSchema
from insertmilvushelper import EmbedAllDocumentsAndInsert
from searchmilvushelper import SearchTopKDocuments
from finetuneresults import FineTuneAndRerankSearchResults
from generationhelper import GenerateAnswer
from formatresultshelper import FormatAndScores
from calculatescores import CalculateScores
from model import generate_response

# Load embedding model
QUERY_EMBEDDING_MODEL = SentenceTransformer('all-MiniLM-L6-v2')

# Model identifiers handed to the rerank / generation / evaluation helpers.
RERANKING_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
PROMPT_MODEL = "llama-3.3-70b-specdec"
EVAL_MODEL = "llama-3.3-70b-specdec"

# Window parameters for the (currently disabled) embedding/insert step.
WINDOW_SIZE = 5
OVERLAP = 2

# Number of hits requested from the Milvus search.
RETRIVE_TOP_K_SIZE = 10

# NOTE(review): if HF_TOKEN is unset this passes None to login(); confirm that
# is acceptable where this runs (huggingface_hub may fall back to prompting).
hf_token = os.getenv("HF_TOKEN")
login(hf_token)

rag_extracted_data = ExtractRagBenchData()
print(rag_extracted_data.head(5))

# Invoke the create-Milvus-DB function.
try:
    db_collection = CreateMilvusDbSchema()
except Exception as e:
    print(f"Error creating Milvus DB schema: {e}")
    # Re-raise: the rest of the module needs db_collection, so continuing
    # would only fail later with an unrelated NameError that hides the cause.
    raise

# Insert embeddings into the Milvus DB. Currently disabled; uncomment to run
# the embedding + insert step.
# EmbedAllDocumentsAndInsert(
#     QUERY_EMBEDDING_MODEL,
#     rag_extracted_data,
#     db_collection,
#     window_size=WINDOW_SIZE,
#     overlap=OVERLAP,
# )

query = "what would the net revenue have been in 2015 if there wasn't a stipulated settlement from the business combination in october 2015?"
# --- Sample query: retrieve, rerank, answer, evaluate -----------------------

# Search Milvus for the top-k chunks matching the query.
results_for_top10_chunks = SearchTopKDocuments(
    db_collection,
    query,
    QUERY_EMBEDDING_MODEL,
    top_k=RETRIVE_TOP_K_SIZE,
)

# Rerank the retrieved chunks with the reranking model.
reranked_results = FineTuneAndRerankSearchResults(
    results_for_top10_chunks,
    rag_extracted_data,
    query,
    RERANKING_MODEL,
)

# Generate the answer from the three best reranked rows.
answer = GenerateAnswer(query, reranked_results.head(3), PROMPT_MODEL)

# Evaluate the answer against the single best reranked row.
_evaluation = FormatAndScores(query, reranked_results.head(1), answer, EVAL_MODEL)
(
    completion_result,
    relevant_sentence_keys,
    all_utilized_sentence_keys,
    support_keys,
    support_level,
) = _evaluation

for _value in (
    relevant_sentence_keys,
    all_utilized_sentence_keys,
    support_keys,
    support_level,
    completion_result,
):
    print(_value)

# Look up the dataset row whose "id" matches the best hit's "doc_id".
document_id = reranked_results.head(1)['doc_id'].values[0]
_id_mask = rag_extracted_data["id"] == document_id
extarcted_row_for_given_id = rag_extracted_data[_id_mask]

_scores = CalculateScores(
    relevant_sentence_keys,
    all_utilized_sentence_keys,
    support_keys,
    support_level,
    extarcted_row_for_given_id,
)
score1, score2, score3 = _scores

for _score in (score1, score2, score3):
    print(_score)


def chatbot(prompt):
    """Gradio callback: return the result of ``whoami()``.

    The ``prompt`` argument is accepted but not used.

    NOTE(review): this hands whatever ``whoami()`` reports about the logged-in
    Hugging Face account to every user of the interface. It looks like a
    placeholder; confirm before exposing the app publicly.
    """
    account_info = whoami()
    return account_info


iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Capstone Project Group 10",
)

if __name__ == "__main__":
    iface.launch()