import gradio as gr
import openai
import os
import nltk
import shutil
import numpy as np
import torch
from datasets import load_dataset
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from sklearn.metrics import mean_squared_error
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ✅ Load Pretrained Embedding Model
model_name = "bert-base-uncased"
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": device},
)

# ✅ OpenAI API Key (read from the OPENAI_API_KEY environment variable)
openai.api_key = os.getenv("OPENAI_API_KEY")

# ✅ Download NLTK Dependencies
nltk.download('punkt')
nltk.download('punkt_tab')  # required by sent_tokenize on newer NLTK releases

# ✅ Load RunGalileo RagBench Datasets
ragbench = {}
for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa',
                'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
    ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)

print("Datasets Loaded ✅")

# ✅ Function to Chunk Documents
def chunk_documents_semantic(documents, max_chunk_size=500):
    chunks = []
    for doc in documents:
        # Some RagBench rows store 'documents' as a list of passages; join them into one string.
        if isinstance(doc, list):
            doc = " ".join(doc)
        sentences = nltk.sent_tokenize(doc)
        current_chunk = ""
        for sentence in sentences:
            if len(current_chunk) + len(sentence) <= max_chunk_size:
                current_chunk += sentence + " "
            else:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + " "
        if current_chunk:
            chunks.append(current_chunk.strip())
    return chunks

# ✅ Chunk the Entire Dataset
chunked_ragbench = {}
for dataset_name in ragbench:
    for split in ragbench[dataset_name]:
        original_documents_full = ragbench[dataset_name][split]['documents']
        chunked_documents_full = chunk_documents_semantic(original_documents_full)
        # Key by dataset and split so identically named splits don't overwrite each other.
        chunked_ragbench[f"{dataset_name}/{split}"] = chunked_documents_full

print("Chunking Completed ✅")

# ✅ Setup ChromaDB
persist_directory = "chroma_db_directory"
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)

# Index every chunked split. Embedding the full RagBench corpus can be slow;
# restrict the loop above to a subset of datasets/splits for a quicker demo.
all_chunks = [chunk for chunks in chunked_ragbench.values() for chunk in chunks]
documents = [Document(page_content=chunk) for chunk in all_chunks]
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory=persist_directory
)
vectordb.persist()

# ✅ Retrieve Documents
def retrieve_documents(question, k=5):
    docs = vectordb.similarity_search(question, k=k)
    if not docs:
        return ["⚠️ No relevant documents found. Try a different query."]
    return [doc.page_content for doc in docs]
full_prompt = f"Context: {context}\n\nQuestion: {question}" try: client = openai.OpenAI() response = client.chat.completions.create( model="gpt-4", messages=[ {"role": "system", "content": "You are an AI assistant that answers user queries based on the given context."}, {"role": "user", "content": full_prompt} ], max_tokens=300, temperature=0.7 ) return response.choices[0].message.content.strip() except Exception as e: return f"Error generating response: {str(e)}" # ✅ Compute Context Relevance, Utilization, Completeness, Adherence def compute_cosine_similarity(text1, text2): vectorizer = TfidfVectorizer() vectors = vectorizer.fit_transform([text1, text2]) return cosine_similarity(vectors[0], vectors[1])[0][0] def context_relevance(question, relevant_documents): combined_docs = " ".join(relevant_documents) return compute_cosine_similarity(question, combined_docs) def context_utilization(response, relevant_documents): combined_docs = " ".join(relevant_documents) return compute_cosine_similarity(response, combined_docs) def completeness(response, ground_truth_answer): return compute_cosine_similarity(response, ground_truth_answer) def adherence(response, relevant_documents): combined_docs = " ".join(relevant_documents) response_tokens = set(response.split()) relevant_tokens = set(combined_docs.split()) supported_tokens = response_tokens.intersection(relevant_tokens) return len(supported_tokens) / len(response_tokens) def compute_rmse(predicted_values, ground_truth_values): return np.sqrt(mean_squared_error(ground_truth_values, predicted_values)) # ✅ Full RAG Pipeline def rag_pipeline(question): retrieved_docs = retrieve_documents(question, k=5) context = " ".join(retrieved_docs) response = generate_response(question, context) # Compute Evaluation Metrics ground_truth_answer = "Sample ground truth answer from dataset" predicted_metrics = { "context_relevance": context_relevance(question, retrieved_docs), "context_utilization": context_utilization(response, retrieved_docs), "completeness": completeness(response, ground_truth_answer), "adherence": adherence(response, retrieved_docs) } return response, "\n\n".join(retrieved_docs), predicted_metrics # ✅ Gradio UI Interface iface = gr.Interface( fn=rag_pipeline, inputs=gr.Textbox(label="Enter your question"), outputs=[ gr.Textbox(label="Generated Response"), gr.Textbox(label="Retrieved Documents"), gr.JSON(label="Evaluation Metrics") ], title="RAG-Based QA System for RunGalileo", description="Enter a question and retrieve relevant documents with AI-generated response & evaluation metrics." ) # ✅ Launch the Gradio App if __name__ == "__main__": iface.launch()