Update test output formats
- app.py +1 -1
- docs/evaluation_results.csv +0 -0
- docs/evaluation_results.md +0 -0
- docs/evaluation_results.txt +24 -0
- docs/test_questions.md +12 -0
- evaluate_rag.py +43 -17
- rag.py +1 -1
app.py
CHANGED
@@ -41,7 +41,7 @@ def retrieve_context(query: str) -> list[str]:
 tavily_tool = TavilySearchResults(max_results=5)
 tool_belt = [tavily_tool, retrieve_context]

-llm = ChatOpenAI(model="gpt-4o", temperature=0)
+llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
 model = llm.bind_tools(tool_belt)

 # Define system prompt
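For context on the hunk above: bind_tools wraps the chat model so it can request either tool in tool_belt. A minimal sketch of how the bound model is typically invoked downstream (the invoke call and sample message are illustrative assumptions, not part of this commit):

from langchain_core.messages import HumanMessage

# `model` is the tool-bound chat model from app.py above
response = model.invoke([HumanMessage(content="What's on at the Southbank Centre this weekend?")])

# If the model chose to call a tool (tavily_tool or retrieve_context),
# the requests appear as structured tool calls on the AI message
for tool_call in response.tool_calls:
    print(tool_call["name"], tool_call["args"])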
docs/evaluation_results.csv
ADDED
The diff for this file is too large to render. See raw diff.

docs/evaluation_results.md
ADDED
The diff for this file is too large to render. See raw diff.
docs/evaluation_results.txt
ADDED
@@ -0,0 +1,24 @@
+RAG System Evaluation Results
+==========================
+
+Test Dataset Size: 10 questions
+
+Test Questions:
+---------------
+1. Southbank Centre events this weekend
+2. What is the Imagine Festival at the Southbank Centre?
+3. What event is High Vis supporting at the Crystal Palace Bowl?
+4. What can you do for £24 in London?
+5. How can I get newsletters about the Chelsea Physic Garden's snowdrop festival and other events in London?
+6. Chelsea Physic Garden got what event this weekend?
+7. What activities are available at the Chelsea Physic Garden as mentioned in the newsletters?
+8. How can I find out about music concerts like the High Vis’s Electric Brixton show in London?
+9. What events in London in March 2025?
+10. What are some of the key events and activities planned in London for March 2025?
+11. What are some of the events and exhibitions happening in London this weekend that feature the work of Donald Rodney?
+12. What are some of the activities and events available at the Horniman Museum during the February half-term weekend for a tourist visiting London?
+
+Metric Scores:
+-------------
+| Metric | Score |
+|---------------------------|-------|
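The committed report pairs a plain-text question list with a pipe table of metric scores (the table here ends at its header row). A minimal sketch of writing that layout from a pandas DataFrame of scores; the write_report function, its signature, and the 'metric'/'score' column names are illustrative assumptions, not from the repo:

import pandas as pd

def write_report(df: pd.DataFrame, questions: list[str], path: str = "docs/evaluation_results.txt") -> None:
    # Mirrors the layout of the committed evaluation_results.txt
    with open(path, "w") as f:
        f.write("RAG System Evaluation Results\n")
        f.write("==========================\n\n")
        f.write(f"Test Dataset Size: {len(questions)} questions\n\n")
        f.write("Test Questions:\n")
        f.write("---------------\n")
        for i, q in enumerate(questions, start=1):
            f.write(f"{i}. {q}\n")
        f.write("\nMetric Scores:\n")
        f.write("-------------\n")
        f.write("| Metric | Score |\n")
        f.write("|---------------------------|-------|\n")
        for _, row in df.iterrows():
            f.write(f"| {row['metric']:<25} | {row['score']:.3f} |\n")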
docs/test_questions.md
ADDED
@@ -0,0 +1,12 @@
+# Test Questions
+
+1. What activities or events can one attend at Alexandra Palace during the weekend in London?
+2. What are some of the events happening in London this weekend?
+3. What can visitors expect to see at The Courtauld Gallery?
+4. Who are the creators of the play 'Kyoto' and what is it about?
+5. What's at Sea Containers?
+6. What are some workshops and cultural events happening in London in March 2025 that tourists or locals can attend?
+7. What are some of the art and culture workshops and events that tourists or locals can attend in London in March 2025?
+8. What queer cinema event is happening in London in March 2025?
+9. Wher can I watch the best new queer cinema in London in March 2025?
+10. What festivals and events in London can I go to in March 2025?
evaluate_rag.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+from dotenv import load_dotenv
 import nltk
 from typing import List
 from ragas.llms import LangchainLLMWrapper
@@ -14,6 +15,16 @@ from ragas.metrics import (
 )
 from ragas import evaluate, RunConfig, EvaluationDataset

+# Load environment variables
+load_dotenv()
+
+# Ensure OpenAI API key is set
+if not os.getenv("OPENAI_API_KEY"):
+    raise ValueError("OPENAI_API_KEY not found in environment variables")
+
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+os.environ["RAGAS_APP_TOKEN"] = os.getenv("RAGAS_APP_TOKEN")
+
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_community.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -40,10 +51,12 @@ generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
 generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
 dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

+print(dataset.upload())
+
 # Print the generated test questions
 print("\nGenerated Test Questions:")
-for i,
-print(f"{i+1}. {
+for i, test_row in enumerate(dataset):
+    print(f"{i+1}. {test_row.eval_sample.user_input}")

 # Set up the RAG pipeline for testing
 # Split documents
@@ -81,7 +94,7 @@ Context: {context}
 Answer:"""

 rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
-llm = ChatOpenAI(model="gpt-
+llm = ChatOpenAI(model="gpt-4o-mini")

 # Process each test question through the RAG pipeline
 for test_row in dataset:
@@ -100,8 +113,9 @@ for test_row in dataset:
 # Convert to evaluation dataset
 evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

+
 # Set up evaluator
-evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-
+evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))

 # Run evaluation with all metrics
 custom_run_config = RunConfig(timeout=360)
@@ -120,16 +134,28 @@ result = evaluate(
 run_config=custom_run_config
 )

-
-print("
-
-
-
-
-
-
-
-
-
-
+print("RAW RESULT: ", result)
+print("Type of result: ", type(result))
+
+# Convert to pandas DataFrame for better formatting
+df = result.to_pandas()
+print("\nEvaluation Results as DataFrame:")
+print(df)
+
+try:
+    # Try to save as markdown
+    print("Attempting to save as markdown...")
+    import tabulate  # Try to import explicitly to verify installation
+    df.to_markdown("docs/evaluation_results.md", index=False)
+    print("Successfully saved as markdown!")
+except ImportError as e:
+    # Print detailed error message
+    print(f"Import Error: {e}")
+    print("Note: Install 'tabulate' package for markdown output. Falling back to CSV format.")
+    df.to_csv("docs/evaluation_results.csv", index=False)
+
+# Save test questions
+with open("docs/test_questions.md", "w") as f:
+    f.write("# Test Questions\n\n")
+    for i, test_row in enumerate(dataset):
+        f.write(f"{i+1}. {test_row.eval_sample.user_input}\n")
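The new dotenv bootstrap in evaluate_rag.py reads two variables from a local .env file. A minimal sketch of that file (values are placeholders; only the variable names come from the diff):

OPENAI_API_KEY=sk-your-openai-key
RAGAS_APP_TOKEN=your-ragas-app-token

Note that only OPENAI_API_KEY is guarded: if RAGAS_APP_TOKEN is absent, os.environ["RAGAS_APP_TOKEN"] = os.getenv("RAGAS_APP_TOKEN") assigns None and raises a TypeError.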
rag.py
CHANGED
@@ -44,7 +44,7 @@ def create_rag_pipeline(collection_name: str = "rag_collection"):

     # Create text splitter for chunking
     def tiktoken_len(text):
-        tokens = tiktoken.encoding_for_model("gpt-
+        tokens = tiktoken.encoding_for_model("gpt-4o-mini").encode(text)
         return len(tokens)

     text_splitter = RecursiveCharacterTextSplitter(
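A sketch of how a token-based length function like the repaired tiktoken_len typically plugs into the splitter; the chunk sizes and sample call are illustrative assumptions, and encoding_for_model("gpt-4o-mini") needs a tiktoken release that knows the gpt-4o model family:

import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter

def tiktoken_len(text: str) -> int:
    # Count tokens with the model's own encoding so chunk sizes
    # line up with what gpt-4o-mini actually sees
    tokens = tiktoken.encoding_for_model("gpt-4o-mini").encode(text)
    return len(tokens)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,        # interpreted in tokens because of length_function
    chunk_overlap=50,
    length_function=tiktoken_len,
)
chunks = text_splitter.split_text("some long document text ...")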