ric9176 committed
Commit 85fe3dc · 1 Parent(s): b008f13

Update test output formats

app.py CHANGED
@@ -41,7 +41,7 @@ def retrieve_context(query: str) -> list[str]:
 tavily_tool = TavilySearchResults(max_results=5)
 tool_belt = [tavily_tool, retrieve_context]
 
-llm = ChatOpenAI(model="gpt-4o", temperature=0)
+llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
 model = llm.bind_tools(tool_belt)
 
 # Define system prompt
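
The only functional change in app.py is the model swap from gpt-4o to gpt-4o-mini; the surrounding tool setup is untouched. As a minimal sketch of that pattern for context (assuming OPENAI_API_KEY and TAVILY_API_KEY are set, and omitting the repo's custom retrieve_context tool):

from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults

# Web search tool plus any custom tools form the "tool belt"
tavily_tool = TavilySearchResults(max_results=5)
tool_belt = [tavily_tool]  # app.py also includes its retrieve_context tool here

# The diff above only changes the model name; bind_tools is unchanged
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
model = llm.bind_tools(tool_belt)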
docs/evaluation_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
docs/evaluation_results.md ADDED
The diff for this file is too large to render. See raw diff
 
docs/evaluation_results.txt ADDED
@@ -0,0 +1,24 @@
+RAG System Evaluation Results
+==========================
+
+Test Dataset Size: 10 questions
+
+Test Questions:
+---------------
+1. Southbank Centre events this weekend
+2. What is the Imagine Festival at the Southbank Centre?
+3. What event is High Vis supporting at the Crystal Palace Bowl?
+4. What can you do for £24 in London?
+5. How can I get newsletters about the Chelsea Physic Garden's snowdrop festival and other events in London?
+6. Chelsea Physic Garden got what event this weekend?
+7. What activities are available at the Chelsea Physic Garden as mentioned in the newsletters?
+8. How can I find out about music concerts like the High Vis’s Electric Brixton show in London?
+9. What events in London in March 2025?
+10. What are some of the key events and activities planned in London for March 2025?
+11. What are some of the events and exhibitions happening in London this weekend that feature the work of Donald Rodney?
+12. What are some of the activities and events available at the Horniman Museum during the February half-term weekend for a tourist visiting London?
+
+Metric Scores:
+-------------
+| Metric | Score |
+|---------------------------|-------|
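
The metric table in the added file is cut off after its header in this render. As an illustration only (not code from this commit), per-sample scores from the RAGAS results DataFrame built in evaluate_rag.py below could be aggregated into such a table by averaging the numeric metric columns; metric_table is a hypothetical helper:

import pandas as pd

def metric_table(df: pd.DataFrame) -> str:
    # Average each numeric metric column and render a small pipe table
    scores = df.select_dtypes("number").mean()
    lines = ["| Metric | Score |", "|--------|-------|"]
    lines += [f"| {name} | {value:.4f} |" for name, value in scores.items()]
    return "\n".join(lines)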
docs/test_questions.md ADDED
@@ -0,0 +1,12 @@
+# Test Questions
+
+1. What activities or events can one attend at Alexandra Palace during the weekend in London?
+2. What are some of the events happening in London this weekend?
+3. What can visitors expect to see at The Courtauld Gallery?
+4. Who are the creators of the play 'Kyoto' and what is it about?
+5. What's at Sea Containers?
+6. What are some workshops and cultural events happening in London in March 2025 that tourists or locals can attend?
+7. What are some of the art and culture workshops and events that tourists or locals can attend in London in March 2025?
+8. What queer cinema event is happening in London in March 2025?
+9. Wher can I watch the best new queer cinema in London in March 2025?
+10. What festivals and events in London can I go to in March 2025?
evaluate_rag.py CHANGED
@@ -1,4 +1,5 @@
 import os
+from dotenv import load_dotenv
 import nltk
 from typing import List
 from ragas.llms import LangchainLLMWrapper
@@ -14,6 +15,16 @@ from ragas.metrics import (
 )
 from ragas import evaluate, RunConfig, EvaluationDataset
 
+# Load environment variables
+load_dotenv()
+
+# Ensure OpenAI API key is set
+if not os.getenv("OPENAI_API_KEY"):
+    raise ValueError("OPENAI_API_KEY not found in environment variables")
+
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+os.environ["RAGAS_APP_TOKEN"] = os.getenv("RAGAS_APP_TOKEN")
+
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_community.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -40,10 +51,12 @@ generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
 generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
 dataset = generator.generate_with_langchain_docs(docs, testset_size=10)
 
+print(dataset.upload())
+
 # Print the generated test questions
 print("\nGenerated Test Questions:")
-for i, row in dataset.to_pandas().iterrows():
-    print(f"{i+1}. {row['question']}")
+for i, test_row in enumerate(dataset):
+    print(f"{i+1}. {test_row.eval_sample.user_input}")
 
 # Set up the RAG pipeline for testing
 # Split documents
@@ -81,7 +94,7 @@ Context: {context}
 Answer:"""
 
 rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
-llm = ChatOpenAI(model="gpt-4")
+llm = ChatOpenAI(model="gpt-4o-mini")
 
 # Process each test question through the RAG pipeline
 for test_row in dataset:
@@ -100,8 +113,9 @@ for test_row in dataset:
 # Convert to evaluation dataset
 evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())
 
+
 # Set up evaluator
-evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4"))
+evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
 
 # Run evaluation with all metrics
 custom_run_config = RunConfig(timeout=360)
@@ -120,16 +134,28 @@ result = evaluate(
     run_config=custom_run_config
 )
 
-# Print results
-print("\nEvaluation Results:")
-for metric, score in result.items():
-    print(f"{metric}: {score:.4f}")
-
-# Save results to file
-with open("docs/evaluation_results.txt", "w") as f:
-    f.write("RAG System Evaluation Results\n")
-    f.write("==========================\n\n")
-    f.write("Test Dataset Size: 10 questions\n\n")
-    f.write("Metric Scores:\n")
-    for metric, score in result.items():
-        f.write(f"{metric}: {score:.4f}\n")
+print("RAW RESULT: ", result)
+print("Type of result: ", type(result))
+
+# Convert to pandas DataFrame for better formatting
+df = result.to_pandas()
+print("\nEvaluation Results as DataFrame:")
+print(df)
+
+try:
+    # Try to save as markdown
+    print("Attempting to save as markdown...")
+    import tabulate  # Try to import explicitly to verify installation
+    df.to_markdown("docs/evaluation_results.md", index=False)
+    print("Successfully saved as markdown!")
+except ImportError as e:
+    # Print detailed error message
+    print(f"Import Error: {e}")
+    print("Note: Install 'tabulate' package for markdown output. Falling back to CSV format.")
+    df.to_csv("docs/evaluation_results.csv", index=False)
+
+# Save test questions
+with open("docs/test_questions.md", "w") as f:
+    f.write("# Test Questions\n\n")
+    for i, test_row in enumerate(dataset):
+        f.write(f"{i+1}. {test_row.eval_sample.user_input}\n")
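
The headline format change here replaces the hand-written text dump with a pandas export: DataFrame.to_markdown depends on the optional tabulate package, so the script now falls back to CSV when that import fails. A condensed sketch of the same fallback, using a hypothetical save_results helper:

from pathlib import Path
import pandas as pd

def save_results(df: pd.DataFrame, out_dir: str = "docs") -> Path:
    # Write results as markdown when tabulate is available, otherwise as CSV
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    try:
        path = out / "evaluation_results.md"
        df.to_markdown(path, index=False)  # raises ImportError without tabulate
    except ImportError:
        path = out / "evaluation_results.csv"
        df.to_csv(path, index=False)
    return path

One caveat on the new environment setup: os.environ only accepts strings, so the os.environ["RAGAS_APP_TOKEN"] assignment assumes that variable is present in the .env file alongside OPENAI_API_KEY.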
rag.py CHANGED
@@ -44,7 +44,7 @@ def create_rag_pipeline(collection_name: str = "rag_collection"):
 
     # Create text splitter for chunking
     def tiktoken_len(text):
-        tokens = tiktoken.encoding_for_model("gpt-4").encode(text)
+        tokens = tiktoken.encoding_for_model("gpt-4o-mini").encode(text)
         return len(tokens)
 
     text_splitter = RecursiveCharacterTextSplitter(
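
The tokenizer change mirrors the model swap: tiktoken resolves gpt-4 to the cl100k_base encoding, while recent tiktoken releases map the gpt-4o family to o200k_base, so token-based chunk lengths shift slightly. A defensive variant (an illustration, not code from this repo) that still works if an older tiktoken does not recognise the model name:

import tiktoken

def tiktoken_len(text: str, model: str = "gpt-4o-mini") -> int:
    # Token count used by the text splitter; encoding_for_model raises
    # KeyError for model names an older tiktoken does not know about.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("o200k_base")  # gpt-4o-family encoding
    return len(encoding.encode(text))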