Yaswanth123 commited on
Commit
087a9e2
·
verified ·
1 Parent(s): 685013e

Create dspy_modules.py

Browse files
Files changed (1) hide show
  1. dspy_modules.py +224 -0
dspy_modules.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # api/dspy_modules.py
2
+ import dspy
3
+ import json
4
+ import logging
5
+ from typing import Optional, Dict, Any, List
6
+
7
+ from dspy_signatures import (
8
+ InitialResourceSummarySignature, DynamicSummarizationSignature,
9
+ SyllabusNoResourcesSignature, SyllabusWithRawTextSignature, SyllabusWithSummariesSignature,
10
+ SyllabusNegotiationSignature, LearningStyleSignature, PersonaPromptBodyPredictSignature,
11
+ GenericInteractionSignature
12
+ )
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class InitialResourceSummarizer(dspy.Module):
    """Produce a single high-level summary report from extracted resource excerpts.

    The excerpts arrive as a mapping (e.g. filename -> extracted text); the
    underlying signature expects one JSON string, so the mapping is serialized
    before the call.
    """

    def __init__(self):
        super().__init__()
        self.summarize = dspy.Predict(InitialResourceSummarySignature)

    def forward(self, extracted_basedata_dict: Dict[str, str]):
        # Serialize the whole mapping into one pretty-printed JSON payload
        # for the signature's single input field.
        payload = json.dumps(extracted_basedata_dict, indent=2)
        result = self.summarize(resource_excerpts_json=payload)
        return result.summary_report
28
+
29
+
30
+
31
class DynamicResourceSummarizerModule(dspy.Module):
    """Summarize one learning resource into a structured dict via an LLM.

    The LLM (through ``DynamicSummarizationSignature``) is asked to emit a JSON
    string describing the resource. This module strips any markdown code fence,
    parses the JSON, and returns the resulting dict. On any failure (empty
    output, unparseable JSON, unexpected structure, or an unexpected exception)
    it returns a uniform fallback dict with ``is_fallback=True`` so callers can
    always rely on the same shape; an empty resource yields ``None``.
    """

    def __init__(self):
        super().__init__()
        # Predict suffices because the signature carries explicit formatting
        # instructions; switch to ChainOfThought if formatting proves unreliable.
        self.generate_json_summary = dspy.Predict(DynamicSummarizationSignature)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding ```json ... ``` (or bare ```) fence, if present."""
        cleaned = text.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[len("```json"):]
        elif cleaned.startswith("```"):
            cleaned = cleaned[len("```"):]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-len("```")]
        return cleaned.strip()

    @staticmethod
    def _fallback(resource_identifier: str, raw_text: str, error: str) -> Dict[str, Any]:
        """Build the uniform fallback payload returned on any parse failure."""
        return {
            "resource_identifier": resource_identifier,
            "raw_summary_text": raw_text,
            "is_fallback": True,
            "error": error,
        }

    def forward(self,
                resource_content: str,
                resource_identifier: str,
                conversation_history_str: str,
                max_length: int = 100000,
                ) -> Optional[Dict[str, Any]]:
        """Summarize ``resource_content``; see class docstring for the contract.

        Args:
            resource_content: Raw text of the resource (truncated to max_length).
            resource_identifier: Stable name used in logs and the output dict.
            conversation_history_str: Conversation context passed to the LLM.
            max_length: Truncation limit for the excerpt sent to the LLM.
        """
        if not resource_content.strip():
            logger.info("Skipping empty resource: %s", resource_identifier)
            return None

        truncated_content = resource_content[:max_length]
        if len(resource_content) > max_length:
            logger.info("Resource '%s' truncated to %d chars.",
                        resource_identifier, max_length)

        try:
            prediction = self.generate_json_summary(
                conversation_history_str=conversation_history_str,
                resource_identifier_str=resource_identifier,
                learning_material_excerpt_str=truncated_content,
            )
            raw_output = prediction.json_summary_str
            cleaned = self._strip_code_fence(raw_output)

            if not cleaned:
                logger.warning("LLM returned empty JSON summary for '%s'.",
                               resource_identifier)
                return self._fallback(resource_identifier, raw_output,
                                      "Empty JSON string")

            try:
                summary_data = json.loads(cleaned)
            except json.JSONDecodeError:
                logger.warning("Could not parse JSON summary for '%s'. Raw: %.200s",
                               resource_identifier, raw_output)
                return self._fallback(resource_identifier, raw_output,
                                      "JSONDecodeError")

            # A usable summary must be a dict echoing the resource identifier.
            if isinstance(summary_data, dict) and "resource_identifier" in summary_data:
                return summary_data

            logger.warning("Non-standard JSON structure for '%s'. Raw: %.200s",
                           resource_identifier, raw_output)
            return self._fallback(resource_identifier, raw_output,
                                  "Non-standard JSON structure")
        except Exception as e:
            # logger.exception records the traceback, replacing the old
            # print + traceback.print_exc() pair.
            logger.exception("Unexpected error during summarization for '%s': %s",
                             resource_identifier, e)
            return self._fallback(resource_identifier, str(e), type(e).__name__)
97
class SyllabusGeneratorRouter(dspy.Module):
    """Route syllabus generation to the predictor matching the resource situation.

    Three predictors cover the cases: no resources at all, raw extracted text,
    or pre-computed per-resource summaries.
    """

    def __init__(self):
        super().__init__()
        self.gen_no_resources = dspy.Predict(SyllabusNoResourcesSignature)
        self.gen_with_raw = dspy.Predict(SyllabusWithRawTextSignature)
        self.gen_with_summaries = dspy.Predict(SyllabusWithSummariesSignature)

    def forward(self,
                conversation_history_str: str,
                resource_type: str,
                resource_content: Optional[str] = None,
                ) -> str:
        """Return syllabus XML for ``resource_type`` in {NONE, RAW_TEXT, SUMMARIES}.

        Raises:
            ValueError: if resource_content is missing when required, or the
                resource_type is not one of the three known values.
        """
        if resource_type == "NONE":
            prediction = self.gen_no_resources(
                learning_conversation=conversation_history_str)
        elif resource_type == "RAW_TEXT":
            if not resource_content:
                raise ValueError("resource_content needed for RAW_TEXT type")
            prediction = self.gen_with_raw(
                raw_resource_excerpts_json=resource_content,
                learning_conversation=conversation_history_str)
        elif resource_type == "SUMMARIES":
            if not resource_content:
                raise ValueError("resource_content needed for SUMMARIES type (should be JSON string)")
            prediction = self.gen_with_summaries(
                resource_summaries_json=resource_content,
                learning_conversation=conversation_history_str)
        else:
            raise ValueError(f"Unknown resource_type: {resource_type}")

        # Guarantee the returned document is wrapped in <syllabus> tags,
        # since the LLM sometimes omits one or both.
        xml = prediction.syllabus_xml.strip()
        if not xml.lower().startswith("<syllabus>"):
            xml = f"<syllabus>\n{xml}"
        if not xml.lower().endswith("</syllabus>"):
            xml = f"{xml}\n</syllabus>"
        return xml
139
+
140
class ConversationManager(dspy.Module):
    """Drive one syllabus-negotiation turn: map user input to (action, text).

    Returns a tuple ``(action_code, display_text)`` where ``action_code`` is an
    upper-cased verb from the signature (e.g. GENERATE, MODIFY, FINALIZE,
    PERSONA) and ``display_text`` is the assistant text to show the user —
    forced to empty for the silent actions, per the signature's rules.
    """

    # Actions that must never surface assistant text to the user.
    _SILENT_ACTIONS = frozenset({"GENERATE", "MODIFY", "FINALIZE"})
    # Placeholder the LLM is instructed to emit when it has nothing to say.
    _NO_TEXT_PLACEHOLDER = "[NO_DISPLAY_TEXT]"

    def __init__(self):
        super().__init__()
        # Predict is enough while the signature encodes the conditional rules;
        # move to ChainOfThought if the LLM stops following them.
        self.manage = dspy.Predict(SyllabusNegotiationSignature)

    def forward(self, conversation_history_str: str, current_syllabus_xml: str, user_input: str):
        """Run one negotiation turn.

        Args:
            conversation_history_str: Full conversation context so far.
            current_syllabus_xml: The syllabus draft under discussion.
            user_input: The latest user turn (also present in the history).

        Returns:
            (action_code, display_text) tuple.
        """
        prediction = self.manage(
            conversation_history_str=conversation_history_str,
            current_syllabus_xml=current_syllabus_xml,
            user_input=user_input,
        )

        action = prediction.action_code.strip().upper()
        text_to_display = prediction.display_text.strip()

        if action in self._SILENT_ACTIONS:
            # Silent actions always end with empty text; warn when the LLM
            # produced substantive text anyway (rule violation worth logging).
            if text_to_display and text_to_display.upper() != self._NO_TEXT_PLACEHOLDER:
                logger.warning(
                    "Action '%s' returned with display_text: %r. "
                    "Forcing to empty as per rules.", action, text_to_display)
            text_to_display = ""
        elif action == "PERSONA" and text_to_display.upper() == self._NO_TEXT_PLACEHOLDER:
            # PERSONA may carry a brief confirmation; strip the placeholder only.
            text_to_display = ""

        return action, text_to_display
174
+
175
class LearningStyleQuestioner(dspy.Module):
    """Ask the user how they prefer to learn, given the finalized syllabus context."""

    def __init__(self):
        super().__init__()
        self.ask = dspy.Predict(LearningStyleSignature)

    def forward(self, conversation_history_str: str):
        # The history is expected to already contain the final syllabus.
        result = self.ask(
            conversation_history_with_final_syllabus=conversation_history_str)
        return result.question_to_user
183
+
184
+
185
class PersonaPromptGenerator(dspy.Module):
    """Generate the persona system-prompt body from the conversation context.

    Returns the stripped prompt-body text, or ``None`` when the LLM produced
    empty/whitespace-only output or any exception occurred.
    """

    def __init__(self):
        super().__init__()
        self.generate_prompt_body = dspy.Predict(PersonaPromptBodyPredictSignature)

    def forward(self, conversation_history_str: str):
        """Produce the persona prompt body.

        Args:
            conversation_history_str: History including learning-style and
                syllabus context.

        Returns:
            The generated prompt body (stripped), or None on failure.
        """
        try:
            prediction = self.generate_prompt_body(
                conversation_history_with_style_and_syllabus_context=conversation_history_str
            )
            prompt_body = prediction.prompt_body_text

            if not prompt_body or not prompt_body.strip():
                logger.error("LLM returned an empty or whitespace-only prompt body.")
                return None

            return prompt_body.strip()
        except Exception as e:
            # logger.exception records the traceback, replacing the old
            # print + traceback.print_exc() pair.
            logger.exception("Error in forward pass: %s", e)
            return None
211
+
212
+
213
class ExplainerModule(dspy.Module):
    """Answer a user query under explicit system instructions and chat history.

    Synchronous wrapper around a generic-interaction predictor; returns the
    model's response text with surrounding whitespace removed.
    """

    def __init__(self):
        super().__init__()
        self.explain = dspy.Predict(GenericInteractionSignature)

    def forward(self, system_instructions_str: str, history_str: str, user_query_str: str) -> str:
        reply = self.explain(
            system_instructions=system_instructions_str,
            history=history_str,
            user_query=user_query_str,
        )
        return reply.response.strip()