File size: 10,983 Bytes
087a9e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# api/dspy_modules.py
import dspy
import json
import logging
from typing import Optional, Dict, Any, List
from dspy_signatures import (
InitialResourceSummarySignature, DynamicSummarizationSignature,
SyllabusNoResourcesSignature, SyllabusWithRawTextSignature, SyllabusWithSummariesSignature,
SyllabusNegotiationSignature, LearningStyleSignature, PersonaPromptBodyPredictSignature,
GenericInteractionSignature
)
logger = logging.getLogger(__name__)
class InitialResourceSummarizer(dspy.Module):
    """Generates a single summary report from a dict of extracted resource excerpts.

    The excerpt dict is serialized to JSON before being handed to the LLM,
    since the signature's input field expects one JSON string.
    """

    def __init__(self):
        super().__init__()
        self.summarize = dspy.Predict(InitialResourceSummarySignature)

    def forward(self, extracted_basedata_dict: Dict[str, str]) -> str:
        """Serialize the excerpts and return the LLM-produced summary report."""
        serialized_excerpts = json.dumps(extracted_basedata_dict, indent=2)
        result = self.summarize(resource_excerpts_json=serialized_excerpts)
        return result.summary_report
class DynamicResourceSummarizerModule(dspy.Module):
    """Summarizes a single learning resource into a structured JSON dict.

    Wraps a dspy.Predict over DynamicSummarizationSignature, strips any
    markdown code fences from the LLM output, and parses it as JSON.
    Callers always receive either None (empty input) or a dict; on any
    failure a uniform fallback dict with ``is_fallback=True`` is returned.
    """

    def __init__(self):
        super().__init__()
        # Predict suffices: the signature carries explicit formatting
        # instructions. ChainOfThought is a fallback option if the JSON
        # formatting proves unreliable in practice.
        self.generate_json_summary = dspy.Predict(DynamicSummarizationSignature)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Remove a leading ```json / ``` fence and a trailing ``` fence."""
        cleaned = text.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[len("```json"):]
        elif cleaned.startswith("```"):
            cleaned = cleaned[len("```"):]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-len("```")]
        return cleaned.strip()

    def forward(self,
                resource_content: str,
                resource_identifier: str,
                conversation_history_str: str,
                max_length: int = 100000
                ) -> Optional[Dict[str, Any]]:
        """Summarize one resource.

        Args:
            resource_content: Raw text of the learning material.
            resource_identifier: Name/id used in logs and in the result dict.
            conversation_history_str: Formatted conversation history string.
            max_length: Truncation limit for the material excerpt.

        Returns:
            The parsed summary dict on success, a fallback dict on failure,
            or None when the input content is empty/whitespace.
        """
        if not resource_content.strip():
            logger.info("[DynamicResourceSummarizerModule] Skipping empty resource: %s",
                        resource_identifier)
            return None

        truncated_content = resource_content[:max_length]
        if len(resource_content) > max_length:
            logger.info("[DynamicResourceSummarizerModule] Resource '%s' truncated to %d chars.",
                        resource_identifier, max_length)

        def _fallback(raw_text: str, error: str) -> Dict[str, Any]:
            # Uniform failure payload so downstream code can detect fallbacks.
            return {"resource_identifier": resource_identifier,
                    "raw_summary_text": raw_text,
                    "is_fallback": True,
                    "error": error}

        try:
            prediction = self.generate_json_summary(
                conversation_history_str=conversation_history_str,
                resource_identifier_str=resource_identifier,
                learning_material_excerpt_str=truncated_content
            )
            raw_json_string_output = prediction.json_summary_str
            cleaned_json_str = self._strip_code_fences(raw_json_string_output)

            if not cleaned_json_str:
                logger.warning("[DynamicResourceSummarizerModule] LLM returned empty string "
                               "for JSON summary for '%s'.", resource_identifier)
                return _fallback(raw_json_string_output, "Empty JSON string")

            try:
                summary_data_dict = json.loads(cleaned_json_str)
            except json.JSONDecodeError:
                logger.warning("[DynamicResourceSummarizerModule] Could not parse JSON from LLM "
                               "summary for '%s'. Raw output: %.200s...",
                               resource_identifier, raw_json_string_output)
                return _fallback(raw_json_string_output, "JSONDecodeError")

            if isinstance(summary_data_dict, dict) and "resource_identifier" in summary_data_dict:
                return summary_data_dict  # Success.

            logger.warning("[DynamicResourceSummarizerModule] For '%s', LLM produced non-standard "
                           "JSON structure after cleaning. Output: %.200s...",
                           resource_identifier, raw_json_string_output)
            return _fallback(raw_json_string_output, "Non-standard JSON structure")
        except Exception as e:
            # Broad catch is deliberate: the caller expects a dict, never an exception.
            logger.exception("[DynamicResourceSummarizerModule] Unexpected error during "
                             "summarization for '%s': %s", resource_identifier, e)
            return _fallback(str(e), type(e).__name__)
class SyllabusGeneratorRouter(dspy.Module):
    """Routes syllabus generation to the predictor matching the resource type.

    Supported resource types: "NONE" (no materials), "RAW_TEXT" (raw excerpt
    JSON), and "SUMMARIES" (pre-summarized resources as a JSON string).
    """

    def __init__(self):
        super().__init__()
        self.gen_no_resources = dspy.Predict(SyllabusNoResourcesSignature)
        self.gen_with_raw = dspy.Predict(SyllabusWithRawTextSignature)
        self.gen_with_summaries = dspy.Predict(SyllabusWithSummariesSignature)

    def forward(self,
                conversation_history_str: str,
                resource_type: str,
                resource_content: Optional[str] = None,
                ) -> str:
        """Generate a syllabus XML string.

        Raises:
            ValueError: when resource_content is missing for a type that
                requires it, or when resource_type is unrecognized.
        """
        history_kwargs = {"learning_conversation": conversation_history_str}

        if resource_type == "NONE":
            prediction = self.gen_no_resources(**history_kwargs)
        elif resource_type == "RAW_TEXT":
            if not resource_content:
                raise ValueError("resource_content needed for RAW_TEXT type")
            prediction = self.gen_with_raw(raw_resource_excerpts_json=resource_content,
                                           **history_kwargs)
        elif resource_type == "SUMMARIES":
            if not resource_content:
                raise ValueError("resource_content needed for SUMMARIES type (should be JSON string)")
            prediction = self.gen_with_summaries(resource_summaries_json=resource_content,
                                                 **history_kwargs)
        else:
            raise ValueError(f"Unknown resource_type: {resource_type}")

        # Guarantee the result is wrapped in <syllabus>...</syllabus> tags,
        # since the LLM sometimes omits them.
        xml_text = prediction.syllabus_xml.strip()
        if not xml_text.lower().startswith("<syllabus>"):
            xml_text = f"<syllabus>\n{xml_text}"
        if not xml_text.lower().endswith("</syllabus>"):
            xml_text = f"{xml_text}\n</syllabus>"
        return xml_text
class ConversationManager(dspy.Module):
    """Drives the syllabus-negotiation turn: maps the latest user input to an
    action code plus optional display text.

    Uses dspy.Predict over SyllabusNegotiationSignature; the signature itself
    encodes the conditional rules for when display_text must be empty.
    """

    def __init__(self):
        super().__init__()
        # Predict is used because the signature is detailed; switch to
        # ChainOfThought if the LLM fails to follow the display_text rules.
        self.manage = dspy.Predict(SyllabusNegotiationSignature)

    def forward(self, conversation_history_str: str, current_syllabus_xml: str, user_input: str):
        """Return (action_code, display_text) for the latest user turn.

        user_input is the newest message; the full context lives in
        conversation_history_str, which the signature consumes alongside it.
        """
        prediction = self.manage(
            conversation_history_str=conversation_history_str,
            current_syllabus_xml=current_syllabus_xml,
            user_input=user_input,
        )
        action = prediction.action_code.strip().upper()
        text_to_display = prediction.display_text.strip()

        # Enforce the signature's display_text rules:
        #  - actions that trigger generation must carry no display text;
        #  - the literal placeholder "[NO_DISPLAY_TEXT]" always means empty.
        if action in ("GENERATE", "MODIFY", "FINALIZE"):
            if text_to_display and text_to_display.upper() != "[NO_DISPLAY_TEXT]":
                logger.warning("[ConversationManager] Action '%s' returned with display_text: "
                               "'%s'. Forcing to empty as per rules.", action, text_to_display)
            text_to_display = ""
        elif text_to_display.upper() == "[NO_DISPLAY_TEXT]":
            # Covers every other action, including PERSONA, so no separate
            # PERSONA placeholder check is needed.
            text_to_display = ""
        return action, text_to_display
class LearningStyleQuestioner(dspy.Module):
    """Asks the user about their preferred learning style once the syllabus is final."""

    def __init__(self):
        super().__init__()
        self.ask = dspy.Predict(LearningStyleSignature)

    def forward(self, conversation_history_str: str) -> str:
        """Return the question the LLM wants to pose to the user."""
        result = self.ask(conversation_history_with_final_syllabus=conversation_history_str)
        return result.question_to_user
class PersonaPromptGenerator(dspy.Module):
    """Generates the body of a persona system prompt from the conversation context.

    Returns None on any failure (empty LLM output or an exception) so the
    caller can fall back to a default prompt.
    """

    def __init__(self):
        super().__init__()
        self.generate_prompt_body = dspy.Predict(PersonaPromptBodyPredictSignature)

    def forward(self, conversation_history_str: str) -> Optional[str]:
        """Return the stripped persona prompt body, or None on failure."""
        try:
            prediction_object = self.generate_prompt_body(
                conversation_history_with_style_and_syllabus_context=conversation_history_str
            )
            prompt_body = prediction_object.prompt_body_text
            if not prompt_body or not prompt_body.strip():
                logger.error("[PersonaPromptGenerator] LLM returned an empty or "
                             "whitespace-only prompt body.")
                return None
            return prompt_body.strip()
        except Exception as e:
            # Broad catch is deliberate: the caller handles None as a fallback.
            # logger.exception records the traceback automatically.
            logger.exception("[PersonaPromptGenerator] Error in forward pass: %s", e)
            return None
class ExplainerModule(dspy.Module):
    """Answers a user query given system instructions and conversation history."""

    def __init__(self):
        super().__init__()
        self.explain = dspy.Predict(GenericInteractionSignature)

    def forward(self, system_instructions_str: str, history_str: str, user_query_str: str) -> str:
        """Return the LLM's stripped response to the user query."""
        result = self.explain(
            system_instructions=system_instructions_str,
            history=history_str,
            user_query=user_query_str,
        )
        return result.response.strip()