# api/dspy_modules.py
import dspy
import json
import logging
from typing import Optional, Dict, Any, List

from dspy_signatures import (
    InitialResourceSummarySignature, DynamicSummarizationSignature,
    SyllabusNoResourcesSignature, SyllabusWithRawTextSignature, SyllabusWithSummariesSignature,
    SyllabusNegotiationSignature, LearningStyleSignature, PersonaPromptBodyPredictSignature,
    GenericInteractionSignature
)


logger = logging.getLogger(__name__)


class InitialResourceSummarizer(dspy.Module):
    def __init__(self):
        super().__init__()
        self.summarize = dspy.Predict(InitialResourceSummarySignature)

    def forward(self, extracted_basedata_dict: Dict[str, str]):
        # Convert the dict to a JSON string for the signature's input field
        json_input_str = json.dumps(extracted_basedata_dict, indent=2)
        prediction = self.summarize(resource_excerpts_json=json_input_str)
        return prediction.summary_report  # Return only the summary text, not the full Prediction object
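
# Usage sketch (illustrative; assumes an LM is already configured, e.g.
# dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) -- the model name is an
# arbitrary example, not a project requirement):
#
#   summarizer = InitialResourceSummarizer()
#   report = summarizer({"intro.pdf": "First pages...", "notes.md": "Excerpt..."})
#   print(report)  # a single summary_report string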



class DynamicResourceSummarizerModule(dspy.Module):
    def __init__(self):
        super().__init__()
        # Using Predict, as the task is to generate a structured string based on clear instructions.
        # If formatting is tricky, ChainOfThought could be an alternative.
        self.generate_json_summary = dspy.Predict(DynamicSummarizationSignature)

    def forward(self,
                resource_content: str,
                resource_identifier: str,
                conversation_history_str: str,  # Conversation history pre-formatted as a single string
                max_length: int = 100000  # Truncation limit for the material excerpt
               ) -> Optional[Dict[str, Any]]:  # Returns a parsed dict, a fallback dict, or None

        if not resource_content.strip():
            logger.info(f"[DynamicResourceSummarizerModule] Skipping empty resource: {resource_identifier}")
            return None

        truncated_content = resource_content[:max_length]
        if len(resource_content) > max_length:
            logger.info(f"[DynamicResourceSummarizerModule] Resource '{resource_identifier}' truncated to {max_length} chars.")

        try:
            # Call the DSPy predictor with the pre-formatted history string
            prediction = self.generate_json_summary(
                conversation_history_str=conversation_history_str,
                resource_identifier_str=resource_identifier,
                learning_material_excerpt_str=truncated_content
            )
            raw_json_string_output = prediction.json_summary_str

            # Strip markdown code fences (```json ... ```) the LLM may wrap around its output
            cleaned_json_str = raw_json_string_output.strip()
            if cleaned_json_str.startswith("```json"):
                cleaned_json_str = cleaned_json_str[len("```json"):]
            elif cleaned_json_str.startswith("```"):
                cleaned_json_str = cleaned_json_str[len("```"):]
            if cleaned_json_str.endswith("```"):
                cleaned_json_str = cleaned_json_str[:-len("```")]
            cleaned_json_str = cleaned_json_str.strip()
            logger.debug(f"[DynamicResourceSummarizerModule] Cleaned JSON for '{resource_identifier}': {cleaned_json_str[:200]}")

            if not cleaned_json_str:
                logger.warning(f"[DynamicResourceSummarizerModule] LLM returned an empty JSON summary for '{resource_identifier}'.")
                return {"resource_identifier": resource_identifier, "raw_summary_text": raw_json_string_output, "is_fallback": True, "error": "Empty JSON string"}

            try:
                summary_data_dict = json.loads(cleaned_json_str)
                if isinstance(summary_data_dict, dict) and "resource_identifier" in summary_data_dict:
                    return summary_data_dict  # Success: well-formed summary dict
                else:
                    logger.warning(f"[DynamicResourceSummarizerModule] Non-standard JSON structure for '{resource_identifier}'. Output: {raw_json_string_output[:200]}...")
                    return {"resource_identifier": resource_identifier, "raw_summary_text": raw_json_string_output, "is_fallback": True, "error": "Non-standard JSON structure"}
            except json.JSONDecodeError:
                logger.warning(f"[DynamicResourceSummarizerModule] Could not parse JSON summary for '{resource_identifier}'. Raw output: {raw_json_string_output[:200]}...")
                return {"resource_identifier": resource_identifier, "raw_summary_text": raw_json_string_output, "is_fallback": True, "error": "JSONDecodeError"}

        except Exception as e:
            logger.exception(f"[DynamicResourceSummarizerModule] Unexpected error while summarizing '{resource_identifier}': {e}")
            return {"resource_identifier": resource_identifier, "raw_summary_text": str(e), "is_fallback": True, "error": str(type(e).__name__)}
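
# Usage sketch (illustrative; assumes a configured LM as above; raw_text and
# history_str are hypothetical variables):
#
#   module = DynamicResourceSummarizerModule()
#   result = module(
#       resource_content=raw_text,               # full text of one resource
#       resource_identifier="chapter1.txt",
#       conversation_history_str=history_str,    # history pre-formatted as one string
#   )
#   # result is the parsed summary dict on success, a fallback dict with
#   # is_fallback=True on parse failure, or None for empty input.
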
class SyllabusGeneratorRouter(dspy.Module):
    def __init__(self):
        super().__init__()
        # Predict is used for all three variants; swap in dspy.ChainOfThought
        # if the generated syllabus structure proves unreliable.
        self.gen_no_resources = dspy.Predict(SyllabusNoResourcesSignature)
        self.gen_with_raw = dspy.Predict(SyllabusWithRawTextSignature)
        self.gen_with_summaries = dspy.Predict(SyllabusWithSummariesSignature)

    def forward(self,
                conversation_history_str: str,
                resource_type: str,  # "NONE", "RAW_TEXT", or "SUMMARIES"
                resource_content: Optional[str] = None  # Raw text or JSON-summaries string
               ) -> str:  # Returns the syllabus XML string

        common_args = {
            "learning_conversation": conversation_history_str,
        }

        if resource_type == "NONE":
            prediction = self.gen_no_resources(**common_args)
        elif resource_type == "RAW_TEXT":
            if not resource_content:
                raise ValueError("resource_content is required for resource_type 'RAW_TEXT'")
            prediction = self.gen_with_raw(raw_resource_excerpts_json=resource_content, **common_args)
        elif resource_type == "SUMMARIES":
            if not resource_content:
                raise ValueError("resource_content is required for resource_type 'SUMMARIES' (expected a JSON string)")
            prediction = self.gen_with_summaries(resource_summaries_json=resource_content, **common_args)
        else:
            raise ValueError(f"Unknown resource_type: {resource_type}")

        # Post-process to guarantee the output is wrapped in <syllabus> tags
        content = prediction.syllabus_xml.strip()
        if not content.lower().startswith("<syllabus>"):
            content = f"<syllabus>\n{content}"
        if not content.lower().endswith("</syllabus>"):
            content = f"{content}\n</syllabus>"
        return content
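
# Usage sketch (illustrative; resource_type selects which predictor runs;
# history_str and summaries are hypothetical variables):
#
#   router = SyllabusGeneratorRouter()
#   syllabus_xml = router(
#       conversation_history_str=history_str,
#       resource_type="SUMMARIES",
#       resource_content=json.dumps(summaries),  # JSON string of summary dicts
#   )
#   # The result is always wrapped in <syllabus>...</syllabus> tags.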

class ConversationManager(dspy.Module):
    def __init__(self):
        super().__init__()
        # Using Predict as the Signature is now quite detailed.
        # If the LLM struggles to follow the conditional logic for display_text,
        # ChainOfThought might be needed, or more explicit examples in the Signature.
        self.manage = dspy.Predict(SyllabusNegotiationSignature)

    def forward(self, conversation_history_str: str, current_syllabus_xml: str, user_input: str):
        # The user_input is the latest turn, but the full context is in conversation_history.
        # The Signature is designed to look at the user_input in context of the whole history.
        prediction = self.manage(
            conversation_history_str=conversation_history_str,
            current_syllabus_xml=current_syllabus_xml,
            user_input=user_input  # Latest user turn, interpreted against the full history
        )

        action = prediction.action_code.strip().upper()
        text_to_display = prediction.display_text.strip()

        # Enforce the display_text rules stated in the signature's instructions
        if action in ["GENERATE", "MODIFY", "FINALIZE"]:
            if text_to_display and text_to_display.upper() != "[NO_DISPLAY_TEXT]":
                logger.warning(f"[ConversationManager] Action '{action}' returned display_text '{text_to_display}'; forcing it to empty per the rules.")
            text_to_display = ""  # These actions must never display text
        elif text_to_display.upper() == "[NO_DISPLAY_TEXT]":
            text_to_display = ""

        # For PERSONA, a brief confirmation is allowed; strip the placeholder if present
        if action == "PERSONA" and text_to_display.upper() == "[NO_DISPLAY_TEXT]":
            text_to_display = ""

        return action, text_to_display
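
# Usage sketch (illustrative): the action code drives the caller's state
# machine, and display_text is forced empty for GENERATE/MODIFY/FINALIZE.
#
#   manager = ConversationManager()
#   action, reply = manager(
#       conversation_history_str=history_str,
#       current_syllabus_xml=current_xml or "None",
#       user_input="Can you add a module on eigenvalues?",
#   )
#   if action == "MODIFY":
#       ...  # regenerate the syllabus; show no assistant text this turn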
    
class LearningStyleQuestioner(dspy.Module):
    def __init__(self):
        super().__init__()
        self.ask = dspy.Predict(LearningStyleSignature)

    def forward(self, conversation_history_str: str):
        prediction = self.ask(conversation_history_with_final_syllabus=conversation_history_str)
        return prediction.question_to_user


class PersonaPromptGenerator(dspy.Module):
    def __init__(self):
        super().__init__()
        # Switched to dspy.Predict with the new signature
        self.generate_prompt_body = dspy.Predict(PersonaPromptBodyPredictSignature)

    def forward(self, conversation_history_str: str):
        try:
            prediction_object = self.generate_prompt_body(
                conversation_history_with_style_and_syllabus_context=conversation_history_str
            )

            prompt_body = prediction_object.prompt_body_text

            if not prompt_body or not prompt_body.strip():
                logger.error("[PersonaPromptGenerator] LLM returned an empty or whitespace-only prompt body.")
                return None  # Callers should fall back to a default prompt

            return prompt_body.strip()

        except Exception as e:
            logger.exception(f"[PersonaPromptGenerator] Error in forward pass: {e}")
            return None  # Callers should fall back to a default prompt
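
# Usage sketch (illustrative): returns the persona prompt body, or None so the
# caller can substitute a default prompt (DEFAULT_PERSONA_PROMPT is hypothetical):
#
#   generator = PersonaPromptGenerator()
#   persona_prompt = generator(history_str) or DEFAULT_PERSONA_PROMPT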


class ExplainerModule(dspy.Module):
    def __init__(self):
        super().__init__()
        self.explain = dspy.Predict(GenericInteractionSignature)

    def forward(self, system_instructions_str: str, history_str: str, user_query_str: str) -> str:
        prediction = self.explain(
            system_instructions=system_instructions_str,
            history=history_str,
            user_query=user_query_str
        )
        return prediction.response.strip()
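

# Minimal smoke test for local runs -- a sketch, not part of the API. It assumes
# a valid API key in the environment; the model name is an arbitrary example.
if __name__ == "__main__":
    dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))  # hypothetical model choice
    explainer = ExplainerModule()
    answer = explainer(
        system_instructions_str="You are a patient tutor.",
        history_str="",
        user_query_str="Explain the dot product in one paragraph.",
    )
    print(answer)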