architojha committed
Commit 4067b64 · 1 Parent(s): 9c6dd31

adding files
.gitignore ADDED
@@ -0,0 +1,5 @@
+ /.env
+ poetry.lock
+ __pycache__
+ storage/
+ storage
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # requirements.txt assumes Python >=3.11,<3.12, so the base image must match it
+ FROM python:3.11
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
config.yaml ADDED
@@ -0,0 +1,23 @@
+ userPreference:
+   llmService: 'openai' # 'ollama' or 'openai' or 'groq'
+   ModuleList: [
+     'IngestData',
+     'AugmentData',
+     'GenerateData',
+     'SearchData',
+     'Train',
+     'Evaluate',
+     'TriggerDeployment',
+     'ComparePerformance']
+
+ OpenAIConfig:
+   llm: "gpt-4o-mini"
+   embeddingModel: "text-embedding-3-small"
+
+ OllamaConfig:
+   llm: "llama3.2:latest"
+   baseURL: "http://localhost:11434"
+
+ GroqConfig:
+   llm: 'llama-3.3-70b-versatile'
+   hfEmbedding: "BAAI/bge-small-en-v1.5"
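
Note: a minimal sketch of how this file is consumed; src/utils/settings.py (below) loads it with yaml.safe_load and dereferences exactly these paths:

    import yaml

    with open("config.yaml", "r") as file:
        config = yaml.safe_load(file)

    # paths read by ProjectSettings in src/utils/settings.py
    service = config["userPreference"]["llmService"]   # 'openai', 'ollama', or 'groq'
    modules = config["userPreference"]["ModuleList"]   # module names for graph construction
    llm_name = config["OpenAIConfig"]["llm"]           # model for the selected service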
requirements.txt ADDED
@@ -0,0 +1,19 @@
+ # Python >=3.11,<3.12 is assumed by this project; "python" is not a pip-installable
+ # package, so the interpreter version is enforced by the Docker base image instead.
+ llama-index==0.12.8
+ llama-index-core
+ llama-index-llms-openai==0.3.12
+ llama-index-utils-workflow==0.3.0
+ fastapi==0.115.6
+ uvicorn
+ python-dotenv==1.0.1
+ pydantic==2.10.4
+ pydantic-settings==2.7.0
+ pyyaml==6.0.2
+ llama-index-llms-ollama==0.5.0
+ llama-index-llms-groq==0.3.1
+ llama-index-embeddings-huggingface==0.4.0
+ llama-index-embeddings-ollama==0.5.0
+ phidata==2.7.7
+ groq==0.14.0
+ duckduckgo-search==7.2.1
src/__init__.py ADDED
File without changes
src/main.py ADDED
@@ -0,0 +1,20 @@
+ from fastapi import FastAPI
+ from src.routers.discovery import router as interview_router
+ from src.routers.context import router as context_router
+ from src.routers.graph import router as graph_router
+ from src.routers.analysis import router as analysis_router
+ from src.routers.analyze_generate_graph import router as analyze_generate_graph_router
+ from src.utils.settings import settings
+
+ app = FastAPI(title="Franky Workflows for User Intent Recognition")
+
+ app.include_router(interview_router, prefix="/api/intent")
+ app.include_router(context_router, prefix="/api/intent")
+ app.include_router(graph_router, prefix="/api/graph")
+ app.include_router(analysis_router, prefix="/api/ml-analysis")
+ app.include_router(analyze_generate_graph_router, prefix="/api/analyze-generate-graph")
+
+
+ @app.get("/")
+ async def read_root():
+     return "Franky Workflows are up!"
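
Note: a minimal sketch of serving the app locally; the module path and port mirror the Dockerfile CMD above, and the file name run.py is hypothetical:

    # run.py (hypothetical) -- local equivalent of the Dockerfile CMD
    import uvicorn

    if __name__ == "__main__":
        uvicorn.run("src.main:app", host="0.0.0.0", port=7860)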
src/models/__init__.py ADDED
File without changes
src/models/analysis_models.py ADDED
@@ -0,0 +1,168 @@
+ from typing import Iterator, List, Optional
+ from enum import Enum
+ from pydantic import BaseModel, Field
+
+
+ class InputModel(BaseModel):
+     problem_statement: str = Field(
+         default=None,
+         description="Contains the description of the problem statement or task"
+     )
+
+ class MLTaskType(str, Enum):
+     CLASSIFICATION = "classification"
+     REGRESSION = "regression"
+     CLUSTERING = "clustering"
+     NLP = "natural_language_processing"
+     COMPUTER_VISION = "computer_vision"
+     TIME_SERIES = "time_series"
+     ANOMALY_DETECTION = "anomaly_detection"
+     RECOMMENDATION = "recommendation"
+     OTHER = "other"
+
+
+ class ModelResponseStatus(BaseModel):
+     """Technical specification for ML implementation"""
+     data_source: str = Field(
+         # default="...",
+         description="Required data sources and their characteristics"
+     )
+     data_format: str = Field(
+         # default="...",
+         description="Expected format of input data"
+     )
+     additional_data_requirement: bool = Field(
+         # default=False,
+         description="Whether additional data is needed"
+     )
+     constraints: str = Field(
+         # default="...",
+         description="Business and technical constraints"
+     )
+     task: MLTaskType = Field(
+         # default=MLTaskType.OTHER,
+         description="Type of ML task"
+     )
+     models: List[str] = Field(
+         # default=["..."],
+         description="Suggested ML models"
+     )
+     hyperparameters: List[str] = Field(
+         # default=["..."],
+         description="Key hyperparameters to consider"
+     )
+     eval_metrics: List[str] = Field(
+         # default=["..."],
+         description="Evaluation metrics for the solution"
+     )
+     technical_requirements: str = Field(
+         # default="...",
+         description="Technical implementation requirements"
+     )
+
+
+ class RequirementsAnalysis(BaseModel):
+     """Initial analysis of business requirements"""
+     model_response: ModelResponseStatus
+     unclear_points: List[str] = Field(
+         default_factory=list,
+         description="Points needing clarification"
+     )
+     search_queries: List[str] = Field(
+         default_factory=list,
+         description="Topics to research"
+     )
+     business_understanding: str = Field(
+         description="Summary of business problem understanding"
+     )
+
+
+ class TechnicalResearch(BaseModel):
+     """Results from technical research"""
+     model_response: ModelResponseStatus
+     research_findings: str = Field(
+         description="Key findings from research"
+     )
+     reference_implementations: List[str] = Field(
+         default_factory=list,
+         description="Similar implementation examples found"
+     )
+     sources: List[str] = Field(
+         default_factory=list,
+         description="Sources of information"
+     )
+
+
+ # Implementation Planning Models
+ class ComponentType(str, Enum):
+     DATA_PIPELINE = "data_pipeline"
+     PREPROCESSOR = "preprocessor"
+     MODEL = "model"
+     EVALUATOR = "evaluator"
+     INFERENCE = "inference"
+     MONITORING = "monitoring"
+     UTILITY = "utility"
+
+
+ class ParameterSpec(BaseModel):
+     """Specification for a single parameter"""
+     name: str = Field(description="Name of the parameter")
+     param_type: str = Field(description="Type of the parameter")
+     description: str = Field(description="Description of the parameter")
+     default_value: str = Field(description="Default value if any")
+     required: bool = Field(description="Whether the parameter is required")
+
+
+ class ConfigParam(BaseModel):
+     """Specification for a configuration parameter"""
+     name: str = Field(description="Name of the configuration parameter")
+     value_type: str = Field(description="Type of value expected")
+     description: str = Field(description="Description of the configuration parameter")
+     default: str = Field(description="Default value if any")
+
+
+ class FunctionSpec(BaseModel):
+     """Detailed specification for a single function"""
+     name: str = Field(description="Name of the function")
+     description: str = Field(description="Detailed description of function's purpose")
+     input_params: List[ParameterSpec] = Field(
+         description="List of input parameters and their specifications"
+     )
+     return_type: str = Field(description="Return type and description")
+     dependencies: List[str] = Field(
+         description="Required dependencies/imports"
+     )
+     error_handling: List[str] = Field(
+         description="Expected errors and handling strategies"
+     )
+
+
+ class ComponentSpec(BaseModel):
+     """Specification for a component (module) of the system"""
+     name: str = Field(description="Name of the component")
+     type: ComponentType = Field(description="Type of component")
+     description: str = Field(description="Detailed description of component's purpose")
+     functions: List[FunctionSpec] = Field(description="Functions within this component")
+     dependencies: List[str] = Field(
+         description="External package dependencies"
+     )
+     config_params: List[ConfigParam] = Field(
+         description="Configuration parameters needed"
+     )
+
+
+ class ImplementationPlan(BaseModel):
+     """Complete implementation plan for the ML system"""
+     components: List[ComponentSpec] = Field(description="System components")
+     system_requirements: List[str] = Field(
+         description="System-level requirements and dependencies"
+     )
+     deployment_notes: str = Field(
+         description="Notes on deployment and infrastructure"
+     )
+     testing_strategy: str = Field(
+         description="Strategy for testing components"
+     )
+     implementation_order: List[str] = Field(
+         description="Suggested order of implementation"
+     )
src/models/context_events.py ADDED
File without changes
src/models/discovery_events.py ADDED
@@ -0,0 +1,29 @@
+ from llama_index.core.workflow import Event
+
+
+ class GetModulesEvent(Event):
+     """
+     Event to get modules. Outputs accepted modules for the task.
+     """
+     task: str
+     modules: str
+
+
+ class RefineModulesEvent(Event):
+     """
+     Event to refine modules. Outputs refined and adapted modules.
+     """
+     task: str
+     refined_modules: str
+
+
+ class ReasoningStructureEvent(Event):
+     """
+     Event to create reasoning structure. Outputs final reasoning structure.
+     """
+     task: str
+     reasoning_structure: str
+
+
+ # TODO: Add JudgeEvent(Event) here which analyses context, judges if requirements complete,
+ # and emits either loop or StopEvent.
src/models/schemas.py ADDED
@@ -0,0 +1,66 @@
+ from typing import List
+ from pydantic import BaseModel
+
+
+ class WorkflowSchema(BaseModel):
+     data_source: str
+     data_format: str
+     additional_data_requirement: bool
+     constraints: str
+     available_preprocess_script: bool
+     preprocess_script: str
+     recommended_preprocess_steps: List[str]
+     task: str
+     models: List[str]
+     hyperparameters: List[str]
+     eval_metrics: List[str]
+     deploy_constraints: str
+
+
+ class IntentFilesSchema(BaseModel):
+     context: str
+     open_questions: List[str]
+     data_source: str
+     data_format: str
+     additional_data_requirement: bool
+     constraints: str
+     available_preprocess_script: bool
+     preprocess_script: str
+     recommended_preprocess_steps: List[str]
+     task: str
+     models: List[str]
+     hyperparameters: List[str]
+     eval_metrics: List[str]
+     deploy_constraints: str
+
+
+ class IntentInterviewsSchema(BaseModel):
+     context: str
+     completion_status: bool
+     result: str
+     data_source: str
+     data_format: str
+     additional_data_requirement: bool
+     constraints: str
+     available_preprocess_script: bool
+     preprocess_script: str
+     recommended_preprocess_steps: List[str]
+     task: str
+     models: List[str]
+     hyperparameters: List[str]
+     eval_metrics: List[str]
+     deploy_constraints: str
+
+
+ class IntentRequestData(BaseModel):
+     query: str
+     context: str
+     count: int
+     complete: bool
+
+
+ class IntentResponseData(BaseModel):
+     context: str
+     result: str
+     count: int
+     complete: bool
src/models/workflow_graph.py ADDED
@@ -0,0 +1,36 @@
+ from llama_index.core.workflow import Event
+ from pydantic import BaseModel
+ from typing import List
+
+
+ class Edge(BaseModel):
+     source: str
+     target: str
+     desc: str
+
+
+ class Node(BaseModel):
+     node_id: str
+     name: str
+
+
+ class Graph(BaseModel):
+     nodes: List[Node]
+     edges: List[Edge]
+
+
+ class GetResponseEvent(Event):
+     project_details: str
+     rawResponse: str
+
+
+ class ConstructGraphEvent(Event):
+     workflowGraph: Graph
+
+
+ class GraphInputSchema(BaseModel):
+     desc: str
+
+
+ class GraphOutputSchema(BaseModel):
+     graph: Graph
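
Note: a small illustrative sketch of these schemas in use (the node/edge values are made up; model_dump is the pydantic v2 serializer pinned in requirements.txt):

    from src.models.workflow_graph import Edge, Graph, Node

    graph = Graph(
        nodes=[Node(node_id="p001", name="IngestData"),
               Node(node_id="p002", name="Train")],
        edges=[Edge(source="p001", target="p002", desc="Ingesting Data to Training Model")],
    )
    print(graph.model_dump())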
src/routers/__init__.py ADDED
File without changes
src/routers/analysis.py ADDED
@@ -0,0 +1,54 @@
+ from fastapi import APIRouter, Response
+ from src.models.analysis_models import InputModel
+ from src.workflows.analysis_workflow import MLAnalysisWorkflow, MLImplementationPlanner
+ from datetime import datetime
+ from phi.workflow import RunResponse  # needed for the Iterator[RunResponse] annotations below
+ from phi.storage.workflow.sqlite import SqlWorkflowStorage
+ from phi.utils.pprint import pprint_run_response
+ from phi.utils.log import logger
+ from typing import Iterator
+
+ router = APIRouter()
+
+ @router.get("/")
+ async def read_root():
+     return Response("Ml-Analysis workflow from user problem is Up!")
+
+ @router.post("/analyze-problem")
+ async def analyze_problem(data: InputModel):
+
+     analysis_workflow = MLAnalysisWorkflow(
+         session_id=f"ml-analysis-{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+         storage=SqlWorkflowStorage(
+             table_name="ml_analysis_workflows",
+             db_file="storage/workflows.db"
+         )
+     )
+
+     analysis_response: Iterator[RunResponse] = analysis_workflow.run(data.problem_statement)
+
+     pprint_run_response(analysis_response, markdown=True)
+
+     requirements_result = analysis_workflow.requirements_analyst.run_response.content if analysis_workflow.requirements_analyst.run_response else None
+     research_result = analysis_workflow.technical_researcher.run_response.content if analysis_workflow.technical_researcher.run_response else None
+
+     if requirements_result:
+         logger.info("===Planning Phase===")
+         planning_workflow = MLImplementationPlanner(
+             session_id=f"ml-planning-{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+             storage=SqlWorkflowStorage(
+                 table_name="ml_planning_workflows",
+                 db_file="storage/workflows.db"
+             )
+         )
+
+         # run and print planning workflow
+         planning_response_stream: Iterator[RunResponse] = planning_workflow.run(requirements_result, research_result)
+
+         pprint_run_response(planning_response_stream, markdown=True)
+
+         return {"Response": planning_workflow.writer.run_response.content}
+
+     else:
+         return {"Error": "Requirements analysis did not complete successfully."}
+
src/routers/analyze_generate_graph.py ADDED
@@ -0,0 +1,65 @@
+ from fastapi import APIRouter, HTTPException
+ from src.models.analysis_models import InputModel
+ from src.workflows.analysis_workflow import MLAnalysisWorkflow, MLImplementationPlanner
+ from datetime import datetime
+ from phi.workflow import RunResponse  # needed for the Iterator[RunResponse] annotations below
+ from phi.storage.workflow.sqlite import SqlWorkflowStorage
+ from phi.utils.pprint import pprint_run_response
+ from phi.utils.log import logger
+ from typing import Iterator
+ from llama_index.core.settings import Settings
+ from src.models.workflow_graph import GraphInputSchema, GraphOutputSchema
+ from src.workflows.graph_workflow import DesignGraphWorkflow
+
+ router = APIRouter()
+
+ async def analyze_problem(problem_statement: str):
+
+     analysis_workflow = MLAnalysisWorkflow(
+         session_id=f"ml-analysis-{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+         storage=SqlWorkflowStorage(
+             table_name="ml_analysis_workflows",
+             db_file="storage/workflows.db"
+         )
+     )
+
+     analysis_response: Iterator[RunResponse] = analysis_workflow.run(problem_statement)
+
+     pprint_run_response(analysis_response, markdown=True)
+
+     requirements_result = analysis_workflow.requirements_analyst.run_response.content if analysis_workflow.requirements_analyst.run_response else None
+     research_result = analysis_workflow.technical_researcher.run_response.content if analysis_workflow.technical_researcher.run_response else None
+
+     if requirements_result:
+         logger.info("===Planning Phase===")
+         planning_workflow = MLImplementationPlanner(
+             session_id=f"ml-planning-{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+             storage=SqlWorkflowStorage(
+                 table_name="ml_planning_workflows",
+                 db_file="storage/workflows.db"
+             )
+         )
+
+         planning_response_stream: Iterator[RunResponse] = planning_workflow.run(requirements_result, research_result)
+
+         pprint_run_response(planning_response_stream, markdown=True)
+
+         return planning_workflow.writer.run_response.content
+
+     else:
+         return "Requirements analysis did not complete successfully."
+
+ @router.post("/", response_model=GraphOutputSchema)
+ async def analyzer_generate_graph(data: InputModel):
+
+     task_description = await analyze_problem(data.problem_statement)
+
+     try:
+         graph_workflow = DesignGraphWorkflow(timeout=60, verbose=True)
+         graph_result = await graph_workflow.run(_project_description=task_description, llm=Settings.llm)
+
+         return GraphOutputSchema(graph=graph_result)
+
+     except Exception as e:
+         # a plain dict here would fail the GraphOutputSchema response model,
+         # so surface the failure as an HTTP error instead
+         raise HTTPException(status_code=500, detail=f"Error processing {e}")
src/routers/context.py ADDED
@@ -0,0 +1,18 @@
+ from fastapi import APIRouter
+ from llama_index.core.settings import Settings
+
+ # configurations
+ router = APIRouter()
+
+ @router.get("/")
+ async def read_root():
+     return "Script Reader Workflow for User Intent from files is up!"
+
+
+ @router.post("/context/")
+ async def extract_code_context():
+     try:
+         response = ""
+         return response
+     except Exception as e:
+         return {"detail": f"Error processing {e}"}
src/routers/discovery.py ADDED
@@ -0,0 +1,35 @@
+ from fastapi import APIRouter, HTTPException
+ from llama_index.core.settings import Settings
+ from src.models.schemas import IntentRequestData, IntentResponseData
+ from src.workflows.workflow_discovery import SelfDiscoverWorkflow, JudgeWorkflow
+ from src.workflows.reasoning_modules import ML_EXPERT_PROMPT_TEMPLATE
+
+ # configurations
+ router = APIRouter()
+
+
+ @router.get("/")
+ async def read_root():
+     return "Self-Discovery Workflow for User Intent Interview is up!"
+
+
+ @router.post("/interview/", response_model=IntentResponseData)
+ async def interview_user(data: IntentRequestData):
+     try:
+
+         interview_workflow = SelfDiscoverWorkflow()
+         task = ML_EXPERT_PROMPT_TEMPLATE.format(query=data.query)
+         workflow_handler = interview_workflow.run(task=task, llm=Settings.llm)
+         intermediate_result = await workflow_handler
+         context = await workflow_handler.ctx.get("workflow_result")
+
+         judge_workflow = JudgeWorkflow()
+         completion_status = await judge_workflow.run(judging_context=intermediate_result, llm=Settings.llm)
+
+         return IntentResponseData(context=str(context),
+                                   result=intermediate_result,
+                                   count=data.count + 1,
+                                   complete=completion_status)
+     except Exception as e:
+         # a plain dict would fail the IntentResponseData response model
+         raise HTTPException(status_code=500, detail=f"Error processing {e}")
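
Note: a hedged sketch of the request body for POST /api/intent/interview/ (the prefix comes from src/main.py above; the field values are illustrative):

    from src.models.schemas import IntentRequestData

    payload = IntentRequestData(
        query="Detect defective parts from assembly-line images",  # illustrative
        context="",
        count=0,
        complete=False,
    )
    print(payload.model_dump_json())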
src/routers/graph.py ADDED
@@ -0,0 +1,25 @@
+ from fastapi import APIRouter, HTTPException
+ from llama_index.core.settings import Settings
+ from src.models.workflow_graph import GraphInputSchema, GraphOutputSchema
+ from src.workflows.graph_workflow import DesignGraphWorkflow
+
+ # configurations
+ router = APIRouter()
+
+ @router.get("/", response_model=GraphInputSchema)
+ async def read_root():
+     return {'desc': "Graph Creation Workflow on User Intent is up!"}
+
+
+ @router.post("/design/", response_model=GraphOutputSchema)
+ async def design_graph(data: GraphInputSchema):
+     try:
+
+         graph_workflow = DesignGraphWorkflow(timeout=60, verbose=True)
+         graph_result = await graph_workflow.run(_project_description=data.desc, llm=Settings.llm)
+
+         return GraphOutputSchema(graph=graph_result)
+
+     except Exception as e:
+         # a plain dict would fail the GraphOutputSchema response model
+         raise HTTPException(status_code=500, detail=f"Error processing {e}")
src/utils/helper.py ADDED
@@ -0,0 +1,284 @@
+ import re
+ from src.models.workflow_graph import Edge, Node, Graph
+
+ class HelperClass:
+
+     @staticmethod
+     def _build_prompt(project_desc: str, modules: list) -> str:
+         return f'''
+ You are an advanced AI tasked with constructing a directed graph/flow based on a set of available modules and a project description. Each module in the flow represents a node, and each edge defines the task connecting these nodes.
+ Your output should adhere strictly to the following rules: Don't give me any code and don't mention 'json' at the top of the response.
+ There should not be any extra output (even a single word) besides the output required.
+
+ The flow of nodes and tasks must be determined by analyzing the provided project description.
+ The modules chosen must form a complete pipeline suitable for the tasks in the project description.
+
+
+
+ -Steps-
+ 1. Parse the project description to identify the tasks and operations required to form the flow.
+ - For each task, determine which module (from the available list) best fits the task description.
+ - Assign a unique identifier to every instance of a module.
+ - Example: If the "Train" module is used twice, say for training 2 different models, name them both Train and assign a unique id to each of them.
+ - For each identified node:
+ - Node ID: Generate a unique identifier for the module instance (a 5-character random string of lower-case letters and digits).
+ - Module Name: Name of the module from the available list.
+
+ Format each node as:
+ <unique Node ID>Module Name</unique Node ID>
+
+ 2. Construct Edges Between Nodes
+ - Determine the logical sequence of tasks from the project description.
+ - Identify source and target modules for each transition based on the task flow.
+ - The graph begins with a special "Start" node. Edges must connect from <Start> to the first module in the pipeline.
+ - For each connection, output the following information:
+ - Source Node: The unique ID of the starting module. (Use the ids generated for each module in Step 1)
+ - Target Node: The unique ID of the destination module. (Use the ids generated for each module in Step 1)
+ - Task Description: A short description of what is happening during the transition.
+
+ Format each edge as:
+ <Edge index>( sourceNode="<Node ID>" | targetNode="<Node ID>" | task="<Task Description>" )</Edge index>
+
+
+ ######################
+ -Examples-
+ ######################
+ Example 1:
+
+ Input: Project Description:
+
+ This project implements an automated quality control system for manufacturing using a modular machine learning pipeline. Data from high-resolution product images and metadata is ingested and augmented to enhance diversity and balance.
+ Task A trains a CNN for defect detection, while Task B trains a transformer-based model for quality classification. Both models are rigorously evaluated and compared against predefined benchmarks. Successful models are deployed for real-time defect monitoring and automated grading via integration with production and ERP systems.
+
+ Input: Available Modules:
+
+ ['IngestData',
+ 'AugmentData',
+ 'GenerateData',
+ 'SearchData',
+ 'Train',
+ 'Evaluate',
+ 'TriggerDeployment',
+ 'ComparePerformance']
+
+ -------------------------------------
+
+ Flow Generated by LLM: (This will not be Input)
+
+ IngestData -> AugmentData
+ AugmentData -> Train (for Task A)
+ AugmentData -> Train (for Task B)
+ Train (model A) -> Evaluate (test model A)
+ Train (model B) -> Evaluate (test model B)
+ Evaluate (test model A) -> ComparePerformance
+ Evaluate (test model B) -> ComparePerformance
+ ComparePerformance -> TriggerDeployment
+
+
+ ################
+
+ Output:
+
+ <p83fd>IngestData</p83fd>
+ <sb9ba>AugmentData</sb9ba>
+ <bxt2w>Train A</bxt2w>
+ <d1ep3>Train B</d1ep3>
+ <b9lca>Evaluate A</b9lca>
+ <5w01f>Evaluate B</5w01f>
+ <z4bun>ComparePerformance</z4bun>
+ <zj2pb>TriggerDeployment</zj2pb>
+ <Edge 1>( sourceNode="<Start>" | targetNode="<p83fd>" | task="From Start node to Ingesting Data" )</Edge 1>
+ <Edge 2>( sourceNode="<p83fd>" | targetNode="<sb9ba>" | task="Ingesting Data to Augment Data" )</Edge 2>
+ <Edge 3>( sourceNode="<sb9ba>" | targetNode="<bxt2w>" | task="Augmenting Data to Train model A" )</Edge 3>
+ <Edge 4>( sourceNode="<sb9ba>" | targetNode="<d1ep3>" | task="Augmenting Data to Train model B" )</Edge 4>
+ <Edge 5>( sourceNode="<bxt2w>" | targetNode="<b9lca>" | task="Training A to Evaluate model A" )</Edge 5>
+ <Edge 6>( sourceNode="<d1ep3>" | targetNode="<5w01f>" | task="Training B to Evaluate model B" )</Edge 6>
+ <Edge 7>( sourceNode="<b9lca>" | targetNode="<z4bun>" | task="Evaluate model A to Compare Performance" )</Edge 7>
+ <Edge 8>( sourceNode="<5w01f>" | targetNode="<z4bun>" | task="Evaluate model B to Compare Performance" )</Edge 8>
+ <Edge 9>( sourceNode="<z4bun>" | targetNode="<zj2pb>" | task="Compare Performance to Trigger Deployment" )</Edge 9>
+
+ #############################
+
+ Example 2:
+
+ Input: Project Description:
+
+ This project develops an automated crop health monitoring system using a modular machine learning pipeline. Data from satellite and drone imagery is ingested and preprocessed, followed by augmentation techniques to increase diversity and balance.
+ Synthetic data is generated to simulate various crop conditions, enhancing model robustness. The pipeline trains a deep learning model to classify crop health, evaluates its performance on key metrics such as accuracy and recall, and identifies areas for improvement.
+ Once performance benchmarks are met, the system is deployed for real-time crop monitoring, enabling farmers to make informed decisions and optimize agricultural productivity efficiently.
+
+ Input: Available Modules:
+
+ ['IngestData',
+ 'AugmentData',
+ 'GenerateData',
+ 'SearchData',
+ 'Train',
+ 'Evaluate',
+ 'TriggerDeployment',
+ 'ComparePerformance']
+
+ -------------------------------------
+
+ Flow Generated by LLM: (This will not be Input)
+
+ IngestData -> AugmentData
+ AugmentData -> GenerateData
+ GenerateData -> Train
+ Train -> Evaluate
+ Evaluate -> TriggerDeployment
+
+ ################
+
+ Output:
+
+ <p001>IngestData</p001>
+ <p002>AugmentData</p002>
+ <p003>GenerateData</p003>
+ <p004>Train</p004>
+ <p005>Evaluate</p005>
+ <p006>TriggerDeployment</p006>
+ <Edge 1>( sourceNode="<Start>" | targetNode="<p001>" | task="From Start to Ingesting Data" )</Edge 1>
+ <Edge 2>( sourceNode="<p001>" | targetNode="<p002>" | task="Ingesting Data to Augmenting Data" )</Edge 2>
+ <Edge 3>( sourceNode="<p002>" | targetNode="<p003>" | task="Augmenting Data to Generating Synthetic Data" )</Edge 3>
+ <Edge 4>( sourceNode="<p003>" | targetNode="<p004>" | task="Generating Data to Training Model" )</Edge 4>
+ <Edge 5>( sourceNode="<p004>" | targetNode="<p005>" | task="Training Model to Evaluating Performance" )</Edge 5>
+ <Edge 6>( sourceNode="<p005>" | targetNode="<p006>" | task="Evaluating Model to Triggering Deployment" )</Edge 6>
+
+ #############################
+
+ Example 3:
+
+ Input: Project Description:
+
+ This project implements a robust machine learning pipeline for iterative model improvement. Data is ingested and preprocessed, followed by augmentation to enhance diversity and balance.
+ An initial model is trained on the augmented data. The pipeline then applies further data augmentation techniques tailored to improve underperforming areas, followed by retraining the model for enhanced accuracy.
+ The improved model is rigorously evaluated on a test dataset to ensure it meets predefined performance benchmarks. Upon achieving the desired metrics, the best-performing model is deployed to production, ensuring reliable and efficient real-world performance tailored to the project's objectives.
+
+ Input: Available Modules:
+
+ ['IngestData',
+ 'AugmentData',
+ 'GenerateData',
+ 'SearchData',
+ 'Train',
+ 'Evaluate',
+ 'TriggerDeployment',
+ 'ComparePerformance']
+
+ -------------------------------------
+
+ Flow Generated by LLM: (This will not be Input)
+
+ IngestData -> AugmentData (Stage 1)
+ AugmentData (Stage 1) -> Train (Stage 1)
+ Train (Stage 1) -> AugmentData (Stage 2)
+ AugmentData (Stage 2) -> Train (Stage 2)
+ Train (Stage 2) -> Evaluate
+ Evaluate -> TriggerDeployment
+
+ ################
+
+ Output:
+
+ <m001>IngestData</m001>
+ <m002>AugmentData Stage 1</m002>
+ <m003>Train Stage 1</m003>
+ <m004>AugmentData Stage 2</m004>
+ <m005>Train Stage 2</m005>
+ <m006>Evaluate</m006>
+ <m007>TriggerDeployment</m007>
+ <Edge 1>( sourceNode="<Start>" | targetNode="<m001>" | task="From Start to Ingesting Data" )</Edge 1>
+ <Edge 2>( sourceNode="<m001>" | targetNode="<m002>" | task="Ingesting Data to Augmenting Data Stage 1" )</Edge 2>
+ <Edge 3>( sourceNode="<m002>" | targetNode="<m003>" | task="Augmenting Data Stage 1 to Training Stage 1" )</Edge 3>
+ <Edge 4>( sourceNode="<m003>" | targetNode="<m004>" | task="Training Stage 1 to Augmenting Data Stage 2" )</Edge 4>
+ <Edge 5>( sourceNode="<m004>" | targetNode="<m005>" | task="Augmenting Data Stage 2 to Training Stage 2" )</Edge 5>
+ <Edge 6>( sourceNode="<m005>" | targetNode="<m006>" | task="Training Stage 2 to Evaluating Model" )</Edge 6>
+ <Edge 7>( sourceNode="<m006>" | targetNode="<m007>" | task="Evaluating Model to Triggering Deployment" )</Edge 7>
+
+ #############################
+
+
+ When you give output, don't mention anything like 'Here is the list of Nodes and Edges extracted from the text:'. Just give the response straight away.
+
+
+
+ -Real Data-
+ ######################
+
+ Input: Project Description: {project_desc}
+
+ **Instructions**
+
+ 1. A list of modules available for building the pipeline. You must only use these modules to form the flow.
+ 2. Do not generate new names for modules. Only use whatever is available in the list.
+
+ Input: Available Modules:
+
+ {modules}
+
+ ######################
+
+ Output:
+ '''
+
+     @staticmethod
+     def _parse_llm_response(raw_response: str) -> Graph:
+
+         # first alternative captures <id>Name</id> node lines; the second captures
+         # edge lines in the exact task="..." form the prompt specifies
+         pattern = r'<([a-zA-Z0-9]+)>([^<]+)<\/\1>|sourceNode="<([^"]+)>"\s*\|\s*targetNode="<([^"]+)>"\s*\|\s*task="([^"]+)"'
+         nodes, edges = [], []
+
+         list_ = raw_response.split('\n')
+
+         for line in list_:
+             matches = re.findall(pattern, line)
+
+             try:
+                 for match in matches:
+                     if match[0]:
+
+                         nd = Node(node_id=match[0], name=match[1])
+                         nodes.append(nd)
+
+                     elif match[2]:
+
+                         edge = Edge(source=match[2], target=match[3], desc=match[4])
+                         edges.append(edge)
+
+             except Exception as e:
+                 print(f"Error parsing line: {line}, error: {e}")
+
+         return Graph(nodes=nodes, edges=edges)
+
+     @staticmethod
+     def _store_graph(graph_data: Graph):
+
+         nodes, edges = [], []
+         dict_ = {}
+
+         for node in graph_data.nodes:
+             dict_[node.node_id] = node.name
+
+             nodes.append({
+                 'node_id': node.node_id,
+                 'name': node.name
+             })
+
+         dict_['Start'] = 'StartNode'
+
+         for edge in graph_data.edges:
+             # the lookups double as validation: a KeyError here means an edge
+             # references a node id that was never declared
+             source_node = dict_[edge.source]
+             target_node = dict_[edge.target]
+             edges.append({
+                 'source': edge.source,
+                 'target': edge.target,
+                 'desc': edge.desc
+             })
+
+         json_obj = {'Nodes': nodes, 'Edges': edges}
+         # previously assembled but never used; return it so callers can persist it
+         return json_obj
+
+
+ helper = HelperClass()
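
Note: a quick sketch of the parser on a two-line sample in the prompt's output format (illustrative input; the regex only matches task="..." with no spaces around '=', which is why Example 1 above is normalized to that form):

    from src.utils.helper import helper

    sample = "\n".join([
        '<p001>IngestData</p001>',
        '<Edge 1>( sourceNode="<Start>" | targetNode="<p001>" | task="From Start to Ingesting Data" )</Edge 1>',
    ])
    graph = helper._parse_llm_response(sample)
    # graph.nodes -> [Node(node_id='p001', name='IngestData')]
    # graph.edges -> [Edge(source='Start', target='p001', desc='From Start to Ingesting Data')]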
src/utils/settings.py ADDED
@@ -0,0 +1,72 @@
+ import yaml
+ from typing import Optional, List
+ from pydantic_settings import BaseSettings
+ from llama_index.core import Settings
+ from llama_index.llms.openai import OpenAI
+ from llama_index.llms.ollama import Ollama
+ from llama_index.llms.groq import Groq
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from llama_index.embeddings.ollama import OllamaEmbedding
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from pydantic import Field
+
+
+ class ProjectSettings(BaseSettings):
+
+     # pydantic-settings matches these field names against the .env variables,
+     # so the per-field env= kwarg (a pydantic v1 idiom) is unnecessary
+     OPENAI_API_KEY: Optional[str] = Field(None)
+     GROQ_KEY: Optional[str] = Field(None)
+     config: Optional[dict] = None
+     moduleList: List[str] = Field(default_factory=list)
+
+     class Config:
+         env_file = '.env'
+
+     def __init__(self, **kwargs):
+         super().__init__(**kwargs)
+         self.config = self._read_yaml_config()
+
+         if not self.config:
+             # pydantic v2's ValidationError cannot be raised from a plain message
+             raise ValueError("Config file could not be loaded")
+
+         self._instantiate_services()
+
+         self.moduleList = self.config.get('userPreference', {})['ModuleList']
+
+     def _instantiate_services(self):
+         llm_service = self.config.get('userPreference', {}).get('llmService', '').lower()
+
+         if llm_service == 'ollama':
+             self._initialize_ollama()
+         elif llm_service == 'openai':
+             self._initialize_openai()
+         elif llm_service == 'groq':
+             self._initialize_groq()
+         else:
+             raise ValueError(f"Invalid LLM service: {llm_service}")
+
+     def _initialize_ollama(self):
+         ollama_config = self.config.get('OllamaConfig', {})
+         Settings.llm = Ollama(base_url=ollama_config['baseURL'], model=ollama_config['llm'])
+         Settings.embed_model = OllamaEmbedding(base_url=ollama_config['baseURL'], model_name=ollama_config['llm'])
+
+     def _initialize_openai(self):
+         openai_config = self.config.get('OpenAIConfig', {})
+         Settings.llm = OpenAI(model=openai_config['llm'], api_key=self.OPENAI_API_KEY)
+         Settings.embed_model = OpenAIEmbedding(model=openai_config['embeddingModel'], api_key=self.OPENAI_API_KEY)
+
+     def _initialize_groq(self):
+         groq_config = self.config.get('GroqConfig', {})
+         Settings.llm = Groq(model=groq_config['llm'], api_key=self.GROQ_KEY)
+         Settings.embed_model = HuggingFaceEmbedding(model_name=groq_config['hfEmbedding'])
+
+     @staticmethod
+     def _read_yaml_config():
+         with open("config.yaml", "r") as file:
+             config = yaml.safe_load(file)
+
+         return config
+
+
+ settings = ProjectSettings()
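
Note: ProjectSettings reads its two keys from the git-ignored .env file referenced at the top of this commit; a hedged sketch of its expected shape (placeholder values, not real keys):

    # .env (placeholders)
    # OPENAI_API_KEY=sk-...
    # GROQ_KEY=gsk_...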
src/workflows/__init__.py ADDED
File without changes
src/workflows/analysis_workflow.py ADDED
@@ -0,0 +1,324 @@
+ from src.models.analysis_models import MLTaskType, ModelResponseStatus, RequirementsAnalysis, TechnicalResearch, ComponentType, ParameterSpec, ConfigParam, FunctionSpec, ComponentSpec, ImplementationPlan
+ from typing import Iterator, List, Optional
+ from phi.workflow import Workflow, RunResponse, RunEvent
+ from phi.agent import Agent
+ from phi.model.openai import OpenAIChat
+ from phi.storage.workflow.sqlite import SqlWorkflowStorage
+ from phi.storage.agent.sqlite import SqlAgentStorage
+ # from phi.memory.db.sqlite import SqliteMemoryDb
+ from phi.tools.duckduckgo import DuckDuckGo
+ from phi.utils.log import logger
+ from dotenv import load_dotenv
+ import json
+ import os
+
+ load_dotenv()
+ api_key = os.getenv("OPENAI_API_KEY")
+
+ class MLAnalysisWorkflow(Workflow):
+     """Workflow for analyzing ML business requirements and creating technical specifications"""
+
+     # Initialize agents
+     requirements_analyst: Agent = Agent(
+         name="ML Requirements Analyst",
+         model=OpenAIChat(id="gpt-4o", api_key=api_key),
+         description="Expert ML Solutions Architect specialized in analyzing business requirements",
+         instructions=[
+             "Analyze business problems and translate them into technical ML specifications.",
+             "1. Understand the core business problem and objectives",
+             "2. Identify the type of ML task required",
+             "3. Determine data requirements and constraints",
+             "4. List unclear points that need clarification",
+             "5. Specify areas that need technical research",
+             "Be precise in identifying what information is missing or needs validation."
+         ],
+         response_model=RequirementsAnalysis,
+         structured_outputs=True,
+         reasoning=True,
+         storage=SqlAgentStorage(
+             table_name="requirements_sessions",
+             db_file="storage/agent_storage.db"
+         ),
+         debug_mode=True,
+         # memory=AgentMemory(memory_db=requirements_db)
+     )
+
+     technical_researcher: Agent = Agent(
+         name="ML Technical Researcher",
+         model=OpenAIChat(id="gpt-4o", api_key=api_key),
+         description="ML Expert specialized in researching technical implementations",
+         tools=[DuckDuckGo(search=True, news=False)],
+         instructions=[
+             "Research and validate technical aspects of ML solutions.",
+             "1. Search for similar ML implementations and best practices",
+             "2. Find recommended models and architectures",
+             "3. Research typical hyperparameters and evaluation metrics",
+             "4. Look for implementation constraints and requirements",
+             "5. Validate technical feasibility",
+             "Provide sources for all technical information.",
+             "Focus on recent and reliable technical sources."
+         ],
+         response_model=TechnicalResearch,
+         structured_outputs=True,
+         prevent_hallucination=True,
+         reasoning=True,
+         storage=SqlAgentStorage(
+             table_name="researcher_sessions",
+             db_file="storage/agent_storage.db"
+         ),
+         debug_mode=True,
+         # memory=AgentMemory(memory_db=researcher_db)
+     )
+
+     writer: Agent = Agent(
+         model=OpenAIChat(id="gpt-4o", api_key=api_key),
+         instructions=[
+             # trailing spaces keep the implicitly concatenated strings readable
+             "You will be provided with lots of structured outputs. Your work is to display this "
+             "in a nicely formatted manner without changing any of the content. Present all the links "
+             "as they are, with explicitly mentioned hyperlinks. Do not change any content."
+         ],
+         markdown=True,
+     )
+
+     def validate_model_response(self, response: ModelResponseStatus) -> List[str]:
+         """Check for missing or incomplete fields in ModelResponseStatus"""
+         logger.info("Checking for missing or incomplete fields in ModelResponseStatus...")
+         missing_fields = []
+         response_dict = response.model_dump()
+
+         for field, value in response_dict.items():
+             if value == "..." or value == ["..."]:
+                 missing_fields.append(field)
+             elif isinstance(value, list) and not value:
+                 missing_fields.append(field)
+
+         return missing_fields
+
+     def analyze_requirements(self, user_query: str) -> Optional[RequirementsAnalysis]:
+         """Run the requirements analysis"""
+         logger.info("Analyzing requirements...")
+         prompt = f"Analyze this business problem and provide initial technical specifications: {user_query}"
+
+         analyse_stream = self.requirements_analyst.run(prompt)
+         return analyse_stream.content
+
+     def conduct_research(self, research_prompt: str) -> Optional[TechnicalResearch]:
+         """Run the technical research"""
+         logger.info("Conducting technical research...")
+
+         conduct_stream = self.technical_researcher.run(research_prompt)
+         return conduct_stream.content
+
+     def finalize_analysis(self, final_prompt: str) -> Optional[RequirementsAnalysis]:
+         """Run the final analysis"""
+         logger.info("Finalizing analysis...")
+
+         finalise_stream = self.requirements_analyst.run(final_prompt)
+         return finalise_stream.content
+
+     def write_requirements_post(self, requirements_results: RequirementsAnalysis) -> Iterator[RunResponse]:
+         """
+         Render the requirements analysis through the writer agent.
+         :param requirements_results: requirements_analyst response
+         :return: iterator for the workflow response
+         """
+         logger.info("Writing requirements analysis...")
+         writer_input = {"model_response": requirements_results.model_response.model_dump(),
+                         "unclear_points": requirements_results.unclear_points,
+                         "search_queries": requirements_results.search_queries,
+                         "business_understanding": requirements_results.business_understanding
+                         }
+         yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)
+
+     def write_research_post(self, research_results: TechnicalResearch) -> Iterator[RunResponse]:
+         """
+         Render the research findings through the writer agent.
+         :param research_results: research content
+         :return: iterator for the workflow response
+         """
+         logger.info("Writing research findings...")
+         writer_input = {"research_findings": research_results.research_findings,
+                         "reference_implementations": research_results.reference_implementations,
+                         "sources": research_results.sources
+                         }
+         yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)
+
+     def run(self, user_query: str) -> Iterator[RunResponse]:
+         """
+         Run the ML analysis workflow
+         Args:
+             user_query: Description of the business problem
+         """
+         try:
+             # Initial requirements analysis
+             requirements_result: Optional[RequirementsAnalysis] = self.analyze_requirements(user_query)
+             if not requirements_result:
+                 yield RunResponse(
+                     event=RunEvent.workflow_completed,
+                     content="Error: Requirements analysis failed to produce valid results."
+                 )
+                 return
+             logger.info("Writing initial requirements analysis...")
+             yield from self.write_requirements_post(requirements_result)
+
+             # Check what needs research
+             missing_fields = self.validate_model_response(requirements_result.model_response)
+             logger.info("Collected missing fields.")
+             search_queries = requirements_result.search_queries
+             logger.info("Collected search queries.")
+             unclear_points = requirements_result.unclear_points
+             logger.info("Collected unclear points.")
+             if missing_fields or search_queries:
+                 # Conduct technical research
+                 logger.info("Researching technical specifications...")
+                 research_prompt = (
+                     f"Research the following for this ML problem: {user_query}\n"
+                     f"Missing information needed for: {', '.join(missing_fields)}\n"
+                     f"Specific topics to research: {', '.join(search_queries)}\n"
+                     f"Points needing clarification: {', '.join(unclear_points)}\n"
+                     f"Current understanding: {requirements_result.business_understanding}"
+                 )
+                 logger.info("Conducting research...")
+                 research_result: Optional[TechnicalResearch] = self.conduct_research(research_prompt)
+                 logger.info("Sharing research findings...")
+                 yield from self.write_research_post(research_result)
+
+                 final_prompt = (
+                     f"Original problem: {user_query}\n"
+                     f"Research findings: {research_result.research_findings}\n"
+                     "Please provide final technical specifications incorporating this research."
+                 )
+                 logger.info("Obtaining final requirements")
+                 final_result: Optional[RequirementsAnalysis] = self.finalize_analysis(final_prompt)
+                 logger.info("Writing final requirements...")
+                 yield from self.write_requirements_post(final_result)
+
+         except Exception as e:
+             logger.error(f"Workflow error: {str(e)}")
+             yield RunResponse(
+                 event=RunEvent.workflow_completed,
+                 content=f"Error in analysis workflow: {str(e)}"
+             )
+
+
+ class MLImplementationPlanner(Workflow):
+     """Workflow for creating detailed ML implementation plans"""
+
+     # Initialize architect agent
+     architect: Agent = Agent(
+         name="ML System Architect",
+         model=OpenAIChat(id="gpt-4o", api_key=api_key),
+         description="Expert ML System Architect specialized in detailed implementation planning",
+         instructions=[
+             "Create detailed technical implementation plans for ML systems.",
+             "1. Break down the system into logical components",
+             "2. Define detailed function specifications for each component",
+             "3. Specify clear interfaces between components",
+             "4. Consider error handling and edge cases",
+             "5. Plan testing and deployment strategies",
+             "Be extremely specific about function signatures and component interactions.",
+             "Focus on maintainability and scalability in the design."
+         ],
+         response_model=ImplementationPlan,
+         structured_outputs=True,
+         reasoning=True,
+         storage=SqlAgentStorage(
+             table_name="architect_sessions",
+             db_file="storage/agent_storage.db"
+         ),
+         debug_mode=True,
+         # memory=AgentMemory(memory_db=architect_db)
+     )
+
+     writer: Agent = Agent(
+         model=OpenAIChat(id="gpt-4o", api_key=api_key),
+         instructions=[
+             "You will be provided with lots of structured outputs. Your work is to display this "
+             "in a nicely formatted manner without changing any of the content."
+         ],
+         markdown=True,
+     )
+
+     def create_implementation_plan(self, planning_prompt: str) -> Optional[ImplementationPlan]:
+         """Create the implementation plan"""
+         logger.info("Creating implementation plan...")
+         planning_stream = self.architect.run(planning_prompt)
+         return planning_stream.content
+
+     def validate_interfaces(self, validation_prompt: str) -> Optional[ImplementationPlan]:
+         """Validate the component interfaces"""
+         logger.info("Validating interfaces...")
+         architect_stream = self.architect.run(validation_prompt)
+         return architect_stream.content
+
+     def write_implementation_post(self, implementation_results: ImplementationPlan) -> Iterator[RunResponse]:
+         """
+         Render the implementation plan through the writer agent.
+         :param implementation_results: implementation plan results
+         :return: iterator for the workflow response
+         """
+         logger.info("Writing implementation plan...")
+         writer_input = {"components": [comp.model_dump() for comp in implementation_results.components],
+                         "system_requirements": implementation_results.system_requirements,
+                         "deployment_notes": implementation_results.deployment_notes,
+                         "testing_strategy": implementation_results.testing_strategy,
+                         "implementation_order": implementation_results.implementation_order
+                         }
+         yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)
+
+     def run(
+         self,
+         requirements_analysis: RequirementsAnalysis,
+         technical_research: Optional[TechnicalResearch] = None
+     ) -> Iterator[RunResponse]:
+         """
+         Create implementation plan based on requirements analysis and research
+
+         Args:
+             requirements_analysis: Results from requirements analysis
+             technical_research: Optional results from technical research
+         """
+         try:
+             logger.info("Starting planning workflow...")
+             # Prepare comprehensive prompt for the architect
+             planning_prompt = (
+                 f"Create a detailed implementation plan for this ML system.\n\n"
+                 f"Business Understanding:\n{requirements_analysis.business_understanding}\n\n"
+                 f"Technical Specifications:\n"
+                 f"- Task Type: {requirements_analysis.model_response.task}\n"
+                 f"- Models: {', '.join(requirements_analysis.model_response.models)}\n"
+                 f"- Data Requirements: {requirements_analysis.model_response.data_source}\n"
+                 f"- Technical Requirements: {requirements_analysis.model_response.technical_requirements}\n"
+             )
+             if technical_research:
+                 logger.info("Technical Research found! Modifying context...")
+                 planning_prompt += (
+                     f"\nResearch Findings:\n{technical_research.research_findings}\n"
+                     f"Reference Implementations:\n"
+                     f"{chr(10).join(technical_research.reference_implementations)}"
+                 )
+
+             # Generate implementation plan
+             logger.info("generating implementation plan...")
+             plan_result: Optional[ImplementationPlan] = self.create_implementation_plan(planning_prompt)
+             logger.info("writing implementation plan...")
+             yield from self.write_implementation_post(plan_result)
+
+             if plan_result:
+                 validation_prompt = (
+                     "Validate the interfaces between these components "
+                     "and ensure all dependencies are properly specified:\n"
+                     f"{plan_result.components}"
+                 )
+                 logger.info("validating results...")
+                 validate_result: Optional[ImplementationPlan] = self.validate_interfaces(validation_prompt)
+                 logger.info("writing validated implementation plan...")
+                 yield from self.write_implementation_post(validate_result)
+
+         except Exception as e:
+             logger.error(f"Error in planning workflow: {e}")
+             # yield RunResponse(
+             #     event=RunEvent.workflow_completed,
+             #     content=f"Error in planning workflow: {str(e)}"
+             # )
+
324
+
src/workflows/graph_workflow.py ADDED
@@ -0,0 +1,50 @@
+ from src.utils.settings import settings
+ from llama_index.core.settings import Settings
+ from src.models.workflow_graph import GetResponseEvent, ConstructGraphEvent
+ from llama_index.core.workflow import (
+     Event,
+     StartEvent,
+     StopEvent,
+     Workflow,
+     Context,
+     step
+ )
+ import asyncio
+ from src.utils.helper import helper
+
+ class DesignGraphWorkflow(Workflow):
+
+     @step
+     async def GetRawWorkflow(self, ctx: Context, ev: StartEvent) -> GetResponseEvent:
+         _project_description = ev._project_description
+         llm = ev.llm
+
+         modules = settings.moduleList
+         prompt = helper._build_prompt(
+             project_desc=_project_description,
+             modules=modules
+         )
+
+         try:
+             response = await llm.acomplete(prompt)
+         except Exception as e:
+             # raising a bare string is a TypeError; wrap it in an exception type
+             raise RuntimeError(f"Error: {e}") from e
+
+         return GetResponseEvent(project_details=_project_description, rawResponse=response.text)
+
+     # workflow steps must be coroutines for the @step decorator
+     @step
+     async def ExtractGraph(self, ctx: Context, ev: GetResponseEvent) -> ConstructGraphEvent:
+         raw_llm_response = ev.rawResponse
+         graph = helper._parse_llm_response(raw_response=raw_llm_response)
+
+         return ConstructGraphEvent(workflowGraph=graph)
+
+     @step
+     async def exportGraph(self, ctx: Context, ev: ConstructGraphEvent) -> StopEvent:
+         graph = ev.workflowGraph
+         helper._store_graph(graph_data=graph)
+
+         return StopEvent(result=graph)
+
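
Note: a minimal sketch of driving this workflow outside FastAPI; it mirrors src/routers/graph.py and assumes importing src.utils.settings has already initialized Settings.llm (the project description is illustrative):

    import asyncio
    from llama_index.core.settings import Settings
    from src.utils.settings import settings  # side effect: configures Settings.llm
    from src.workflows.graph_workflow import DesignGraphWorkflow

    async def main():
        wf = DesignGraphWorkflow(timeout=60, verbose=True)
        graph = await wf.run(_project_description="Ingest, train and deploy a churn model",
                             llm=Settings.llm)
        print(graph)

    asyncio.run(main())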
src/workflows/reasoning_modules.py ADDED
@@ -0,0 +1,155 @@
+ """
+ This file contains all the reasoning modules and checks in place for devising the ML interview.
+ Make changes here to add new or missing questions.
+ """
+
+ from llama_index.core.prompts import PromptTemplate
+
+ _REASONING_MODULES = [
+
+     # I. Identification of the Problem Space
+     "1. What is the specific business problem? What is the problem space owner trying to achieve?",
+     "2. Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?", # 22
+     "3. What are the kinds of ML problems which can be used for this problem space?",
+     "4. What are the identified outcomes? What are the expected outcomes from the ML solution? What are the long-term implications of this problem and its solutions? (8)", # 8
+     "5. How does it affect end users or stakeholders? How urgent is the problem?",
+     "6. What is the core issue or problem that needs to be addressed? (16)", # 16
+     "7. What are the underlying causes or factors contributing to the problem? (17)", # 17
+     "8. What are the alternative perspectives or viewpoints on this problem? (7)", # 7
+     "9. What resources (data, compute power, expertise etc.) are needed to tackle the problem effectively? (22)", # 22
+     "10. Do the stakeholders have someone with ML expertise in the team?",
+     "11. Is there anything the stakeholders absolutely do not want?",
+     "12. How can I simplify the problem so that it is easier to solve? (4)", # 4
+     "13. Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned? (18)", # 18
+     "14. Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives? (28)", # 28
+     "15. Is the problem a design challenge that requires creative solutions and innovation? (30)", # 30
+     "16. Is the problem time-sensitive or urgent, requiring immediate attention and action? (32)", # 32
+     "17. What kinds of solutions typically are produced for this kind of problem specification? (33)", # 33
+     "18. Given the problem specification and the current best solution, have a guess about other possible solutions. (34)", # 34
+     "19. What is the best way to modify this current best solution, given what you know about these kinds of problem specifications? (36)", # 36
+     "20. How could I devise an experiment to help figure out the nuances of the problem?",
+
+     # II. Data Assessment
+     "21. Is there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed? (20)", # 20
+     "22. Does the available data meet the quality, quantity, and diversity requirements for the ML solution?",
+     "23. Does the existing data suffer from any biases which can be mitigated to improve performance?",
+     "24. Is there any scope for applying additional data?",
+     "25. How might additional data be applied to this problem?",
+     "26. Adaptation: Are there any privacy or security concerns with the data? How do they align with compliance standards?",
+     "27. What outcomes can come out of this data?",
+     "28. What ML models can be potentially applied on this data?",
+     "29. Adaptation: Are there constraints on data collection, storage, or computation? What preprocessing, modeling, or analysis is needed?",
+     "30. Does the problem involve a physical constraint, such as limited resources, infrastructure, or space? (26)", # 26
+     "31. Is the problem an analytical one that requires data analysis, modeling, or optimization techniques? (29)", # 29
+     "32. Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits. (14)", # 14
+     "33. Is the data preprocessed or does it need to be processed?",
+     "34. Is there a script for processing this or a particular methodology they follow or does it need to be created?",
+     "35. Is there a preferred framework or output format that should be followed?",
+     "36. How might the data be manipulated according to the identified ML problem?",
+
+     # III. Defining Goals and Metrics
+     "37. What is the acceptable error rate?",
+     "38. How critical is the problem? Who does it affect?",
+     "39. What ML evaluations can be applied to assess performance here?",
+     "40. Are there any benchmarks on which this performance must be measured?",
+     "41. How urgent is the problem? What kind of latency or response time is acceptable?",
+     "42. How efficient should the solution be? What is the expected budget?",
+     "43. What is to be optimised against?",
+     "44. What’s more important - benchmarking metrics or performance in production?",
+     "45. Adaptation: What metrics (e.g., accuracy, precision, recall, business KPIs) best reflect the solution's success? How can they be tracked over time?",
+     "46. How could I measure progress on this problem? (3)", # 3
+     "47. How can progress or success in solving the problem be measured or evaluated? (23)", # 23
+     "48. What indicators or metrics can be used? (24)", # 24
+     "49. How can I break down this problem into smaller, more manageable parts? (9)", # 9
+
+     # IV. Experimentation and Prototyping
+     "50. What experiments can validate feasibility or assumptions? Implement step-by-step approaches to refine the ML model.",
+     "51. What machine learning solutions must be applied on this problem to iteratively improve performance?",
+     "52. How could I devise an experiment to help solve that problem? (1)", # 1
+     "53. Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made. (2)", # 2
+     "54. Let’s think step by step. (38)", # 38
+     "55. Let’s make a step by step plan and implement it with good notation and explanation. (39)", # 39
+     "56. What assumptions about the data, model, or process need testing? What challenges might arise during training or deployment?",
+     "57. What are the key assumptions underlying this problem? (5)", # 5
+     "58. What are the potential risks and drawbacks of each solution? (6)", # 6
+     "59. What are the potential obstacles or challenges that might arise in solving this problem? (19)", # 19
+     "60. Is there a particular model the stakeholders are looking for or they want to experiment across multiple ones?",
+     "61. Do you have previously trained model or logs for this problem?",
+
+     # V. Ideation and Creativity
+     "62. Is there any out-of-the-box idea that can be executed on this data which aligns with the business needs?",
+     "63. Explore novel model architectures, feature engineering techniques, or data augmentation methods.",
+     "64. Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality. (11)", # 11
+     "65. Ignoring the current best solution, create an entirely new solution to the problem. (37)", # 37
+     "66. Challenge the status quo. Could a non-ML approach or an alternative ML model yield better results?",
+     "67. Let’s imagine the current best solution is totally wrong, what other ways are there to think about the problem specification? (35)", # 35
86
+
87
+ # VI. Common Reasoning Patterns
88
+ "68. How do interconnected components (data pipelines, business logic, ML models) influence each other? How can cross-functional collaboration improve the workflow?",
89
+ "69. Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions. (12)", # 12
90
+ "70. Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole. (13)", # 13
91
+ "71. Regularly evaluate the workflow, identifying areas for improvement and applying lessons from previous projects.",
92
+ "72. Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches. (15)", # 15
93
+ "73. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking. (10)" , # 10
94
+
95
+ ]
+
+ # Collapse the module list into a single newline-joined string for prompt interpolation.
+ _REASONING_MODULES = "\n".join(_REASONING_MODULES)
+
+ SELECT_PROMPT_TEMPLATE = PromptTemplate(
+     "Given the task: {task}, which of the following reasoning modules are relevant? "
+     "Elaborate on why they are relevant."
+     "\n\n {reasoning_modules}"
+ )
+
+ ADAPT_PROMPT_TEMPLATE = PromptTemplate(
+     "Without working out the full solution, adapt the following reasoning modules to be specific to our task:"
+     "\n{selected_modules} \n\nOur task: \n{task}"
+ )
+
+ IMPLEMENT_PROMPT_TEMPLATE = PromptTemplate(
+     "Without working out the full solution, create an actionable reasoning structure for the task using these "
+     "adapted reasoning modules: \n{adapted_modules} \n\nTask Description: \n{task}"
+ )
+
+ REASONING_PROMPT_TEMPLATE = PromptTemplate(
+     "Using the following reasoning structure: {reasoning_structure}\n\n"
+     "Solve this task, providing your final answer: {task}"
+ )
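Taken together, the four templates form the Select → Adapt → Implement → Reason chain that SelfDiscoverWorkflow (in workflow_discovery.py below) steps through. A minimal standalone sketch of that chain, assuming any llama_index LLM; MockLLM is used here purely as a stand-in, and the task string is illustrative:

```python
from llama_index.core.llms import MockLLM

from src.workflows.reasoning_modules import (
    _REASONING_MODULES, SELECT_PROMPT_TEMPLATE, ADAPT_PROMPT_TEMPLATE,
    IMPLEMENT_PROMPT_TEMPLATE, REASONING_PROMPT_TEMPLATE,
)

llm = MockLLM()  # stand-in; swap for the OpenAI/Ollama/Groq LLM chosen in config.yaml
task = "Predict customer churn from tabular CRM data"  # illustrative task

# Stage 1: select relevant modules, adapt them to the task, build a reasoning structure
selected = str(llm.complete(SELECT_PROMPT_TEMPLATE.format(
    task=task, reasoning_modules=_REASONING_MODULES)))
adapted = str(llm.complete(ADAPT_PROMPT_TEMPLATE.format(
    task=task, selected_modules=selected)))
structure = str(llm.complete(IMPLEMENT_PROMPT_TEMPLATE.format(
    task=task, adapted_modules=adapted)))

# Stage 2: solve the task with the generated structure
answer = str(llm.complete(REASONING_PROMPT_TEMPLATE.format(
    task=task, reasoning_structure=structure)))
print(answer)
```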
+
+
+ # TODO: Add LLM-as-the-judge system prompt here
+
+ JUDGE_REQUIREMENT_PROMPT_TEMPLATE = PromptTemplate(
+     "You receive some data from a conversation with the user and your task is to determine whether or not they "
+     "have provided the following requirements during the conversation. Analyse the conversation to find the "
+     "requirements. Use only the provided context."
+     "\n\nContext for Judgement: \n{judging_context}"
+     "\n\nRequirements to be satisfied: "
+     """
+     class WorkflowSchema(BaseModel):
+         data_source: str
+         data_format: str
+         additional_data_requirement: bool
+         constraints: str
+         available_preprocess_script: bool
+         preprocess_script: str
+         recommended_preprocess_steps: List[str]
+         task: str
+         models: List[str]
+         hyperparameters: List[str]
+         eval_metrics: List[str]
+         deploy_constraints: str
+     """
+     "Reply only with a 0 or 1 value, corresponding to false or true, without providing any explanation."
+ )
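Because the judge prompt constrains the model to a bare 0/1, the caller only needs to format the context in and string-compare the reply. A hedged sketch; the `history` string is a made-up conversation dump, and MockLLM is again only a stand-in:

```python
from llama_index.core.llms import MockLLM

from src.workflows.reasoning_modules import JUDGE_REQUIREMENT_PROMPT_TEMPLATE

llm = MockLLM()  # stand-in LLM

# Hypothetical conversation dump; in the app this comes from the interview context.
history = "user: the data is a CSV of passenger records. user: optimise for recall."

prompt = JUDGE_REQUIREMENT_PROMPT_TEMPLATE.format(judging_context=history)
reply = str(llm.complete(prompt)).strip()
requirements_met = reply == "1"  # the prompt constrains the reply to a bare 0 or 1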
+
+ # TODO: Add initial interaction for user query system prompt here
+
+ ML_EXPERT_PROMPT_TEMPLATE = PromptTemplate(
+     "You're a machine learning expert, skilled at interpreting user needs from a discussion and turning them into an "
+     "end-to-end workflow according to the user requirements. From the provided context, analyse the problem from a "
+     "technical as well as a business point of view, and rephrase it to focus on the aspects requiring additional "
+     "clarification and requirements, so your input can be forwarded to the team asking the follow-up questions. Make "
+     "vague language and requirements more clear. \n\nUser query: {query}."
+ )
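This rephrasing step takes a raw, possibly vague user query and turns it into a sharper statement for the downstream interview. An illustrative usage sketch (the query string is invented):

```python
from llama_index.core.llms import MockLLM

from src.workflows.reasoning_modules import ML_EXPERT_PROMPT_TEMPLATE

llm = MockLLM()  # stand-in LLM

raw_query = "we want to somehow predict which users leave us, data is in spreadsheets"
rephrased = str(llm.complete(ML_EXPERT_PROMPT_TEMPLATE.format(query=raw_query)))
# `rephrased` can then be handed to the workflow that asks the follow-up questions.
```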
src/workflows/workflow_context.py ADDED
File without changes
src/workflows/workflow_discovery.py ADDED
@@ -0,0 +1,143 @@
+ """
+ This module uses the self-discover workflow hand-in-hand with data representation on an index built from the user
+ responses and prompt programming from DSPy. The Self-Discover workflow has two stages for any given task:
+
+ 1. Stage 1:
+     a. Select: selects a subset of reasoning modules.
+     b. Adapt: adapts the selected reasoning modules to the task.
+     c. Implement: produces a reasoning structure for the task.
+ 2. Stage 2:
+     Uses the generated reasoning structure for the task to generate an answer.
+ """
+
+ import asyncio
+
+ from llama_index.core.llms import LLM
+ from llama_index.core.workflow import Workflow, Context, StartEvent, StopEvent, step
+ from llama_index.core.settings import Settings
+ from src.models.discovery_events import GetModulesEvent, RefineModulesEvent, ReasoningStructureEvent
+ from src.workflows.reasoning_modules import _REASONING_MODULES, REASONING_PROMPT_TEMPLATE
+ from src.workflows.reasoning_modules import SELECT_PROMPT_TEMPLATE, ADAPT_PROMPT_TEMPLATE, IMPLEMENT_PROMPT_TEMPLATE
+ from src.workflows.reasoning_modules import JUDGE_REQUIREMENT_PROMPT_TEMPLATE
+
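The three custom events come from src.models.discovery_events, which is not part of this diff. Judging by how they are constructed below, their shape is presumably along these lines; this is an assumption, not the actual file:

```python
from llama_index.core.workflow import Event

class GetModulesEvent(Event):
    task: str
    modules: str

class RefineModulesEvent(Event):
    task: str
    refined_modules: str

class ReasoningStructureEvent(Event):
    task: str
    reasoning_structure: str
```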
+
+ class SelfDiscoverWorkflow(Workflow):
+     """Self-discover workflow."""
+
+     @step
+     async def get_modules(self, context: Context, event: StartEvent) -> GetModulesEvent:
+         """
+         Select the modules required for the task from the defined reasoning modules.
+         :param context: global context maintained for the user until StopEvent is emitted.
+         :param event: trigger event for this step, here the start of the workflow.
+         :return: pydantic GetModulesEvent with "task" and selected "modules".
+         """
+         task = event.get("task")
+         llm: LLM = event.get("llm")
+         await context.set("llm", llm)
+
+         prompt = SELECT_PROMPT_TEMPLATE.format(task=task, reasoning_modules=_REASONING_MODULES)
+         result = llm.complete(prompt)
+
+         return GetModulesEvent(task=task, modules=str(result))
+
+     @step
+     async def refine_modules(self, context: Context, event: GetModulesEvent) -> RefineModulesEvent:
+         """
+         Refines and adapts the selected subset of reasoning modules based on the task.
+         :param context: global context maintained for the user until StopEvent is emitted.
+         :param event: trigger event for the step, here completion of GetModulesEvent.
+         :return: pydantic RefineModulesEvent with "task" and "refined_modules".
+         """
+         task = event.task
+         modules = event.modules
+         llm: LLM = await context.get("llm")
+
+         prompt = ADAPT_PROMPT_TEMPLATE.format(task=task, selected_modules=modules)
+         result = llm.complete(prompt)
+
+         return RefineModulesEvent(task=task, refined_modules=str(result))
+
+     @step
+     async def create_reasoning_structure(self, context: Context, event: RefineModulesEvent) -> ReasoningStructureEvent:
+         """
+         Creates a reasoning structure for the task given the adapted reasoning modules.
+         :param context: global context maintained for the user until StopEvent is emitted.
+         :param event: trigger event for the step, here completion of RefineModulesEvent.
+         :return: pydantic ReasoningStructureEvent with "task" and "reasoning_structure".
+         """
+         task = event.task
+         refined_modules = event.refined_modules
+         llm: LLM = await context.get("llm")
+
+         prompt = IMPLEMENT_PROMPT_TEMPLATE.format(task=task, adapted_modules=refined_modules)
+         result = llm.complete(prompt)
+
+         return ReasoningStructureEvent(task=task, reasoning_structure=str(result))
+
+     @step
+     async def get_final_result(self, context: Context, event: ReasoningStructureEvent) -> StopEvent:
+         """
+         Gets the final result by solving the task with the generated reasoning structure.
+         :param context: global context maintained for the user until StopEvent is emitted.
+         :param event: trigger event for the step, here completion of ReasoningStructureEvent.
+         :return: StopEvent signal, last step of the workflow.
+         """
+         task = event.task
+         reasoning_structure = event.reasoning_structure
+         llm: LLM = await context.get("llm")
+
+         prompt = REASONING_PROMPT_TEMPLATE.format(task=task, reasoning_structure=reasoning_structure)
+         result = llm.complete(prompt)
+         await context.set("workflow_result", result)
+
+         return StopEvent(result=str(result))
+
+
+ class JudgeWorkflow(Workflow):
+     """Judgement workflow to decide whether further questions are necessary."""
+
+     @step
+     async def judge(self, context: Context, event: StartEvent) -> StopEvent:
+         """
+         Judges whether the conversation context satisfies all required workflow fields.
+         :param context: global context maintained for the user until StopEvent is emitted.
+         :param event: trigger event for this step, here the start of the workflow.
+         :return: StopEvent signal, last step of the workflow.
+         """
+         judging_context = event.get("judging_context")
+         llm: LLM = event.get("llm")
+         await context.set("llm", llm)
+
+         prompt = JUDGE_REQUIREMENT_PROMPT_TEMPLATE.format(judging_context=judging_context)
+         result = str(llm.complete(prompt)).strip()
+         # The prompt instructs a bare "0" or "1"; strip whitespace before comparing.
+         result = result != "0"
+
+         return StopEvent(result=result)
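A hedged usage sketch of the judge; the helper name and timeout are illustrative, and Workflow.run forwards keyword arguments into the StartEvent just as main() below does for SelfDiscoverWorkflow:

```python
from llama_index.core.llms import LLM

from src.workflows.workflow_discovery import JudgeWorkflow

async def needs_follow_up(conversation_text: str, llm: LLM) -> bool:
    """Return True when the judge reports that required fields are still missing."""
    judge = JudgeWorkflow(timeout=60)  # illustrative timeout, in seconds
    satisfied = await judge.run(judging_context=conversation_text, llm=llm)
    return not satisfied
```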
+
+
+ # runner for the workflow
+ async def main():
+     workflow = SelfDiscoverWorkflow()
+     # example task
+     predefined_task = (
+         "The user wants a step-by-step workflow for the Titanic survival prediction ML problem. "
+         "They want to understand whether a person had a chance of surviving the Titanic accident "
+         "depending on their background, ticket, gender and other passenger attributes. To perform this, they "
+         "want to design a machine learning workflow and derive conclusions from their data. The final "
+         "model should be able to predict survive/die classes. The data has these features: "
+         "survival, ticket class, sex, age, siblings/spouses, parents/children, ticket, fare, cabin, embarked. "
+         "In case the problem is not clear at any point and you need more input from the user, share the current "
+         "workflow with the user and end with follow-up questions."
+     )
+     intermediate_result = await workflow.run(task=predefined_task, llm=Settings.llm)
+     print(str(intermediate_result))
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
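main() assumes Settings.llm has already been populated. As a sketch, wiring it to the OpenAI model named in config.yaml (assuming OPENAI_API_KEY is set in the environment) might look like:

```python
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI

# Matches OpenAIConfig.llm in config.yaml; requires OPENAI_API_KEY in the environment.
Settings.llm = OpenAI(model="gpt-4o-mini")
```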