Spaces:
Runtime error
Runtime error
nananie143
commited on
Upload folder using huggingface_hub
Browse files- orchestrator.py +77 -0
orchestrator.py
CHANGED
@@ -525,3 +525,80 @@ class AgentOrchestrator:
|
|
525 |
self.logger.error(f"Resource error: {error}")
|
526 |
# Implement recovery logic
|
527 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
525 |
self.logger.error(f"Resource error: {error}")
|
526 |
# Implement recovery logic
|
527 |
pass
|
528 |
+
|
529 |
+
async def _recover_agent(self, agent_id: str):
|
530 |
+
"""Recover a failed agent."""
|
531 |
+
try:
|
532 |
+
agent = self.agents[agent_id]
|
533 |
+
|
534 |
+
# Log recovery attempt
|
535 |
+
self.logger.info(f"Attempting to recover agent {agent_id}")
|
536 |
+
|
537 |
+
# Reset agent state
|
538 |
+
agent.state = AgentState.IDLE
|
539 |
+
agent.load = 0
|
540 |
+
agent.last_active = datetime.now()
|
541 |
+
|
542 |
+
# Reassign any tasks that were assigned to this agent
|
543 |
+
for task_id, task in self.tasks.items():
|
544 |
+
if task.assigned_to == agent_id:
|
545 |
+
await self._reassign_task(task_id)
|
546 |
+
|
547 |
+
# Update metrics
|
548 |
+
agent.metrics["recovery_attempts"] = agent.metrics.get("recovery_attempts", 0) + 1
|
549 |
+
|
550 |
+
self.logger.info(f"Successfully recovered agent {agent_id}")
|
551 |
+
return True
|
552 |
+
|
553 |
+
except Exception as e:
|
554 |
+
self.logger.error(f"Failed to recover agent {agent_id}: {e}")
|
555 |
+
return False
|
556 |
+
|
557 |
+
async def _recover_task(self, task_id: str):
|
558 |
+
"""Recover a failed task."""
|
559 |
+
try:
|
560 |
+
task = self.tasks[task_id]
|
561 |
+
|
562 |
+
# Log recovery attempt
|
563 |
+
self.logger.info(f"Attempting to recover task {task_id}")
|
564 |
+
|
565 |
+
# Reset task state
|
566 |
+
task.state = "pending"
|
567 |
+
task.assigned_to = None
|
568 |
+
|
569 |
+
# Try to reassign the task
|
570 |
+
await self._reassign_task(task_id)
|
571 |
+
|
572 |
+
self.logger.info(f"Successfully recovered task {task_id}")
|
573 |
+
return True
|
574 |
+
|
575 |
+
except Exception as e:
|
576 |
+
self.logger.error(f"Failed to recover task {task_id}: {e}")
|
577 |
+
return False
|
578 |
+
|
579 |
+
async def _recover_resource(self, resource_id: str):
|
580 |
+
"""Recover a failed resource."""
|
581 |
+
try:
|
582 |
+
# Log recovery attempt
|
583 |
+
self.logger.info(f"Attempting to recover resource {resource_id}")
|
584 |
+
|
585 |
+
# Release any locks on the resource
|
586 |
+
if resource_id in self.resource_locks:
|
587 |
+
lock = self.resource_locks[resource_id]
|
588 |
+
if lock.locked():
|
589 |
+
lock.release()
|
590 |
+
|
591 |
+
# Reset resource state
|
592 |
+
if resource_id in self.resource_pool:
|
593 |
+
self.resource_pool[resource_id] = {
|
594 |
+
"state": "available",
|
595 |
+
"last_error": None,
|
596 |
+
"last_recovery": datetime.now()
|
597 |
+
}
|
598 |
+
|
599 |
+
self.logger.info(f"Successfully recovered resource {resource_id}")
|
600 |
+
return True
|
601 |
+
|
602 |
+
except Exception as e:
|
603 |
+
self.logger.error(f"Failed to recover resource {resource_id}: {e}")
|
604 |
+
return False
|