nananie143 commited on
Commit
defdab6
·
verified ·
1 Parent(s): 01711c6

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. orchestrator.py +77 -0
orchestrator.py CHANGED
@@ -525,3 +525,80 @@ class AgentOrchestrator:
525
  self.logger.error(f"Resource error: {error}")
526
  # Implement recovery logic
527
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  self.logger.error(f"Resource error: {error}")
526
  # Implement recovery logic
527
  pass
528
+
529
+ async def _recover_agent(self, agent_id: str):
530
+ """Recover a failed agent."""
531
+ try:
532
+ agent = self.agents[agent_id]
533
+
534
+ # Log recovery attempt
535
+ self.logger.info(f"Attempting to recover agent {agent_id}")
536
+
537
+ # Reset agent state
538
+ agent.state = AgentState.IDLE
539
+ agent.load = 0
540
+ agent.last_active = datetime.now()
541
+
542
+ # Reassign any tasks that were assigned to this agent
543
+ for task_id, task in self.tasks.items():
544
+ if task.assigned_to == agent_id:
545
+ await self._reassign_task(task_id)
546
+
547
+ # Update metrics
548
+ agent.metrics["recovery_attempts"] = agent.metrics.get("recovery_attempts", 0) + 1
549
+
550
+ self.logger.info(f"Successfully recovered agent {agent_id}")
551
+ return True
552
+
553
+ except Exception as e:
554
+ self.logger.error(f"Failed to recover agent {agent_id}: {e}")
555
+ return False
556
+
557
+ async def _recover_task(self, task_id: str):
558
+ """Recover a failed task."""
559
+ try:
560
+ task = self.tasks[task_id]
561
+
562
+ # Log recovery attempt
563
+ self.logger.info(f"Attempting to recover task {task_id}")
564
+
565
+ # Reset task state
566
+ task.state = "pending"
567
+ task.assigned_to = None
568
+
569
+ # Try to reassign the task
570
+ await self._reassign_task(task_id)
571
+
572
+ self.logger.info(f"Successfully recovered task {task_id}")
573
+ return True
574
+
575
+ except Exception as e:
576
+ self.logger.error(f"Failed to recover task {task_id}: {e}")
577
+ return False
578
+
579
+ async def _recover_resource(self, resource_id: str):
580
+ """Recover a failed resource."""
581
+ try:
582
+ # Log recovery attempt
583
+ self.logger.info(f"Attempting to recover resource {resource_id}")
584
+
585
+ # Release any locks on the resource
586
+ if resource_id in self.resource_locks:
587
+ lock = self.resource_locks[resource_id]
588
+ if lock.locked():
589
+ lock.release()
590
+
591
+ # Reset resource state
592
+ if resource_id in self.resource_pool:
593
+ self.resource_pool[resource_id] = {
594
+ "state": "available",
595
+ "last_error": None,
596
+ "last_recovery": datetime.now()
597
+ }
598
+
599
+ self.logger.info(f"Successfully recovered resource {resource_id}")
600
+ return True
601
+
602
+ except Exception as e:
603
+ self.logger.error(f"Failed to recover resource {resource_id}: {e}")
604
+ return False