diff --git a/agent/docker_ops.py b/agent/docker_ops.py index 0a451e6..579e953 100644 --- a/agent/docker_ops.py +++ b/agent/docker_ops.py @@ -84,16 +84,31 @@ class DockerOps: def list_containers(self) -> list[dict]: """List all containers (running + stopped) with metadata. + Also includes Swarm services scaled to 0 replicas (which have no + containers) so they remain visible in the dashboard. + Returns a list of dicts, each containing: id, name, status, image, created, uptime, is_swarm, swarm_service + + Uses the low-level API to get container IDs to avoid NotFound errors + from containers that disappear between list and inspect (e.g. Swarm + task containers in the Dead state). """ - containers = self.client.containers.list(all=True) + raw_ids = [r["Id"] for r in self.client.api.containers(all=True)] result = [] - for c in containers: + swarm_services_with_containers = set() + + for cid in raw_ids: + try: + c = self.client.containers.get(cid) + except NotFound: + continue image_tags = c.image.tags image_name = image_tags[0] if image_tags else "" swarm_service = self._swarm_service_name(c) + if swarm_service: + swarm_services_with_containers.add(swarm_service) started_at = c.attrs.get("State", {}).get("StartedAt", "") result.append( { @@ -107,10 +122,53 @@ class DockerOps: "swarm_service": swarm_service, } ) + + # Include Swarm services with 0 replicas (only works on manager nodes) + try: + for svc in self.client.services.list(): + if svc.name not in swarm_services_with_containers: + image = ( + svc.attrs.get("Spec", {}) + .get("TaskTemplate", {}) + .get("ContainerSpec", {}) + .get("Image", "") + ) + if "@" in image: + image = image.split("@")[0] + result.append( + { + "id": f"swarm:{svc.name}", + "name": svc.name, + "status": "stopped", + "image": image, + "created": svc.attrs.get("CreatedAt", ""), + "uptime": None, + "is_swarm": True, + "swarm_service": svc.name, + } + ) + except Exception: + pass # Not a manager node or Swarm not active + return result def start_container(self, container_id: str) -> dict: """Start a container. For Swarm services, scale to 1 replica.""" + # Handle 0-replica Swarm services (synthetic swarm: IDs) + if container_id.startswith("swarm:"): + svc_name = container_id[6:] + try: + service = self._get_service(svc_name) + if service: + service.scale(1) + return { + "success": True, + "message": f"Scaled Swarm service {svc_name} to 1 replica", + } + return {"success": False, "message": f"Swarm service {svc_name} not found"} + except Exception as exc: + return {"success": False, "message": str(exc)} + try: container = self.client.containers.get(container_id) except NotFound: @@ -135,6 +193,20 @@ class DockerOps: def stop_container(self, container_id: str) -> dict: """Stop a container. For Swarm services, scale to 0 replicas.""" + if container_id.startswith("swarm:"): + svc_name = container_id[6:] + try: + service = self._get_service(svc_name) + if service: + service.scale(0) + return { + "success": True, + "message": f"Scaled Swarm service {svc_name} to 0 replicas", + } + return {"success": False, "message": f"Swarm service {svc_name} not found"} + except Exception as exc: + return {"success": False, "message": str(exc)} + try: container = self.client.containers.get(container_id) except NotFound: @@ -159,6 +231,20 @@ class DockerOps: def restart_container(self, container_id: str) -> dict: """Restart a container. For Swarm services, force-update.""" + if container_id.startswith("swarm:"): + svc_name = container_id[6:] + try: + service = self._get_service(svc_name) + if service: + service.force_update() + return { + "success": True, + "message": f"Force-updated Swarm service {svc_name}", + } + return {"success": False, "message": f"Swarm service {svc_name} not found"} + except Exception as exc: + return {"success": False, "message": str(exc)} + try: container = self.client.containers.get(container_id) except NotFound: @@ -187,6 +273,13 @@ class DockerOps: Returns: dict with keys ``container`` (name) and ``logs`` (string). """ + if container_id.startswith("swarm:"): + svc_name = container_id[6:] + return { + "container": svc_name, + "logs": "Service is scaled to 0 replicas — no logs available.\nStart the service to view logs.", + } + try: container = self.client.containers.get(container_id) except NotFound: @@ -200,6 +293,27 @@ class DockerOps: def pull_image(self, container_id: str) -> dict: """Pull the latest version of the container's image.""" + if container_id.startswith("swarm:"): + svc_name = container_id[6:] + service = self._get_service(svc_name) + if not service: + return {"success": False, "message": f"Swarm service {svc_name} not found"} + image = ( + service.attrs.get("Spec", {}) + .get("TaskTemplate", {}) + .get("ContainerSpec", {}) + .get("Image", "") + ) + if "@" in image: + image = image.split("@")[0] + if not image: + return {"success": False, "message": f"No image found for service {svc_name}"} + try: + self.client.images.pull(image) + return {"success": True, "message": f"Pulled image {image}"} + except Exception as exc: + return {"success": False, "message": str(exc)} + try: container = self.client.containers.get(container_id) except NotFound: