From fe76ca7456e7df4b02a28da53f4ae75bea33ba8e Mon Sep 17 00:00:00 2001 From: Pluto Date: Thu, 5 Mar 2026 22:48:20 -0600 Subject: [PATCH] fix: agent Dockerfile package structure and Dead container crash - Dockerfile: COPY to /app/agent/ and use agent.main:app for proper package imports - docker_ops: use low-level API in get_health() to avoid NotFound on containers stuck in Docker Dead state - Add comprehensive README with architecture, API docs, and usage Co-Authored-By: Claude Opus 4.6 --- README.md | 88 +++++++++++++++++++++++++++++++++++++++++++-- agent/Dockerfile | 4 +-- agent/docker_ops.py | 8 +++-- 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4be8a1d..e5a77a5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,87 @@ -# farm-manager +# Farm Manager -Web dashboard for managing Docker services across the homelab cluster \ No newline at end of file +Web dashboard for managing Docker services across the homelab cluster. Provides full control (start/stop/restart/logs/pull) with service grouping for bulk operations. + +## Architecture + +``` +Browser --> API Server (hf-pdocker-01:8888) + | + +------+------+ + | | | + Agent Agent Agent + (01:8889)(02:8889)(bart:8889) + | | | + Docker Docker Docker + Socket Socket Socket +``` + +- **API Server** — FastAPI on :8888. Serves the dashboard, proxies commands to node agents, manages service groups. +- **Node Agent** — FastAPI on :8889 per node. Mounts Docker socket, auto-detects Swarm vs Compose containers. +- **Dashboard** — Vanilla HTML/CSS/JS. Dark theme, service cards, group management, log viewer. + +## Quick Start + +```bash +# Build and push images +docker build -f agent/Dockerfile -t 127.0.0.1:5050/farm-agent:latest agent/ +docker build -f Dockerfile.server -t 127.0.0.1:5050/farm-manager:latest . 
+docker push 127.0.0.1:5050/farm-agent:latest +docker push 127.0.0.1:5050/farm-manager:latest + +# Deploy agent on each node +cd /mnt/docker-data/compose/{node}/farm-agent && docker compose up -d + +# Deploy server on hf-pdocker-01 +cd /mnt/docker-data/compose/hf-pdocker-01/farm-manager && docker compose up -d +``` + +## Configuration + +`config.json` — Node definitions (name, host, agent_port) + +`groups.json` — Service groups for bulk operations (10 default groups included) + +Both files are stored at `/mnt/docker-data/configs/farm-manager/`. + +## API + +### Server (`:8888`) + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/nodes` | List nodes with health | +| GET | `/api/services` | All containers across nodes | +| POST | `/api/services/{node}/{container}/start` | Start container | +| POST | `/api/services/{node}/{container}/stop` | Stop container | +| POST | `/api/services/{node}/{container}/restart` | Restart container | +| GET | `/api/services/{node}/{container}/logs` | Get logs | +| POST | `/api/services/{node}/{container}/pull` | Pull image | +| GET/POST/PUT/DELETE | `/api/groups[/{id}]` | Group CRUD | +| POST | `/api/groups/{id}/start\|stop\|restart` | Bulk group actions | + +### Agent (`:8889`) + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/health` | Agent health check | +| GET | `/containers` | List all containers | +| POST | `/containers/{id}/start\|stop\|restart` | Container actions | +| GET | `/containers/{id}/logs` | Get logs | +| POST | `/containers/{id}/pull` | Pull image | + +## Testing + +```bash +pip install -r agent/requirements.txt -r server/requirements.txt pytest pytest-asyncio httpx +pytest -v +``` + +52 tests covering agent Docker operations, agent API, server proxy routes, and group CRUD. 
+ +## Swarm Handling + +The agent auto-detects Swarm containers via the `com.docker.swarm.service.name` label: +- **Start**: Scales service to 1 replica +- **Stop**: Scales service to 0 replicas +- **Restart**: Force-updates the service diff --git a/agent/Dockerfile b/agent/Dockerfile index 03f52a6..68e1444 100644 --- a/agent/Dockerfile +++ b/agent/Dockerfile @@ -7,8 +7,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY . . +COPY . /app/agent/ EXPOSE 8889 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8889"] +CMD ["uvicorn", "agent.main:app", "--host", "0.0.0.0", "--port", "8889"] diff --git a/agent/docker_ops.py b/agent/docker_ops.py index dfad27c..0a451e6 100644 --- a/agent/docker_ops.py +++ b/agent/docker_ops.py @@ -217,8 +217,12 @@ class DockerOps: return {"success": False, "message": str(exc)} def get_health(self) -> dict: - """Return node health info: hostname and container count.""" - containers = self.client.containers.list(all=True) + """Return node health info: hostname and container count. + + Uses the low-level API to avoid NotFound errors from containers + stuck in Docker's 'Dead' state. + """ + containers = self.client.api.containers(all=True) return { "status": "healthy", "hostname": socket.gethostname(),