diff --git a/Cargo.lock b/Cargo.lock index d5de42fb3..ec9fc2a81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3224,6 +3224,7 @@ version = "0.0.0" dependencies = [ "anyhow", "axum 0.8.8", + "bollard", "bytes", "clap", "futures", @@ -3257,6 +3258,7 @@ dependencies = [ "serde_json", "sha2 0.10.9", "sqlx", + "tar", "tempfile", "thiserror 2.0.18", "tokio", diff --git a/architecture/gateway.md b/architecture/gateway.md index 5dd2419af..7aed7542e 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -26,7 +26,7 @@ graph TD SUP_REG["SupervisorSessionRegistry"] STORE["Store
(SQLite / Postgres)"] COMPUTE["ComputeRuntime"] - DRIVER["ComputeDriver
(kubernetes / vm)"] + DRIVER["ComputeDriver
(kubernetes / docker / vm)"] WATCH_BUS["SandboxWatchBus"] LOG_BUS["TracingLogBus"] PLAT_BUS["PlatformEventBus"] @@ -75,6 +75,7 @@ graph TD | TLS | `crates/openshell-server/src/tls.rs` | `TlsAcceptor` wrapping rustls with ALPN | | Persistence | `crates/openshell-server/src/persistence/mod.rs` | `Store` enum (SQLite/Postgres), generic object CRUD, protobuf codec | | Compute runtime | `crates/openshell-server/src/compute/mod.rs` | `ComputeRuntime`, gateway-owned sandbox lifecycle orchestration over a compute backend | +| Compute driver: Docker | `crates/openshell-server/src/compute/docker.rs` | In-process Docker create/delete/watch, supervisor side-load, local daemon integration | | Compute driver: Kubernetes | `crates/openshell-driver-kubernetes/src/driver.rs` | Kubernetes CRD create/delete/watch, pod template translation | | Compute driver: VM | `crates/openshell-driver-vm/src/driver.rs` | Per-sandbox microVM create/delete/watch, supervisor-only guest boot | | Sandbox index | `crates/openshell-server/src/sandbox_index.rs` | `SandboxIndex` -- in-memory name/pod-to-id correlation | @@ -103,6 +104,7 @@ The gateway boots in `cli::run_cli` (`crates/openshell-server/src/cli.rs`) and p 1. Connect to the persistence store (`Store::connect`), which auto-detects SQLite vs Postgres from the URL prefix and runs migrations. 2. Create `ComputeRuntime` with a `ComputeDriver` implementation selected by `OPENSHELL_DRIVERS`: - `kubernetes` wraps `KubernetesComputeDriver` in `ComputeDriverService`, so the gateway uses the `openshell.compute.v1.ComputeDriver` RPC surface even without transport. + - `docker` constructs `DockerComputeDriver` in-process, talks directly to the local Docker daemon through Bollard, and keeps Docker-only configuration (supervisor/TLS bind mounts) local to `openshell-server`. - `vm` spawns the standalone `openshell-driver-vm` binary as a local compute-driver process, resolves it from `--driver-dir`, conventional libexec install paths, or a sibling of the gateway binary, connects to it over a Unix domain socket, and keeps the libkrun/rootfs runtime out of the gateway binary. 3. Build `ServerState` (shared via `Arc` across all handlers), including a fresh `SupervisorSessionRegistry`. 4. **Spawn background tasks**: @@ -132,7 +134,12 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d | `--sandbox-namespace` | `OPENSHELL_SANDBOX_NAMESPACE` | `default` | Kubernetes namespace for sandbox CRDs | | `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods | | `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) | -| `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes` and `vm`. | +| `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes`, `docker`, and `vm`. | +| `--docker-supervisor-bin` | `OPENSHELL_DOCKER_SUPERVISOR_BIN` | Sibling `openshell-sandbox` → local cargo build → extracted from `--docker-supervisor-image` | Linux `openshell-sandbox` binary bind-mounted into Docker sandboxes as PID 1 | +| `--docker-supervisor-image` | `OPENSHELL_DOCKER_SUPERVISOR_IMAGE` | `ghcr.io/nvidia/openshell/supervisor:` | Image the gateway pulls to extract the Linux supervisor binary when no explicit path or local build is available. The binary is cached under `$XDG_DATA_HOME/openshell/docker-supervisor//openshell-sandbox` and reused across restarts. | +| `--docker-tls-ca` | `OPENSHELL_DOCKER_TLS_CA` | None | CA cert bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/ca.crt` for gateway mTLS | +| `--docker-tls-cert` | `OPENSHELL_DOCKER_TLS_CERT` | None | Client cert bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/tls.crt` for gateway mTLS | +| `--docker-tls-key` | `OPENSHELL_DOCKER_TLS_KEY` | None | Client private key bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/tls.key` for gateway mTLS | | `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state | | `--driver-dir` | `OPENSHELL_DRIVER_DIR` | unset | Override directory for `openshell-driver-vm`. When unset, the gateway searches `~/.local/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling binary. | | `--vm-krun-log-level` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun log level for VM helper processes | @@ -599,6 +606,17 @@ The Helm chart template is at `deploy/helm/openshell/templates/statefulset.yaml` The gateway reaches the sandbox exclusively through the supervisor-initiated `ConnectSupervisor` session, so the driver never returns sandbox network endpoints. +### Docker Driver + +`DockerComputeDriver` (`crates/openshell-server/src/compute/docker.rs`) is built directly into the gateway. It connects to the local Docker daemon with Bollard and provisions one long-lived container per sandbox. + +- **Create**: Pulls the requested image according to `sandbox_image_pull_policy`, creates a labeled container, bind-mounts a Linux `openshell-sandbox` binary read-only at `/opt/openshell/bin/openshell-sandbox`, and starts that supervisor as PID 1. No sandbox ports are published. +- **Persistence**: The Docker driver does not create a separate workspace volume. `/sandbox` lives on the container writable layer, so data persists across gateway restarts as long as the same container remains. +- **Gateway callback**: When `OPENSHELL_GRPC_ENDPOINT` points at `localhost` or another loopback address, the driver rewrites it to `host.openshell.internal` inside the container and injects `host-gateway` aliases so the supervisor can still open its outbound `ConnectSupervisor` stream. +- **TLS**: For `https://` gateway endpoints, the driver requires `--docker-tls-ca`, `--docker-tls-cert`, and `--docker-tls-key`. These files are bind-mounted read-only into `/etc/openshell/tls/client`, and the driver sets `OPENSHELL_TLS_CA`, `OPENSHELL_TLS_CERT`, and `OPENSHELL_TLS_KEY` to those paths. +- **Limits**: V1 supports only `cpu_limit` and `memory_limit`, mapped to Docker `NanoCpus` and `Memory`. GPU requests, resource requests, `agent_socket_path`, and non-empty `platform_config` are rejected as failed preconditions. +- **Watch stream**: The driver polls Docker for OpenShell-managed containers, emits snapshot diffs and deletions, and rebuilds its state from labels after gateway restart. Containers running under Docker restart policy `unless-stopped` come back after daemon restart without any inbound port setup. + ### VM Driver `VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket. diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 3217a783d..061feffe5 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -15,6 +15,7 @@ use std::str::FromStr; pub enum ComputeDriverKind { Kubernetes, Vm, + Docker, Podman, } @@ -24,6 +25,7 @@ impl ComputeDriverKind { match self { Self::Kubernetes => "kubernetes", Self::Vm => "vm", + Self::Docker => "docker", Self::Podman => "podman", } } @@ -42,9 +44,10 @@ impl FromStr for ComputeDriverKind { match value.trim().to_ascii_lowercase().as_str() { "kubernetes" => Ok(Self::Kubernetes), "vm" => Ok(Self::Vm), + "docker" => Ok(Self::Docker), "podman" => Ok(Self::Podman), other => Err(format!( - "unsupported compute driver '{other}'. expected one of: kubernetes, vm, podman" + "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman" )), } } @@ -385,12 +388,16 @@ mod tests { "podman".parse::().unwrap(), ComputeDriverKind::Podman ); + assert_eq!( + "docker".parse::().unwrap(), + ComputeDriverKind::Docker + ); } #[test] fn compute_driver_kind_rejects_unknown_values() { - let err = "docker".parse::().unwrap_err(); - assert!(err.contains("unsupported compute driver 'docker'")); + let err = "firecracker".parse::().unwrap_err(); + assert!(err.contains("unsupported compute driver 'firecracker'")); } #[test] diff --git a/crates/openshell-sandbox/src/procfs.rs b/crates/openshell-sandbox/src/procfs.rs index a6dd379b8..1ce91dd20 100644 --- a/crates/openshell-sandbox/src/procfs.rs +++ b/crates/openshell-sandbox/src/procfs.rs @@ -576,9 +576,12 @@ mod tests { } /// An unlinked executable whose filename contains non-UTF-8 bytes must - /// still strip exactly one kernel-added `" (deleted)"` suffix. We operate - /// on raw bytes via `OsStrExt`, so invalid UTF-8 is not a reason to skip - /// the strip and return a path that downstream `stat()` calls will reject. + /// still resolve to its original path. Some kernels append a literal + /// `" (deleted)"` suffix to `/proc//exe` after unlink while others + /// do not for this edge case, so the assertion has to tolerate both. + /// + /// When the suffix is present, we still need to strip exactly one copy + /// while operating on raw bytes via `OsStrExt`. #[cfg(target_os = "linux")] #[test] fn binary_path_strips_suffix_for_non_utf8_filename() { @@ -603,13 +606,10 @@ mod tests { wait_for_child_exec(pid, &exe_path); std::fs::remove_file(&exe_path).unwrap(); - // Sanity: raw readlink ends with " (deleted)" and is not valid UTF-8. + // Sanity: the raw readlink remains non-UTF-8 after unlink. let raw = std::fs::read_link(format!("/proc/{pid}/exe")).unwrap(); let raw_bytes = raw.as_os_str().as_bytes(); - assert!( - raw_bytes.ends_with(b" (deleted)"), - "kernel should append ' (deleted)' to unlinked exe readlink" - ); + let kernel_appended_deleted_suffix = raw_bytes.ends_with(b" (deleted)"); assert!( std::str::from_utf8(raw_bytes).is_err(), "test precondition: raw readlink must contain non-UTF-8 bytes" @@ -619,12 +619,19 @@ mod tests { binary_path(pid).expect("binary_path should succeed for non-UTF-8 unlinked path"); assert_eq!( resolved, exe_path, - "binary_path must strip exactly one ' (deleted)' suffix for non-UTF-8 paths" - ); - assert!( - !resolved.as_os_str().as_bytes().ends_with(b" (deleted)"), - "stripped path must not end with ' (deleted)'" + "binary_path must resolve non-UTF-8 unlinked paths back to the original filename" ); + if kernel_appended_deleted_suffix { + assert!( + !resolved.as_os_str().as_bytes().ends_with(b" (deleted)"), + "stripped path must not end with ' (deleted)'" + ); + } else { + assert_eq!( + raw, exe_path, + "kernels that omit the deleted suffix should report the original unlinked path" + ); + } let _ = child.kill(); let _ = child.wait(); diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index b2524ff0b..5af790092 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -67,6 +67,7 @@ sqlx = { workspace = true } reqwest = { workspace = true } uuid = { workspace = true } url = { workspace = true } +bollard = { version = "0.20" } hmac = "0.12" sha2 = "0.10" hex = "0.4" @@ -74,6 +75,8 @@ russh = "0.57" rand = "0.9" petname = "2" ipnet = "2" +tar = "0.4" +tempfile = "3" [features] dev-settings = ["openshell-core/dev-settings"] @@ -81,7 +84,6 @@ dev-settings = ["openshell-core/dev-settings"] [dev-dependencies] hyper-rustls = { version = "0.27", default-features = false, features = ["native-tokio", "http1", "tls12", "logging", "ring", "webpki-tokio"] } rcgen = { version = "0.13", features = ["crypto", "pem"] } -tempfile = "3" tokio-tungstenite = { workspace = true } futures-util = "0.3" wiremock = "0.6" diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index 0b64de803..9f664ebee 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; use tracing::info; use tracing_subscriber::EnvFilter; -use crate::compute::VmComputeConfig; +use crate::compute::{DockerComputeConfig, VmComputeConfig}; use crate::{run_server, tracing_bus::TracingLogBus}; /// `OpenShell` gateway process - gRPC and HTTP server with protocol multiplexing. @@ -165,6 +165,33 @@ struct Args { #[arg(long, env = "OPENSHELL_VM_TLS_KEY")] vm_tls_key: Option, + /// Linux `openshell-sandbox` binary bind-mounted into Docker sandboxes. + /// + /// When unset the gateway falls back to (in order) a sibling + /// `openshell-sandbox` next to the gateway binary, a local cargo build, + /// or extracting the binary from `--docker-supervisor-image`. + #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_BIN")] + docker_supervisor_bin: Option, + + /// Image the Docker driver pulls to extract the Linux + /// `openshell-sandbox` binary when no explicit `--docker-supervisor-bin` + /// override or local build is available. Defaults to + /// `ghcr.io/nvidia/openshell/supervisor:`. + #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_IMAGE")] + docker_supervisor_image: Option, + + /// CA certificate bind-mounted into Docker sandboxes for gateway mTLS. + #[arg(long, env = "OPENSHELL_DOCKER_TLS_CA")] + docker_tls_ca: Option, + + /// Client certificate bind-mounted into Docker sandboxes for gateway mTLS. + #[arg(long, env = "OPENSHELL_DOCKER_TLS_CERT")] + docker_tls_cert: Option, + + /// Client private key bind-mounted into Docker sandboxes for gateway mTLS. + #[arg(long, env = "OPENSHELL_DOCKER_TLS_KEY")] + docker_tls_key: Option, + /// Disable TLS entirely — listen on plaintext HTTP. /// Use this when the gateway sits behind a reverse proxy or tunnel /// (e.g. Cloudflare Tunnel) that terminates TLS at the edge. @@ -283,6 +310,14 @@ async fn run_from_args(args: Args) -> Result<()> { guest_tls_key: args.vm_tls_key, }; + let docker_config = DockerComputeConfig { + supervisor_bin: args.docker_supervisor_bin, + supervisor_image: args.docker_supervisor_image, + guest_tls_ca: args.docker_tls_ca, + guest_tls_cert: args.docker_tls_cert, + guest_tls_key: args.docker_tls_key, + }; + if args.disable_tls { info!("TLS disabled — listening on plaintext HTTP"); } else if args.disable_gateway_auth { @@ -291,7 +326,7 @@ async fn run_from_args(args: Args) -> Result<()> { info!(bind = %config.bind_address, "Starting OpenShell server"); - run_server(config, vm_config, tracing_log_bus) + run_server(config, vm_config, docker_config, tracing_log_bus) .await .into_diagnostic() } diff --git a/crates/openshell-server/src/compute/docker.rs b/crates/openshell-server/src/compute/docker.rs new file mode 100644 index 000000000..d5a868209 --- /dev/null +++ b/crates/openshell-server/src/compute/docker.rs @@ -0,0 +1,1983 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Bundled Docker compute driver. + +use bollard::Docker; +use bollard::errors::Error as BollardError; +use bollard::models::{ + ContainerCreateBody, ContainerSummary, ContainerSummaryStateEnum, HostConfig, Mount, + MountTypeEnum, RestartPolicy, RestartPolicyNameEnum, +}; +use bollard::query_parameters::{ + CreateContainerOptionsBuilder, CreateImageOptions, DownloadFromContainerOptionsBuilder, + ListContainersOptionsBuilder, RemoveContainerOptionsBuilder, +}; +use bytes::Bytes; +use futures::{Stream, StreamExt}; +use openshell_core::proto::compute::v1::{ + CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, + DriverCondition, DriverSandbox, DriverSandboxStatus, DriverSandboxTemplate, + GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, + ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse, + ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent, + WatchSandboxesEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent, + compute_driver_server::ComputeDriver, watch_sandboxes_event, +}; +use openshell_core::{Config, Error, Result as CoreResult}; +use std::collections::HashMap; +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::{broadcast, mpsc}; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; +use tracing::{info, warn}; +use url::{Host, Url}; + +const WATCH_BUFFER: usize = 128; +const WATCH_POLL_INTERVAL: Duration = Duration::from_secs(2); +const WATCH_POLL_MAX_BACKOFF: Duration = Duration::from_secs(30); + +const MANAGED_BY_LABEL_KEY: &str = "openshell.ai/managed-by"; +const MANAGED_BY_LABEL_VALUE: &str = "openshell"; +const SANDBOX_ID_LABEL_KEY: &str = "openshell.ai/sandbox-id"; +const SANDBOX_NAME_LABEL_KEY: &str = "openshell.ai/sandbox-name"; +const SANDBOX_NAMESPACE_LABEL_KEY: &str = "openshell.ai/sandbox-namespace"; + +const SUPERVISOR_MOUNT_PATH: &str = "/opt/openshell/bin/openshell-sandbox"; +#[cfg(test)] +const TLS_MOUNT_DIR: &str = "/etc/openshell/tls/client"; +const TLS_CA_MOUNT_PATH: &str = "/etc/openshell/tls/client/ca.crt"; +const TLS_CERT_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.crt"; +const TLS_KEY_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.key"; +const SANDBOX_COMMAND: &str = "sleep infinity"; +const HOST_OPENSHELL_INTERNAL: &str = "host.openshell.internal"; +const HOST_DOCKER_INTERNAL: &str = "host.docker.internal"; + +/// Default image holding the Linux `openshell-sandbox` binary. The gateway +/// pulls this image and extracts the binary to a host-side cache when no +/// explicit `--docker-supervisor-bin` override or local build is available. +const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; + +/// Image tag baked in at compile time to pair the gateway with a matching +/// supervisor image. Mirrors the pattern used by `openshell-bootstrap`: +/// defaults to `"dev"`; CI overrides with a release version via the +/// `OPENSHELL_IMAGE_TAG` env var during `cargo build`. +const DEFAULT_DOCKER_SUPERVISOR_IMAGE_TAG: &str = match option_env!("OPENSHELL_IMAGE_TAG") { + Some(tag) => tag, + None => "dev", +}; + +/// Path to the supervisor binary inside the `openshell/supervisor` image. +const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/usr/local/bin/openshell-sandbox"; + +/// Return the default `ghcr.io/nvidia/openshell/supervisor:` reference +/// used when no supervisor binary override is provided. +pub fn default_docker_supervisor_image() -> String { + format!("{DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO}:{DEFAULT_DOCKER_SUPERVISOR_IMAGE_TAG}") +} + +/// Queried by the Docker driver to decide when a sandbox's supervisor +/// relay is live. Implementations return `true` once a sandbox has an +/// active `ConnectSupervisor` session registered. +/// +/// The driver cannot observe the supervisor's SSH socket directly (it +/// lives inside the container), so it leans on this signal to flip the +/// Ready condition from `DependenciesNotReady` to `True`. +pub trait SupervisorReadiness: Send + Sync + 'static { + fn is_supervisor_connected(&self, sandbox_id: &str) -> bool; +} + +/// Gateway-local configuration for the bundled Docker compute driver. +#[derive(Debug, Clone, Default)] +pub struct DockerComputeConfig { + /// Optional override for the Linux `openshell-sandbox` binary mounted into containers. + pub supervisor_bin: Option, + + /// Optional override for the image the gateway pulls to extract the + /// Linux `openshell-sandbox` binary when no explicit binary path or + /// local build is available. Defaults to + /// `ghcr.io/nvidia/openshell/supervisor:`. + pub supervisor_image: Option, + + /// Host-side CA certificate for Docker sandbox mTLS. + pub guest_tls_ca: Option, + + /// Host-side client certificate for Docker sandbox mTLS. + pub guest_tls_cert: Option, + + /// Host-side private key for Docker sandbox mTLS. + pub guest_tls_key: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct DockerGuestTlsPaths { + pub(crate) ca: PathBuf, + pub(crate) cert: PathBuf, + pub(crate) key: PathBuf, +} + +#[derive(Debug, Clone)] +struct DockerDriverRuntimeConfig { + default_image: String, + image_pull_policy: String, + grpc_endpoint: String, + ssh_socket_path: String, + ssh_handshake_secret: String, + ssh_handshake_skew_secs: u64, + log_level: String, + supervisor_bin: PathBuf, + guest_tls: Option, + daemon_version: String, +} + +#[derive(Clone)] +pub struct DockerComputeDriver { + docker: Arc, + config: DockerDriverRuntimeConfig, + events: broadcast::Sender, + supervisor_readiness: Arc, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +struct DockerResourceLimits { + nano_cpus: Option, + memory_bytes: Option, +} + +type WatchStream = + Pin> + Send + 'static>>; + +impl DockerComputeDriver { + pub async fn new( + config: &Config, + docker_config: &DockerComputeConfig, + supervisor_readiness: Arc, + ) -> CoreResult { + if config.grpc_endpoint.trim().is_empty() { + return Err(Error::config( + "grpc_endpoint is required when using the docker compute driver", + )); + } + + let docker = Docker::connect_with_local_defaults() + .map_err(|err| Error::execution(format!("failed to create Docker client: {err}")))?; + let version = docker.version().await.map_err(|err| { + Error::execution(format!("failed to query Docker daemon version: {err}")) + })?; + let daemon_arch = normalize_docker_arch(version.arch.as_deref().unwrap_or_default()); + let supervisor_bin = resolve_supervisor_bin(&docker, docker_config, &daemon_arch).await?; + let guest_tls = docker_guest_tls_paths(config, docker_config)?; + + let driver = Self { + docker: Arc::new(docker), + config: DockerDriverRuntimeConfig { + default_image: config.sandbox_image.clone(), + image_pull_policy: config.sandbox_image_pull_policy.clone(), + grpc_endpoint: config.grpc_endpoint.clone(), + ssh_socket_path: config.sandbox_ssh_socket_path.clone(), + ssh_handshake_secret: config.ssh_handshake_secret.clone(), + ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, + log_level: config.log_level.clone(), + supervisor_bin, + guest_tls, + daemon_version: version.version.unwrap_or_else(|| "unknown".to_string()), + }, + events: broadcast::channel(WATCH_BUFFER).0, + supervisor_readiness, + }; + + let poll_driver = driver.clone(); + tokio::spawn(async move { + poll_driver.poll_loop().await; + }); + + Ok(driver) + } + + fn capabilities(&self) -> GetCapabilitiesResponse { + GetCapabilitiesResponse { + driver_name: "docker".to_string(), + driver_version: self.config.daemon_version.clone(), + default_image: self.config.default_image.clone(), + supports_gpu: false, + } + } + + fn validate_sandbox(&self, sandbox: &DriverSandbox) -> Result<(), Status> { + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec is required"))?; + let template = spec + .template + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?; + + if template.image.trim().is_empty() { + return Err(Status::failed_precondition( + "docker sandboxes require a template image", + )); + } + if spec.gpu { + return Err(Status::failed_precondition( + "docker compute driver does not support gpu sandboxes", + )); + } + if !template.agent_socket_path.trim().is_empty() { + return Err(Status::failed_precondition( + "docker compute driver does not support template.agent_socket_path", + )); + } + if template + .platform_config + .as_ref() + .is_some_and(|config| !config.fields.is_empty()) + { + return Err(Status::failed_precondition( + "docker compute driver does not support template.platform_config", + )); + } + + let _ = docker_resource_limits(template)?; + Ok(()) + } + + async fn get_sandbox_snapshot( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result, Status> { + let container = self + .find_managed_container_summary(sandbox_id, sandbox_name) + .await?; + Ok(container.and_then(|summary| { + sandbox_from_container_summary(&summary, self.supervisor_readiness.as_ref()) + })) + } + + async fn current_snapshots(&self) -> Result, Status> { + let containers = self.list_managed_container_summaries().await?; + let mut sandboxes = containers + .iter() + .filter_map(|summary| { + sandbox_from_container_summary(summary, self.supervisor_readiness.as_ref()) + }) + .collect::>(); + sandboxes.sort_by(|left, right| left.id.cmp(&right.id)); + Ok(sandboxes) + } + + async fn create_sandbox_inner(&self, sandbox: &DriverSandbox) -> Result<(), Status> { + self.validate_sandbox(sandbox)?; + + if self + .find_managed_container_summary(&sandbox.id, &sandbox.name) + .await? + .is_some() + { + return Err(Status::already_exists("sandbox already exists")); + } + + let template = sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .expect("validated sandbox has template"); + self.ensure_image_available(&template.image).await?; + + let container_name = container_name_for_sandbox(sandbox); + let create_body = self.build_container_create_body(sandbox)?; + self.docker + .create_container( + Some( + CreateContainerOptionsBuilder::default() + .name(container_name.as_str()) + .build(), + ), + create_body, + ) + .await + .map_err(|err| { + create_status_from_docker_error("create docker sandbox container", err) + })?; + + if let Err(err) = self.docker.start_container(&container_name, None).await { + let cleanup = self + .docker + .remove_container( + &container_name, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await; + if let Err(cleanup_err) = cleanup { + warn!( + sandbox_id = %sandbox.id, + container_name, + error = %cleanup_err, + "Failed to clean up Docker container after start failure" + ); + } + return Err(create_status_from_docker_error( + "start docker sandbox container", + err, + )); + } + + Ok(()) + } + + async fn delete_sandbox_inner( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result { + let Some(container) = self + .find_managed_container_summary(sandbox_id, sandbox_name) + .await? + else { + return Ok(false); + }; + // Prefer the container ID: it's always populated in ContainerSummary, + // remains valid while the container exists, and is accepted by + // `remove_container` just like a name. Falling back to the parsed + // name covers any transient where `id` is missing. + let Some(target) = container + .id + .as_deref() + .filter(|id| !id.is_empty()) + .map(str::to_string) + .or_else(|| summary_container_name(&container)) + else { + return Ok(false); + }; + + match self + .docker + .remove_container( + &target, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await + { + Ok(()) => Ok(true), + Err(err) if is_not_found_error(&err) => Ok(false), + Err(err) => Err(internal_status("delete docker sandbox container", err)), + } + } + + async fn poll_loop(self) { + let mut previous = match self.current_snapshot_map().await { + Ok(snapshots) => snapshots, + Err(err) => { + warn!(error = %err, "Failed to seed Docker sandbox watch state"); + HashMap::new() + } + }; + + // Exponential backoff on consecutive Docker failures to avoid a 2s + // warn-log flood when the daemon is unreachable for an extended + // period (e.g. restart, socket removed). + let mut backoff = WATCH_POLL_INTERVAL; + loop { + tokio::time::sleep(backoff).await; + match self.current_snapshot_map().await { + Ok(current) => { + emit_snapshot_diff(&self.events, &previous, ¤t); + previous = current; + backoff = WATCH_POLL_INTERVAL; + } + Err(err) => { + warn!( + error = %err, + backoff_secs = backoff.as_secs(), + "Failed to poll Docker sandboxes" + ); + backoff = (backoff * 2).min(WATCH_POLL_MAX_BACKOFF); + } + } + } + } + + async fn current_snapshot_map(&self) -> Result, Status> { + self.current_snapshots().await.map(|snapshots| { + snapshots + .into_iter() + .map(|sandbox| (sandbox.id.clone(), sandbox)) + .collect() + }) + } + + async fn list_managed_container_summaries(&self) -> Result, Status> { + let filters = label_filters([format!("{MANAGED_BY_LABEL_KEY}={MANAGED_BY_LABEL_VALUE}")]); + self.docker + .list_containers(Some( + ListContainersOptionsBuilder::default() + .all(true) + .filters(&filters) + .build(), + )) + .await + .map_err(|err| internal_status("list Docker sandbox containers", err)) + } + + async fn find_managed_container_summary( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result, Status> { + let mut label_filter_values = + vec![format!("{MANAGED_BY_LABEL_KEY}={MANAGED_BY_LABEL_VALUE}")]; + if !sandbox_id.is_empty() { + label_filter_values.push(format!("{SANDBOX_ID_LABEL_KEY}={sandbox_id}")); + } else if !sandbox_name.is_empty() { + label_filter_values.push(format!("{SANDBOX_NAME_LABEL_KEY}={sandbox_name}")); + } + + let filters = label_filters(label_filter_values); + let containers = self + .docker + .list_containers(Some( + ListContainersOptionsBuilder::default() + .all(true) + .filters(&filters) + .build(), + )) + .await + .map_err(|err| internal_status("find Docker sandbox container", err))?; + + Ok(containers.into_iter().find(|summary| { + let Some(labels) = summary.labels.as_ref() else { + return false; + }; + let id_matches = sandbox_id.is_empty() + || labels + .get(SANDBOX_ID_LABEL_KEY) + .is_some_and(|value| value == sandbox_id); + let name_matches = sandbox_name.is_empty() + || labels + .get(SANDBOX_NAME_LABEL_KEY) + .is_some_and(|value| value == sandbox_name); + id_matches && name_matches + })) + } + + async fn ensure_image_available(&self, image: &str) -> Result<(), Status> { + let policy = self.config.image_pull_policy.trim().to_ascii_lowercase(); + match policy.as_str() { + "" | "ifnotpresent" => { + if self.docker.inspect_image(image).await.is_ok() { + return Ok(()); + } + self.pull_image(image).await + } + "always" => self.pull_image(image).await, + "never" => match self.docker.inspect_image(image).await { + Ok(_) => Ok(()), + Err(err) if is_not_found_error(&err) => Err(Status::failed_precondition(format!( + "docker image '{image}' is not present locally and sandbox_image_pull_policy=Never" + ))), + Err(err) => Err(internal_status("inspect Docker image", err)), + }, + other => Err(Status::failed_precondition(format!( + "unsupported docker sandbox_image_pull_policy '{other}'; expected Always, IfNotPresent, or Never", + ))), + } + } + + async fn pull_image(&self, image: &str) -> Result<(), Status> { + let mut stream = self.docker.create_image( + Some(CreateImageOptions { + from_image: Some(image.to_string()), + ..Default::default() + }), + None, + None, + ); + while let Some(result) = stream.next().await { + result.map_err(|err| internal_status("pull Docker image", err))?; + } + Ok(()) + } + + fn build_container_create_body( + &self, + sandbox: &DriverSandbox, + ) -> Result { + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec is required"))?; + let template = spec + .template + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?; + let resource_limits = docker_resource_limits(template)?; + let mut labels = template.labels.clone(); + labels.insert( + MANAGED_BY_LABEL_KEY.to_string(), + MANAGED_BY_LABEL_VALUE.to_string(), + ); + labels.insert(SANDBOX_ID_LABEL_KEY.to_string(), sandbox.id.clone()); + labels.insert(SANDBOX_NAME_LABEL_KEY.to_string(), sandbox.name.clone()); + labels.insert( + SANDBOX_NAMESPACE_LABEL_KEY.to_string(), + sandbox.namespace.clone(), + ); + + Ok(ContainerCreateBody { + image: Some(template.image.clone()), + user: Some("0".to_string()), + env: Some(build_environment(sandbox, &self.config)), + entrypoint: Some(vec![SUPERVISOR_MOUNT_PATH.to_string()]), + labels: Some(labels), + host_config: Some(HostConfig { + nano_cpus: resource_limits.nano_cpus, + memory: resource_limits.memory_bytes, + mounts: Some(build_mounts(&self.config)), + restart_policy: Some(RestartPolicy { + name: Some(RestartPolicyNameEnum::UNLESS_STOPPED), + maximum_retry_count: None, + }), + cap_add: Some(vec![ + "SYS_ADMIN".to_string(), + "NET_ADMIN".to_string(), + "SYS_PTRACE".to_string(), + "SYSLOG".to_string(), + ]), + extra_hosts: Some(vec![ + format!("{HOST_DOCKER_INTERNAL}:host-gateway"), + format!("{HOST_OPENSHELL_INTERNAL}:host-gateway"), + ]), + ..Default::default() + }), + ..Default::default() + }) + } +} + +#[tonic::async_trait] +impl ComputeDriver for DockerComputeDriver { + type WatchSandboxesStream = WatchStream; + + async fn get_capabilities( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(self.capabilities())) + } + + async fn validate_sandbox_create( + &self, + request: Request, + ) -> Result, Status> { + let sandbox = request + .into_inner() + .sandbox + .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; + self.validate_sandbox(&sandbox)?; + Ok(Response::new(ValidateSandboxCreateResponse {})) + } + + async fn get_sandbox( + &self, + request: Request, + ) -> Result, Status> { + let request = request.into_inner(); + if request.sandbox_id.is_empty() && request.sandbox_name.is_empty() { + return Err(Status::invalid_argument( + "sandbox_id or sandbox_name is required", + )); + } + + let sandbox = self + .get_sandbox_snapshot(&request.sandbox_id, &request.sandbox_name) + .await? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + + if !request.sandbox_id.is_empty() && request.sandbox_id != sandbox.id { + return Err(Status::failed_precondition( + "sandbox_id did not match the fetched sandbox", + )); + } + + Ok(Response::new(GetSandboxResponse { + sandbox: Some(sandbox), + })) + } + + async fn list_sandboxes( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(ListSandboxesResponse { + sandboxes: self.current_snapshots().await?, + })) + } + + async fn create_sandbox( + &self, + request: Request, + ) -> Result, Status> { + let sandbox = request + .into_inner() + .sandbox + .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; + self.create_sandbox_inner(&sandbox).await?; + Ok(Response::new(CreateSandboxResponse {})) + } + + async fn stop_sandbox( + &self, + _request: Request, + ) -> Result, Status> { + Err(Status::unimplemented( + "stop sandbox is not implemented by the docker compute driver", + )) + } + + async fn delete_sandbox( + &self, + request: Request, + ) -> Result, Status> { + let request = request.into_inner(); + Ok(Response::new(DeleteSandboxResponse { + deleted: self + .delete_sandbox_inner(&request.sandbox_id, &request.sandbox_name) + .await?, + })) + } + + async fn watch_sandboxes( + &self, + _request: Request, + ) -> Result, Status> { + // Subscribe before taking the initial snapshot so any event emitted + // between the snapshot and this subscriber becoming active is still + // delivered. Downstream consumers treat sandbox events as + // idempotent (keyed by sandbox id), so a duplicate event is benign + // while a missed one leaks state. + let mut rx = self.events.subscribe(); + let initial = self.current_snapshots().await?; + let (tx, out_rx) = mpsc::channel(WATCH_BUFFER); + tokio::spawn(async move { + for sandbox in initial { + if tx + .send(Ok(WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Sandbox( + WatchSandboxesSandboxEvent { + sandbox: Some(sandbox), + }, + )), + })) + .await + .is_err() + { + return; + } + } + + loop { + match rx.recv().await { + Ok(event) => { + if tx.send(Ok(event)).await.is_err() { + return; + } + } + Err(broadcast::error::RecvError::Lagged(_)) => continue, + Err(broadcast::error::RecvError::Closed) => return, + } + } + }); + + Ok(Response::new(Box::pin(ReceiverStream::new(out_rx)))) + } +} + +fn build_mounts(config: &DockerDriverRuntimeConfig) -> Vec { + let mut mounts = vec![bind_mount( + &config.supervisor_bin, + SUPERVISOR_MOUNT_PATH, + true, + )]; + if let Some(tls) = &config.guest_tls { + mounts.push(bind_mount(&tls.ca, TLS_CA_MOUNT_PATH, true)); + mounts.push(bind_mount(&tls.cert, TLS_CERT_MOUNT_PATH, true)); + mounts.push(bind_mount(&tls.key, TLS_KEY_MOUNT_PATH, true)); + } + mounts +} + +fn bind_mount(source: &Path, target: &str, read_only: bool) -> Mount { + Mount { + target: Some(target.to_string()), + source: Some(source.display().to_string()), + typ: Some(MountTypeEnum::BIND), + read_only: Some(read_only), + ..Default::default() + } +} + +fn build_environment(sandbox: &DriverSandbox, config: &DockerDriverRuntimeConfig) -> Vec { + let mut environment = HashMap::from([ + ("HOME".to_string(), "/root".to_string()), + ( + "PATH".to_string(), + "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(), + ), + ("TERM".to_string(), "xterm".to_string()), + ( + "OPENSHELL_LOG_LEVEL".to_string(), + sandbox_log_level(sandbox, &config.log_level), + ), + ]); + + if let Some(spec) = sandbox.spec.as_ref() { + if let Some(template) = spec.template.as_ref() { + environment.extend(template.environment.clone()); + } + environment.extend(spec.environment.clone()); + } + + environment.insert( + "OPENSHELL_ENDPOINT".to_string(), + container_visible_openshell_endpoint(&config.grpc_endpoint), + ); + environment.insert("OPENSHELL_SANDBOX_ID".to_string(), sandbox.id.clone()); + environment.insert("OPENSHELL_SANDBOX".to_string(), sandbox.name.clone()); + environment.insert( + "OPENSHELL_SSH_SOCKET_PATH".to_string(), + config.ssh_socket_path.clone(), + ); + environment.insert( + "OPENSHELL_SANDBOX_COMMAND".to_string(), + SANDBOX_COMMAND.to_string(), + ); + environment.insert( + "OPENSHELL_SSH_HANDSHAKE_SECRET".to_string(), + config.ssh_handshake_secret.clone(), + ); + environment.insert( + "OPENSHELL_SSH_HANDSHAKE_SKEW_SECS".to_string(), + config.ssh_handshake_skew_secs.to_string(), + ); + if config.guest_tls.is_some() { + environment.insert( + "OPENSHELL_TLS_CA".to_string(), + TLS_CA_MOUNT_PATH.to_string(), + ); + environment.insert( + "OPENSHELL_TLS_CERT".to_string(), + TLS_CERT_MOUNT_PATH.to_string(), + ); + environment.insert( + "OPENSHELL_TLS_KEY".to_string(), + TLS_KEY_MOUNT_PATH.to_string(), + ); + } + + let mut pairs = environment.into_iter().collect::>(); + pairs.sort_by(|left, right| left.0.cmp(&right.0)); + pairs + .into_iter() + .map(|(key, value)| format!("{key}={value}")) + .collect() +} + +fn sandbox_log_level(sandbox: &DriverSandbox, default_level: &str) -> String { + sandbox + .spec + .as_ref() + .map(|spec| spec.log_level.as_str()) + .filter(|level| !level.is_empty()) + .unwrap_or(default_level) + .to_string() +} + +fn container_visible_openshell_endpoint(endpoint: &str) -> String { + let Ok(mut url) = Url::parse(endpoint) else { + return endpoint.to_string(); + }; + + let should_rewrite = match url.host() { + Some(Host::Ipv4(ip)) => ip.is_loopback() || ip.is_unspecified(), + Some(Host::Ipv6(ip)) => ip.is_loopback() || ip.is_unspecified(), + Some(Host::Domain(host)) => host.eq_ignore_ascii_case("localhost"), + None => false, + }; + + if should_rewrite && url.set_host(Some(HOST_OPENSHELL_INTERNAL)).is_ok() { + return url.to_string(); + } + + endpoint.to_string() +} + +fn docker_resource_limits( + template: &DriverSandboxTemplate, +) -> Result { + let Some(resources) = template.resources.as_ref() else { + return Ok(DockerResourceLimits::default()); + }; + + if !resources.cpu_request.trim().is_empty() { + return Err(Status::failed_precondition( + "docker compute driver does not support resources.requests.cpu", + )); + } + if !resources.memory_request.trim().is_empty() { + return Err(Status::failed_precondition( + "docker compute driver does not support resources.requests.memory", + )); + } + + Ok(DockerResourceLimits { + nano_cpus: parse_cpu_limit(&resources.cpu_limit)?, + memory_bytes: parse_memory_limit(&resources.memory_limit)?, + }) +} + +fn parse_cpu_limit(value: &str) -> Result, Status> { + let value = value.trim(); + if value.is_empty() { + return Ok(None); + } + if let Some(millicores) = value.strip_suffix('m') { + let millicores = millicores.parse::().map_err(|_| { + Status::failed_precondition(format!( + "invalid docker cpu_limit '{value}'; expected an integer or millicore quantity", + )) + })?; + if millicores <= 0 { + return Err(Status::failed_precondition( + "docker cpu_limit must be greater than zero", + )); + } + return Ok(Some(millicores.saturating_mul(1_000_000))); + } + + let cores = value.parse::().map_err(|_| { + Status::failed_precondition(format!( + "invalid docker cpu_limit '{value}'; expected an integer or millicore quantity", + )) + })?; + if !cores.is_finite() || cores <= 0.0 { + return Err(Status::failed_precondition( + "docker cpu_limit must be greater than zero", + )); + } + + Ok(Some((cores * 1_000_000_000.0).round() as i64)) +} + +fn parse_memory_limit(value: &str) -> Result, Status> { + let value = value.trim(); + if value.is_empty() { + return Ok(None); + } + + let number_end = value + .find(|ch: char| !(ch.is_ascii_digit() || ch == '.')) + .unwrap_or(value.len()); + let (number, suffix) = value.split_at(number_end); + let amount = number.parse::().map_err(|_| { + Status::failed_precondition(format!( + "invalid docker memory_limit '{value}'; expected a Kubernetes-style quantity", + )) + })?; + if !amount.is_finite() || amount <= 0.0 { + return Err(Status::failed_precondition( + "docker memory_limit must be greater than zero", + )); + } + + let multiplier = match suffix { + "" => 1_f64, + "Ki" => 1024_f64, + "Mi" => 1024_f64.powi(2), + "Gi" => 1024_f64.powi(3), + "Ti" => 1024_f64.powi(4), + "Pi" => 1024_f64.powi(5), + "Ei" => 1024_f64.powi(6), + "K" => 1000_f64, + "M" => 1000_f64.powi(2), + "G" => 1000_f64.powi(3), + "T" => 1000_f64.powi(4), + "P" => 1000_f64.powi(5), + "E" => 1000_f64.powi(6), + _ => { + return Err(Status::failed_precondition(format!( + "invalid docker memory_limit suffix '{suffix}'", + ))); + } + }; + + Ok(Some((amount * multiplier).round() as i64)) +} + +fn sandbox_from_container_summary( + summary: &ContainerSummary, + readiness: &dyn SupervisorReadiness, +) -> Option { + let labels = summary.labels.as_ref()?; + let id = labels.get(SANDBOX_ID_LABEL_KEY)?.clone(); + let name = labels.get(SANDBOX_NAME_LABEL_KEY)?.clone(); + let namespace = labels + .get(SANDBOX_NAMESPACE_LABEL_KEY) + .cloned() + .unwrap_or_default(); + + let supervisor_connected = readiness.is_supervisor_connected(&id); + Some(DriverSandbox { + id, + name: name.clone(), + namespace, + spec: None, + status: Some(driver_status_from_summary( + summary, + &name, + supervisor_connected, + )), + }) +} + +fn driver_status_from_summary( + summary: &ContainerSummary, + sandbox_name: &str, + supervisor_connected: bool, +) -> DriverSandboxStatus { + let state = summary.state.unwrap_or(ContainerSummaryStateEnum::EMPTY); + let (ready, reason, message, deleting) = container_ready_condition(state, supervisor_connected); + + DriverSandboxStatus { + sandbox_name: summary_container_name(summary).unwrap_or_else(|| sandbox_name.to_string()), + instance_id: summary.id.clone().unwrap_or_default(), + agent_fd: String::new(), + sandbox_fd: String::new(), + conditions: vec![DriverCondition { + r#type: "Ready".to_string(), + status: ready.to_string(), + reason: reason.to_string(), + message: message.to_string(), + last_transition_time: String::new(), + }], + deleting, + } +} + +fn container_ready_condition( + state: ContainerSummaryStateEnum, + supervisor_connected: bool, +) -> (&'static str, &'static str, &'static str, bool) { + match state { + ContainerSummaryStateEnum::RUNNING => { + if supervisor_connected { + ( + "True", + "SupervisorConnected", + "Supervisor relay is live", + false, + ) + } else { + ( + "False", + "DependenciesNotReady", + "Container is running; waiting for supervisor relay", + false, + ) + } + } + ContainerSummaryStateEnum::CREATED => ("False", "Starting", "Container created", false), + ContainerSummaryStateEnum::RESTARTING => { + ("False", "Starting", "Container restarting", false) + } + ContainerSummaryStateEnum::EMPTY => { + ("False", "Starting", "Container state is unknown", false) + } + ContainerSummaryStateEnum::REMOVING => { + ("False", "Deleting", "Container is being removed", true) + } + ContainerSummaryStateEnum::PAUSED => { + ("False", "ContainerPaused", "Container is paused", false) + } + ContainerSummaryStateEnum::EXITED => { + ("False", "ContainerExited", "Container exited", false) + } + ContainerSummaryStateEnum::DEAD => ("False", "ContainerDead", "Container is dead", false), + } +} + +fn summary_container_name(summary: &ContainerSummary) -> Option { + summary + .names + .as_ref() + .and_then(|names| names.first()) + .map(|name| name.trim_start_matches('/').to_string()) + .filter(|name| !name.is_empty()) +} + +fn emit_snapshot_diff( + events: &broadcast::Sender, + previous: &HashMap, + current: &HashMap, +) { + for (sandbox_id, sandbox) in current { + if previous.get(sandbox_id) == Some(sandbox) { + continue; + } + let _ = events.send(WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Sandbox( + WatchSandboxesSandboxEvent { + sandbox: Some(sandbox.clone()), + }, + )), + }); + } + + for sandbox_id in previous.keys() { + if current.contains_key(sandbox_id) { + continue; + } + let _ = events.send(WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Deleted( + WatchSandboxesDeletedEvent { + sandbox_id: sandbox_id.clone(), + }, + )), + }); + } +} + +fn label_filters(values: impl IntoIterator) -> HashMap> { + HashMap::from([("label".to_string(), values.into_iter().collect())]) +} + +/// Maximum Docker container name length. Docker's own limit is 253 bytes, but +/// we cap at a conservative 200 to leave headroom for tooling that truncates +/// names further. +const MAX_CONTAINER_NAME_LEN: usize = 200; +const CONTAINER_NAME_PREFIX: &str = "openshell-"; + +fn container_name_for_sandbox(sandbox: &DriverSandbox) -> String { + let id_suffix = sanitize_docker_name(&sandbox.id); + let name = sanitize_docker_name(&sandbox.name); + if name.is_empty() { + let mut base = format!("{CONTAINER_NAME_PREFIX}{id_suffix}"); + // The prefix is always < MAX_CONTAINER_NAME_LEN. Truncate the id + // suffix only if the sandbox id itself is pathologically long. + if base.len() > MAX_CONTAINER_NAME_LEN { + base.truncate(MAX_CONTAINER_NAME_LEN); + } + return base; + } + + // Reserve space for the prefix and the `-` tail so the id + // suffix — which is what makes the name unique between sandboxes that + // share a human-readable prefix — is never truncated away. + let reserved = CONTAINER_NAME_PREFIX.len() + 1 + id_suffix.len(); + if reserved >= MAX_CONTAINER_NAME_LEN { + // Pathological sandbox id. Fall back to `` and truncate. + let mut base = format!("{CONTAINER_NAME_PREFIX}{id_suffix}"); + base.truncate(MAX_CONTAINER_NAME_LEN); + return trim_container_name_tail(base); + } + + let name_budget = MAX_CONTAINER_NAME_LEN - reserved; + let truncated_name = if name.len() > name_budget { + trim_container_name_tail(name[..name_budget].to_string()) + } else { + name + }; + format!("{CONTAINER_NAME_PREFIX}{truncated_name}-{id_suffix}") +} + +/// Docker container names may not end with `-`, `.`, or `_`. Truncation can +/// leave one of those trailing, so strip them before returning. +fn trim_container_name_tail(mut value: String) -> String { + while value + .chars() + .last() + .is_some_and(|ch| matches!(ch, '-' | '.' | '_')) + { + value.pop(); + } + value +} + +fn sanitize_docker_name(value: &str) -> String { + value + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.' | '-') { + ch.to_ascii_lowercase() + } else { + '-' + } + }) + .collect::() + .trim_matches('-') + .to_string() +} + +fn normalize_docker_arch(arch: &str) -> String { + match arch { + "x86_64" => "amd64".to_string(), + "aarch64" => "arm64".to_string(), + other => other.to_ascii_lowercase(), + } +} + +pub(crate) async fn resolve_supervisor_bin( + docker: &Docker, + docker_config: &DockerComputeConfig, + daemon_arch: &str, +) -> CoreResult { + // Tier 1: explicit --docker-supervisor-bin / OPENSHELL_DOCKER_SUPERVISOR_BIN. + if let Some(path) = docker_config.supervisor_bin.clone() { + let path = canonicalize_existing_file(&path, "docker supervisor binary")?; + validate_linux_elf_binary(&path)?; + return Ok(path); + } + + // Tier 2: sibling `openshell-sandbox` next to the running gateway + // (release artifact layout). Linux-only because the sibling must be a + // Linux ELF to bind-mount into a Linux container. + if cfg!(target_os = "linux") { + let current_exe = std::env::current_exe() + .map_err(|err| Error::config(format!("failed to resolve current executable: {err}")))?; + if let Some(parent) = current_exe.parent() { + let sibling = parent.join("openshell-sandbox"); + if sibling.is_file() { + let path = canonicalize_existing_file(&sibling, "docker supervisor binary")?; + if validate_linux_elf_binary(&path).is_ok() { + return Ok(path); + } + } + } + } + + // Tier 3: local cargo target build (developer workflow). Preferred + // over a registry pull when available because it matches whatever the + // developer just built. + let target_candidates = linux_supervisor_candidates(daemon_arch); + for candidate in &target_candidates { + if candidate.is_file() { + let path = canonicalize_existing_file(candidate, "docker supervisor binary")?; + if validate_linux_elf_binary(&path).is_ok() { + return Ok(path); + } + } + } + + // Tier 4: pull the supervisor image from a registry and extract the + // binary to a host-side cache keyed by image content digest. This is + // the default path for released gateway binaries. + let image = docker_config + .supervisor_image + .clone() + .unwrap_or_else(default_docker_supervisor_image); + extract_supervisor_bin_from_image(docker, &image).await +} + +fn linux_supervisor_candidates(daemon_arch: &str) -> Vec { + match daemon_arch { + "arm64" => vec![PathBuf::from( + "target/aarch64-unknown-linux-gnu/release/openshell-sandbox", + )], + "amd64" => vec![PathBuf::from( + "target/x86_64-unknown-linux-gnu/release/openshell-sandbox", + )], + _ => Vec::new(), + } +} + +/// Pull the supervisor image (if not already local), extract +/// `/usr/local/bin/openshell-sandbox` to a host cache keyed by the image's +/// content digest, and return the cache path. +/// +/// The extraction is atomic: the binary is written to a sibling temp file +/// inside the digest-keyed directory and renamed into place, so concurrent +/// gateway starts don't observe a partial file. +async fn extract_supervisor_bin_from_image(docker: &Docker, image: &str) -> CoreResult { + // Inspect first to see if the image is already present; only pull on miss. + let inspect = match docker.inspect_image(image).await { + Ok(inspect) => inspect, + Err(err) if is_not_found_error(&err) => { + info!(image = image, "Pulling docker supervisor image"); + pull_supervisor_image(docker, image).await?; + docker.inspect_image(image).await.map_err(|err| { + Error::config(format!( + "failed to inspect docker supervisor image '{image}' after pull: {err}", + )) + })? + } + Err(err) => { + return Err(Error::config(format!( + "failed to inspect docker supervisor image '{image}': {err}", + ))); + } + }; + + let digest = inspect.id.clone().ok_or_else(|| { + Error::config(format!( + "docker supervisor image '{image}' inspect response has no Id", + )) + })?; + + let cache_path = supervisor_cache_path(&digest)?; + if cache_path.is_file() { + validate_linux_elf_binary(&cache_path)?; + return Ok(cache_path); + } + + let cache_dir = cache_path.parent().ok_or_else(|| { + Error::config(format!( + "docker supervisor cache path '{}' has no parent directory", + cache_path.display(), + )) + })?; + std::fs::create_dir_all(cache_dir).map_err(|err| { + Error::config(format!( + "failed to create docker supervisor cache dir '{}': {err}", + cache_dir.display(), + )) + })?; + + info!( + image = image, + digest = digest, + cache_path = %cache_path.display(), + "Extracting supervisor binary from image to host cache", + ); + + let binary_bytes = extract_supervisor_binary_bytes(docker, image).await?; + write_cache_binary_atomic(&cache_path, &binary_bytes)?; + validate_linux_elf_binary(&cache_path)?; + Ok(cache_path) +} + +async fn pull_supervisor_image(docker: &Docker, image: &str) -> CoreResult<()> { + let mut stream = docker.create_image( + Some(CreateImageOptions { + from_image: Some(image.to_string()), + ..Default::default() + }), + None, + None, + ); + while let Some(result) = stream.next().await { + result.map_err(|err| { + Error::config(format!( + "failed to pull docker supervisor image '{image}': {err}", + )) + })?; + } + Ok(()) +} + +/// Create a short-lived container from `image`, stream out the supervisor +/// binary as a tar archive, and return the untarred file bytes. The +/// container is always removed, even on error paths. +async fn extract_supervisor_binary_bytes(docker: &Docker, image: &str) -> CoreResult> { + let container_name = temp_extract_container_name(); + docker + .create_container( + Some( + CreateContainerOptionsBuilder::default() + .name(container_name.as_str()) + .build(), + ), + ContainerCreateBody { + image: Some(image.to_string()), + entrypoint: Some(vec!["/bin/true".to_string()]), + cmd: Some(Vec::new()), + ..Default::default() + }, + ) + .await + .map_err(|err| { + Error::config(format!( + "failed to create extractor container from '{image}': {err}", + )) + })?; + + // Always tear down the extractor container, even if extraction fails. + let result = download_binary_from_container(docker, &container_name).await; + if let Err(remove_err) = docker + .remove_container( + &container_name, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await + { + warn!( + container = container_name, + error = %remove_err, + "Failed to remove supervisor extractor container", + ); + } + result +} + +async fn download_binary_from_container( + docker: &Docker, + container_name: &str, +) -> CoreResult> { + let options = DownloadFromContainerOptionsBuilder::default() + .path(SUPERVISOR_IMAGE_BINARY_PATH) + .build(); + let mut stream = docker.download_from_container(container_name, Some(options)); + + let mut tar_bytes = Vec::new(); + while let Some(chunk) = stream.next().await { + let chunk: Bytes = chunk.map_err(|err| { + Error::config(format!( + "failed to read supervisor binary stream from '{container_name}': {err}", + )) + })?; + tar_bytes.extend_from_slice(&chunk); + } + + extract_first_tar_entry(&tar_bytes).map_err(|err| { + Error::config(format!( + "failed to extract supervisor binary from tar archive returned by '{container_name}': {err}", + )) + }) +} + +/// Extract the payload of the first regular-file entry in a tar archive. +/// Docker's `/containers//archive` endpoint returns a single-file tar +/// when `path` points to a file, so we only need the first entry. +fn extract_first_tar_entry(tar_bytes: &[u8]) -> Result, String> { + let mut archive = tar::Archive::new(std::io::Cursor::new(tar_bytes)); + let mut entries = archive + .entries() + .map_err(|err| format!("open tar archive: {err}"))?; + let mut entry = entries + .next() + .ok_or_else(|| "tar archive was empty".to_string())? + .map_err(|err| format!("read tar entry: {err}"))?; + let mut bytes = Vec::with_capacity(entry.size() as usize); + entry + .read_to_end(&mut bytes) + .map_err(|err| format!("read tar entry payload: {err}"))?; + Ok(bytes) +} + +fn write_cache_binary_atomic(final_path: &Path, bytes: &[u8]) -> CoreResult<()> { + let dir = final_path.parent().ok_or_else(|| { + Error::config(format!( + "docker supervisor cache path '{}' has no parent directory", + final_path.display(), + )) + })?; + let mut temp = tempfile::Builder::new() + .prefix(".openshell-sandbox-") + .tempfile_in(dir) + .map_err(|err| { + Error::config(format!( + "failed to create temp file for supervisor binary in '{}': {err}", + dir.display(), + )) + })?; + std::io::Write::write_all(&mut temp, bytes).map_err(|err| { + Error::config(format!( + "failed to write supervisor binary to temp file: {err}", + )) + })?; + temp.as_file().sync_all().map_err(|err| { + Error::config(format!("failed to sync supervisor binary temp file: {err}",)) + })?; + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(temp.path(), std::fs::Permissions::from_mode(0o755)).map_err( + |err| { + Error::config(format!( + "failed to chmod supervisor binary temp file: {err}", + )) + }, + )?; + } + + temp.persist(final_path).map_err(|err| { + Error::config(format!( + "failed to rename supervisor binary into '{}': {}", + final_path.display(), + err.error, + )) + })?; + Ok(()) +} + +/// Cache path for an extracted supervisor binary, keyed by the image's +/// content-addressable digest (e.g. `sha256:abc123…`). The digest-prefixed +/// directory keeps stale extractions from earlier releases isolated so they +/// can be GC'd without affecting the active binary. +fn supervisor_cache_path(digest: &str) -> CoreResult { + let base = openshell_core::paths::xdg_data_dir() + .map_err(|err| Error::config(format!("failed to resolve XDG data dir: {err}")))?; + Ok(supervisor_cache_path_with_base(&base, digest)) +} + +fn supervisor_cache_path_with_base(base: &Path, digest: &str) -> PathBuf { + let sanitized = digest.replace(':', "-"); + base.join("openshell") + .join("docker-supervisor") + .join(sanitized) + .join("openshell-sandbox") +} + +fn temp_extract_container_name() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; + static SEQ: AtomicU64 = AtomicU64::new(0); + let pid = std::process::id(); + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("openshell-supervisor-extract-{pid}-{seq}") +} + +fn canonicalize_existing_file(path: &Path, description: &str) -> CoreResult { + if !path.is_file() { + return Err(Error::config(format!( + "{description} '{}' does not exist or is not a file", + path.display() + ))); + } + std::fs::canonicalize(path).map_err(|err| { + Error::config(format!( + "failed to resolve {description} '{}': {err}", + path.display() + )) + }) +} + +pub(crate) fn validate_linux_elf_binary(path: &Path) -> CoreResult<()> { + let mut file = std::fs::File::open(path).map_err(|err| { + Error::config(format!( + "failed to open docker supervisor binary '{}': {err}", + path.display() + )) + })?; + let mut magic = [0_u8; 4]; + file.read_exact(&mut magic).map_err(|err| { + Error::config(format!( + "failed to read docker supervisor binary '{}': {err}", + path.display() + )) + })?; + if magic != [0x7f, b'E', b'L', b'F'] { + return Err(Error::config(format!( + "docker supervisor binary '{}' must be a Linux ELF executable", + path.display() + ))); + } + Ok(()) +} + +pub(crate) fn docker_guest_tls_paths( + config: &Config, + docker_config: &DockerComputeConfig, +) -> CoreResult> { + let tls_flags_provided = docker_config.guest_tls_ca.is_some() + || docker_config.guest_tls_cert.is_some() + || docker_config.guest_tls_key.is_some(); + + if !config.grpc_endpoint.starts_with("https://") { + if tls_flags_provided { + return Err(Error::config(format!( + "--docker-tls-ca/--docker-tls-cert/--docker-tls-key were provided but OPENSHELL_GRPC_ENDPOINT is '{}'; TLS materials require an https:// endpoint", + config.grpc_endpoint, + ))); + } + return Ok(None); + } + + let provided = [ + docker_config.guest_tls_ca.as_ref(), + docker_config.guest_tls_cert.as_ref(), + docker_config.guest_tls_key.as_ref(), + ]; + if provided.iter().all(Option::is_none) { + return Err(Error::config( + "docker compute driver requires --docker-tls-ca, --docker-tls-cert, and --docker-tls-key when OPENSHELL_GRPC_ENDPOINT uses https://", + )); + } + + let Some(ca) = docker_config.guest_tls_ca.clone() else { + return Err(Error::config( + "--docker-tls-ca is required when Docker sandbox TLS materials are configured", + )); + }; + let Some(cert) = docker_config.guest_tls_cert.clone() else { + return Err(Error::config( + "--docker-tls-cert is required when Docker sandbox TLS materials are configured", + )); + }; + let Some(key) = docker_config.guest_tls_key.clone() else { + return Err(Error::config( + "--docker-tls-key is required when Docker sandbox TLS materials are configured", + )); + }; + + Ok(Some(DockerGuestTlsPaths { + ca: canonicalize_existing_file(&ca, "docker TLS CA certificate")?, + cert: canonicalize_existing_file(&cert, "docker TLS client certificate")?, + key: canonicalize_existing_file(&key, "docker TLS client private key")?, + })) +} + +fn is_not_found_error(err: &BollardError) -> bool { + matches!( + err, + BollardError::DockerResponseServerError { + status_code: 404, + .. + } + ) +} + +fn create_status_from_docker_error(operation: &str, err: BollardError) -> Status { + if matches!( + err, + BollardError::DockerResponseServerError { + status_code: 409, + .. + } + ) { + Status::already_exists("sandbox already exists") + } else { + internal_status(operation, err) + } +} + +fn internal_status(operation: &str, err: BollardError) -> Status { + Status::internal(format!("{operation} failed: {err}")) +} + +#[cfg(test)] +mod tests { + use super::*; + use openshell_core::proto::compute::v1::{ + DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate, + }; + use std::fs; + use tempfile::TempDir; + + fn test_sandbox() -> DriverSandbox { + DriverSandbox { + id: "sbx-123".to_string(), + name: "demo".to_string(), + namespace: "default".to_string(), + spec: Some(DriverSandboxSpec { + log_level: "debug".to_string(), + environment: HashMap::from([("SPEC_ENV".to_string(), "spec".to_string())]), + template: Some(DriverSandboxTemplate { + image: "ghcr.io/nvidia/openshell/sandbox:dev".to_string(), + agent_socket_path: String::new(), + labels: HashMap::new(), + environment: HashMap::from([( + "TEMPLATE_ENV".to_string(), + "template".to_string(), + )]), + resources: None, + platform_config: None, + }), + gpu: false, + }), + status: None, + } + } + + fn runtime_config() -> DockerDriverRuntimeConfig { + DockerDriverRuntimeConfig { + default_image: "image:latest".to_string(), + image_pull_policy: String::new(), + grpc_endpoint: "https://localhost:8443".to_string(), + ssh_socket_path: "/run/openshell/ssh.sock".to_string(), + ssh_handshake_secret: "secret".to_string(), + ssh_handshake_skew_secs: 300, + log_level: "info".to_string(), + supervisor_bin: PathBuf::from("/tmp/openshell-sandbox"), + guest_tls: Some(DockerGuestTlsPaths { + ca: PathBuf::from("/tmp/ca.crt"), + cert: PathBuf::from("/tmp/tls.crt"), + key: PathBuf::from("/tmp/tls.key"), + }), + daemon_version: "28.0.0".to_string(), + } + } + + #[test] + fn container_visible_endpoint_rewrites_loopback_hosts() { + assert_eq!( + container_visible_openshell_endpoint("https://localhost:8443"), + "https://host.openshell.internal:8443/" + ); + assert_eq!( + container_visible_openshell_endpoint("http://127.0.0.1:8080"), + "http://host.openshell.internal:8080/" + ); + assert_eq!( + container_visible_openshell_endpoint("https://gateway.internal:8443"), + "https://gateway.internal:8443" + ); + } + + #[test] + fn parse_cpu_limit_supports_cores_and_millicores() { + assert_eq!(parse_cpu_limit("250m").unwrap(), Some(250_000_000)); + assert_eq!(parse_cpu_limit("2").unwrap(), Some(2_000_000_000)); + assert!(parse_cpu_limit("0").is_err()); + } + + #[test] + fn parse_memory_limit_supports_binary_quantities() { + assert_eq!(parse_memory_limit("512Mi").unwrap(), Some(536_870_912)); + assert_eq!(parse_memory_limit("1G").unwrap(), Some(1_000_000_000)); + assert!(parse_memory_limit("12XB").is_err()); + } + + #[test] + fn docker_resource_limits_rejects_requests() { + let template = DriverSandboxTemplate { + image: "img".to_string(), + agent_socket_path: String::new(), + labels: HashMap::new(), + environment: HashMap::new(), + resources: Some(DriverResourceRequirements { + cpu_request: "250m".to_string(), + cpu_limit: String::new(), + memory_request: String::new(), + memory_limit: String::new(), + }), + platform_config: None, + }; + + let err = docker_resource_limits(&template).unwrap_err(); + assert_eq!(err.code(), tonic::Code::FailedPrecondition); + assert!(err.message().contains("resources.requests.cpu")); + } + + #[test] + fn build_environment_sets_docker_tls_paths() { + let env = build_environment(&test_sandbox(), &runtime_config()); + assert!(env.contains(&format!("OPENSHELL_TLS_CA={TLS_CA_MOUNT_PATH}"))); + assert!(env.contains(&format!("OPENSHELL_TLS_CERT={TLS_CERT_MOUNT_PATH}"))); + assert!(env.contains(&format!("OPENSHELL_TLS_KEY={TLS_KEY_MOUNT_PATH}"))); + assert!(env.contains(&"TEMPLATE_ENV=template".to_string())); + assert!(env.contains(&"SPEC_ENV=spec".to_string())); + assert!(env.contains(&"OPENSHELL_SANDBOX_COMMAND=sleep infinity".to_string())); + } + + #[test] + fn build_mounts_uses_docker_tls_directory() { + let mounts = build_mounts(&runtime_config()); + let targets = mounts + .iter() + .filter_map(|mount| mount.target.clone()) + .collect::>(); + assert!(targets.contains(&SUPERVISOR_MOUNT_PATH.to_string())); + assert!(targets.contains(&TLS_CA_MOUNT_PATH.to_string())); + assert!(targets.contains(&TLS_CERT_MOUNT_PATH.to_string())); + assert!(targets.contains(&TLS_KEY_MOUNT_PATH.to_string())); + assert!( + targets + .iter() + .all(|target| target.starts_with(TLS_MOUNT_DIR) || target == SUPERVISOR_MOUNT_PATH) + ); + } + + #[test] + fn driver_status_keeps_running_sandboxes_provisioning_with_stable_message() { + let running = ContainerSummary { + id: Some("cid".to_string()), + names: Some(vec!["/openshell-demo".to_string()]), + labels: Some(HashMap::from([ + (SANDBOX_ID_LABEL_KEY.to_string(), "sbx-1".to_string()), + (SANDBOX_NAME_LABEL_KEY.to_string(), "demo".to_string()), + ( + SANDBOX_NAMESPACE_LABEL_KEY.to_string(), + "default".to_string(), + ), + ])), + state: Some(ContainerSummaryStateEnum::RUNNING), + status: Some("Up 2 seconds".to_string()), + ..Default::default() + }; + let exited = ContainerSummary { + state: Some(ContainerSummaryStateEnum::EXITED), + status: Some("Exited (1) 3 seconds ago".to_string()), + ..running.clone() + }; + let running_later = ContainerSummary { + status: Some("Up 4 seconds".to_string()), + ..running.clone() + }; + + let running_status = driver_status_from_summary(&running, "demo", false); + let running_later_status = driver_status_from_summary(&running_later, "demo", false); + assert_eq!(running_status.conditions[0].status, "False"); + assert_eq!(running_status.conditions[0].reason, "DependenciesNotReady"); + assert_eq!( + running_status.conditions[0].message, + "Container is running; waiting for supervisor relay" + ); + assert_eq!(running_status.conditions, running_later_status.conditions); + + let exited_status = driver_status_from_summary(&exited, "demo", false); + assert_eq!(exited_status.conditions[0].status, "False"); + assert_eq!(exited_status.conditions[0].reason, "ContainerExited"); + assert_eq!(exited_status.conditions[0].message, "Container exited"); + + // With a live supervisor session, a RUNNING container flips Ready=True + // so ExecSandbox and other "sandbox must be ready" gates can proceed. + let running_connected = driver_status_from_summary(&running, "demo", true); + assert_eq!(running_connected.conditions[0].status, "True"); + assert_eq!( + running_connected.conditions[0].reason, + "SupervisorConnected" + ); + + // Supervisor readiness is ignored for non-RUNNING states — an exited + // container must not report Ready=True. + let exited_connected = driver_status_from_summary(&exited, "demo", true); + assert_eq!(exited_connected.conditions[0].status, "False"); + } + + #[test] + fn validate_linux_elf_binary_rejects_non_elf_files() { + let tempdir = TempDir::new().unwrap(); + let path = tempdir.path().join("openshell-sandbox"); + fs::write(&path, b"not-elf").unwrap(); + + let err = validate_linux_elf_binary(&path).unwrap_err(); + assert!(err.to_string().contains("Linux ELF executable")); + } + + #[test] + fn docker_guest_tls_paths_require_all_files_for_https() { + let config = Config::new(None).with_grpc_endpoint("https://localhost:8443"); + let tempdir = TempDir::new().unwrap(); + let ca = tempdir.path().join("ca.crt"); + fs::write(&ca, b"ca").unwrap(); + + let err = docker_guest_tls_paths( + &config, + &DockerComputeConfig { + guest_tls_ca: Some(ca), + ..Default::default() + }, + ) + .unwrap_err(); + assert!(err.to_string().contains("--docker-tls-cert")); + } + + #[test] + fn linux_supervisor_candidates_follow_daemon_arch() { + assert_eq!( + linux_supervisor_candidates("amd64"), + vec![PathBuf::from( + "target/x86_64-unknown-linux-gnu/release/openshell-sandbox", + )] + ); + assert_eq!( + linux_supervisor_candidates("arm64"), + vec![PathBuf::from( + "target/aarch64-unknown-linux-gnu/release/openshell-sandbox", + )] + ); + } + + #[test] + fn container_name_preserves_id_suffix_for_long_names() { + // Names up to 253 chars are permitted by the gRPC layer. The id + // suffix is what makes the container name unique between sandboxes + // sharing a prefix, so it must always appear in the final name. + let long_name = "a".repeat(253); + let first = DriverSandbox { + id: "sbx-first-1234567890".to_string(), + name: long_name.clone(), + namespace: "default".to_string(), + spec: None, + status: None, + }; + let second = DriverSandbox { + id: "sbx-second-0987654321".to_string(), + ..first.clone() + }; + + let first_container = container_name_for_sandbox(&first); + let second_container = container_name_for_sandbox(&second); + + assert!( + first_container.len() <= MAX_CONTAINER_NAME_LEN, + "container name {} exceeded {MAX_CONTAINER_NAME_LEN} chars: {first_container}", + first_container.len(), + ); + assert!( + first_container.ends_with(&first.id), + "container name should end with sandbox id: {first_container}", + ); + assert_ne!( + first_container, second_container, + "container names must differ for sandboxes with distinct ids", + ); + } + + #[test] + fn container_name_empty_sandbox_name_uses_id_only() { + let sandbox = DriverSandbox { + id: "sbx-abc".to_string(), + name: String::new(), + namespace: "default".to_string(), + spec: None, + status: None, + }; + assert_eq!(container_name_for_sandbox(&sandbox), "openshell-sbx-abc",); + } + + #[test] + fn trim_container_name_tail_strips_separators() { + assert_eq!(trim_container_name_tail("foo-".to_string()), "foo"); + assert_eq!(trim_container_name_tail("foo-.".to_string()), "foo"); + assert_eq!(trim_container_name_tail("foo_-.".to_string()), "foo"); + assert_eq!(trim_container_name_tail("foo".to_string()), "foo"); + } + + #[test] + fn docker_guest_tls_paths_rejects_tls_flags_without_https() { + let config = Config::new(None).with_grpc_endpoint("http://localhost:8080"); + let tempdir = TempDir::new().unwrap(); + let ca = tempdir.path().join("ca.crt"); + fs::write(&ca, b"ca").unwrap(); + + let err = docker_guest_tls_paths( + &config, + &DockerComputeConfig { + guest_tls_ca: Some(ca), + ..Default::default() + }, + ) + .unwrap_err(); + assert!(err.to_string().contains("https://")); + } + + #[test] + fn docker_guest_tls_paths_allows_plain_http_without_tls_flags() { + let config = Config::new(None).with_grpc_endpoint("http://localhost:8080"); + let result = docker_guest_tls_paths( + &config, + &DockerComputeConfig { + ..Default::default() + }, + ) + .unwrap(); + assert!(result.is_none()); + } + + #[test] + fn default_docker_supervisor_image_uses_nvidia_ghcr_repo() { + let image = default_docker_supervisor_image(); + assert!( + image.starts_with("ghcr.io/nvidia/openshell/supervisor:"), + "unexpected default image reference: {image}", + ); + } + + #[test] + fn supervisor_cache_path_namespaces_by_digest_under_openshell_data_dir() { + let base = PathBuf::from("/var/cache/share"); + let path = supervisor_cache_path_with_base( + &base, + "sha256:abc123deadbeef0123456789cafe0123456789fe", + ); + + assert_eq!( + path, + PathBuf::from( + "/var/cache/share/openshell/docker-supervisor/sha256-abc123deadbeef0123456789cafe0123456789fe/openshell-sandbox", + ), + ); + } + + #[test] + fn supervisor_cache_path_isolates_different_digests() { + let base = PathBuf::from("/data"); + let left = supervisor_cache_path_with_base(&base, "sha256:aaaaaaaa"); + let right = supervisor_cache_path_with_base(&base, "sha256:bbbbbbbb"); + assert_ne!( + left.parent().unwrap(), + right.parent().unwrap(), + "digest-keyed directories must differ so rollouts are isolated", + ); + } + + #[test] + fn write_cache_binary_atomic_materializes_file_with_executable_mode() { + let tempdir = TempDir::new().unwrap(); + let target = tempdir.path().join("nested").join("openshell-sandbox"); + fs::create_dir_all(target.parent().unwrap()).unwrap(); + + write_cache_binary_atomic(&target, b"\x7fELFpayload").unwrap(); + + assert!(target.is_file()); + assert_eq!(fs::read(&target).unwrap(), b"\x7fELFpayload"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mode = fs::metadata(&target).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o755, "expected 0755, got {mode:04o}"); + } + } + + #[test] + fn write_cache_binary_atomic_overwrites_existing_file() { + let tempdir = TempDir::new().unwrap(); + let target = tempdir.path().join("openshell-sandbox"); + fs::write(&target, b"stale").unwrap(); + + write_cache_binary_atomic(&target, b"\x7fELFfresh").unwrap(); + assert_eq!(fs::read(&target).unwrap(), b"\x7fELFfresh"); + } + + #[test] + fn temp_extract_container_names_are_unique_per_call() { + let first = temp_extract_container_name(); + let second = temp_extract_container_name(); + assert_ne!(first, second); + assert!(first.starts_with("openshell-supervisor-extract-")); + } + + #[test] + fn extract_first_tar_entry_returns_payload_of_single_file_archive() { + // Build a tar archive with the same shape Docker returns from + // `/containers//archive` for a single file. + let payload = b"\x7fELFtest-binary-bytes"; + let mut tar_buf = Vec::new(); + { + let mut builder = tar::Builder::new(&mut tar_buf); + let mut header = tar::Header::new_gnu(); + header.set_path("openshell-sandbox").unwrap(); + header.set_size(payload.len() as u64); + header.set_mode(0o755); + header.set_cksum(); + builder.append(&header, payload.as_slice()).unwrap(); + builder.finish().unwrap(); + } + + let extracted = extract_first_tar_entry(&tar_buf).unwrap(); + assert_eq!(extracted, payload); + } + + #[test] + fn extract_first_tar_entry_rejects_empty_archive() { + let mut tar_buf = Vec::new(); + tar::Builder::new(&mut tar_buf).finish().unwrap(); + let err = extract_first_tar_entry(&tar_buf).unwrap_err(); + assert!(err.contains("empty"), "unexpected error message: {err}"); + } +} diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 95ffbfaa4..2aa4e8eed 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -3,8 +3,10 @@ //! Gateway-owned compute orchestration over a pluggable compute backend. +pub mod docker; pub mod vm; +pub use docker::DockerComputeConfig; pub use vm::VmComputeConfig; use crate::grpc::policy::{SANDBOX_SETTINGS_OBJECT_TYPE, sandbox_settings_id}; @@ -224,6 +226,31 @@ impl ComputeRuntime { }) } + pub async fn new_docker( + config: openshell_core::Config, + docker_config: DockerComputeConfig, + store: Arc, + sandbox_index: SandboxIndex, + sandbox_watch_bus: SandboxWatchBus, + tracing_log_bus: TracingLogBus, + supervisor_readiness: Arc, + ) -> Result { + let driver = + docker::DockerComputeDriver::new(&config, &docker_config, supervisor_readiness) + .await + .map_err(|err| ComputeError::Message(err.to_string()))?; + let driver: SharedComputeDriver = Arc::new(driver); + Self::from_driver( + driver, + None, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + ) + .await + } + pub async fn new_kubernetes( config: KubernetesComputeConfig, store: Arc, @@ -866,14 +893,14 @@ fn build_platform_config(template: &SandboxTemplate) -> Option Option, +) -> Option { + use prost_types::{Struct, Value, value::Kind}; + + let resources = resources.as_ref()?; + let mut fields = std::collections::BTreeMap::new(); + + for (section_name, value) in &resources.fields { + if !matches!(section_name.as_str(), "limits" | "requests") { + fields.insert(section_name.clone(), value.clone()); + continue; + } + + let Some(Kind::StructValue(section)) = value.kind.as_ref() else { + fields.insert(section_name.clone(), value.clone()); + continue; + }; + + let section_fields = section + .fields + .iter() + .filter_map(|(resource_name, resource_value)| { + let is_typed_quantity = matches!(resource_name.as_str(), "cpu" | "memory") + && matches!(resource_value.kind.as_ref(), Some(Kind::StringValue(_))); + if is_typed_quantity { + None + } else { + Some((resource_name.clone(), resource_value.clone())) + } + }) + .collect::>(); + + if !section_fields.is_empty() { + fields.insert( + section_name.clone(), + Value { + kind: Some(Kind::StructValue(Struct { + fields: section_fields, + })), + }, + ); + } + } + + if fields.is_empty() { + None + } else { + Some(Struct { fields }) + } +} + fn driver_status_from_public(status: &SandboxStatus, phase: i32) -> DriverSandboxStatus { DriverSandboxStatus { sandbox_name: status.sandbox_name.clone(), @@ -1045,6 +1124,31 @@ mod tests { }; use std::sync::Arc; + fn string_value(value: &str) -> prost_types::Value { + prost_types::Value { + kind: Some(prost_types::value::Kind::StringValue(value.to_string())), + } + } + + fn number_value(value: f64) -> prost_types::Value { + prost_types::Value { + kind: Some(prost_types::value::Kind::NumberValue(value)), + } + } + + fn struct_value( + fields: impl IntoIterator, prost_types::Value)>, + ) -> prost_types::Value { + prost_types::Value { + kind: Some(prost_types::value::Kind::StructValue(prost_types::Struct { + fields: fields + .into_iter() + .map(|(key, value)| (key.into(), value)) + .collect(), + })), + } + } + #[derive(Debug, Default)] struct TestDriver { listed_sandboxes: Vec, @@ -1303,6 +1407,123 @@ mod tests { assert_eq!(derive_phase(Some(&status)), SandboxPhase::Ready); } + #[test] + fn build_platform_config_omits_typed_cpu_and_memory_resources() { + let template = SandboxTemplate { + resources: Some(prost_types::Struct { + fields: [ + ( + "limits", + struct_value([("cpu", string_value("2")), ("memory", string_value("1Gi"))]), + ), + ( + "requests", + struct_value([ + ("cpu", string_value("500m")), + ("memory", string_value("512Mi")), + ]), + ), + ] + .into_iter() + .map(|(key, value)| (key.to_string(), value)) + .collect(), + }), + ..Default::default() + }; + + assert!(build_platform_config(&template).is_none()); + } + + #[test] + fn build_platform_config_preserves_non_typed_resource_fields() { + let template = SandboxTemplate { + resources: Some(prost_types::Struct { + fields: [ + ( + "limits", + struct_value([ + ("cpu", string_value("2")), + ("memory", string_value("1Gi")), + ("nvidia.com/gpu", string_value("1")), + ]), + ), + ( + "requests", + struct_value([ + ("cpu", string_value("500m")), + ("memory", string_value("512Mi")), + ("hugepages-2Mi", string_value("4Mi")), + ]), + ), + ("opaque_cpu", number_value(2.0)), + ] + .into_iter() + .map(|(key, value)| (key.to_string(), value)) + .collect(), + }), + ..Default::default() + }; + + let platform_config = build_platform_config(&template).unwrap(); + let resources_raw = platform_config + .fields + .get("resources_raw") + .and_then(|value| value.kind.as_ref()) + .and_then(|kind| match kind { + prost_types::value::Kind::StructValue(inner) => Some(inner), + _ => None, + }) + .unwrap(); + + let limits = resources_raw + .fields + .get("limits") + .and_then(|value| value.kind.as_ref()) + .and_then(|kind| match kind { + prost_types::value::Kind::StructValue(inner) => Some(inner), + _ => None, + }) + .unwrap(); + assert!(!limits.fields.contains_key("cpu")); + assert!(!limits.fields.contains_key("memory")); + assert_eq!( + limits + .fields + .get("nvidia.com/gpu") + .and_then(|value| value.kind.as_ref()) + .and_then(|kind| match kind { + prost_types::value::Kind::StringValue(value) => Some(value.as_str()), + _ => None, + }), + Some("1") + ); + + let requests = resources_raw + .fields + .get("requests") + .and_then(|value| value.kind.as_ref()) + .and_then(|kind| match kind { + prost_types::value::Kind::StructValue(inner) => Some(inner), + _ => None, + }) + .unwrap(); + assert!(!requests.fields.contains_key("cpu")); + assert!(!requests.fields.contains_key("memory")); + assert_eq!( + requests + .fields + .get("hugepages-2Mi") + .and_then(|value| value.kind.as_ref()) + .and_then(|kind| match kind { + prost_types::value::Kind::StringValue(value) => Some(value.as_str()), + _ => None, + }), + Some("4Mi") + ); + + assert!(resources_raw.fields.contains_key("opaque_cpu")); + } + #[test] fn rewrite_user_facing_conditions_rewrites_gpu_unschedulable_message() { let mut status = Some(SandboxStatus { diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index c2ff8322e..9e26d11a1 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -43,7 +43,7 @@ use std::time::Duration; use tokio::net::TcpListener; use tracing::{debug, error, info}; -use compute::{ComputeRuntime, VmComputeConfig}; +use compute::{ComputeRuntime, DockerComputeConfig, VmComputeConfig}; pub use grpc::OpenShellService; pub use http::{health_router, http_router}; pub use multiplex::{MultiplexService, MultiplexedService}; @@ -88,7 +88,11 @@ pub struct ServerState { pub settings_mutex: tokio::sync::Mutex<()>, /// Registry of active supervisor sessions and pending relay channels. - pub supervisor_sessions: supervisor_session::SupervisorSessionRegistry, + /// + /// Stored as `Arc` so compute drivers (e.g. the bundled Docker + /// driver) can be constructed before `ServerState` and still + /// query session state to surface supervisor readiness. + pub supervisor_sessions: Arc, } fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool { @@ -108,6 +112,7 @@ impl ServerState { sandbox_index: SandboxIndex, sandbox_watch_bus: SandboxWatchBus, tracing_log_bus: TracingLogBus, + supervisor_sessions: Arc, ) -> Self { Self { config, @@ -119,7 +124,7 @@ impl ServerState { ssh_connections_by_token: Mutex::new(HashMap::new()), ssh_connections_by_sandbox: Mutex::new(HashMap::new()), settings_mutex: tokio::sync::Mutex::new(()), - supervisor_sessions: supervisor_session::SupervisorSessionRegistry::new(), + supervisor_sessions, } } } @@ -134,6 +139,7 @@ impl ServerState { pub async fn run_server( config: Config, vm_config: VmComputeConfig, + docker_config: DockerComputeConfig, tracing_log_bus: TracingLogBus, ) -> Result<()> { let database_url = config.database_url.trim(); @@ -150,13 +156,16 @@ pub async fn run_server( let sandbox_index = SandboxIndex::new(); let sandbox_watch_bus = SandboxWatchBus::new(); + let supervisor_sessions = Arc::new(supervisor_session::SupervisorSessionRegistry::new()); let compute = build_compute_runtime( &config, &vm_config, + &docker_config, store.clone(), sandbox_index.clone(), sandbox_watch_bus.clone(), tracing_log_bus.clone(), + supervisor_sessions.clone(), ) .await?; let state = Arc::new(ServerState::new( @@ -166,6 +175,7 @@ pub async fn run_server( sandbox_index, sandbox_watch_bus, tracing_log_bus, + supervisor_sessions, )); state.compute.spawn_watchers(); @@ -254,10 +264,12 @@ pub async fn run_server( async fn build_compute_runtime( config: &Config, vm_config: &VmComputeConfig, + docker_config: &DockerComputeConfig, store: Arc, sandbox_index: SandboxIndex, sandbox_watch_bus: SandboxWatchBus, tracing_log_bus: TracingLogBus, + supervisor_sessions: Arc, ) -> Result { let driver = configured_compute_driver(config)?; info!(driver = %driver, "Using compute driver"); @@ -288,6 +300,17 @@ async fn build_compute_runtime( ) .await .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))), + ComputeDriverKind::Docker => ComputeRuntime::new_docker( + config.clone(), + docker_config.clone(), + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + ) + .await + .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))), ComputeDriverKind::Vm => { let (channel, driver_process) = compute::vm::spawn(config, vm_config).await?; ComputeRuntime::new_remote_vm( @@ -312,7 +335,9 @@ fn configured_compute_driver(config: &Config) -> Result { [] => Err(Error::config( "at least one compute driver must be configured", )), - [driver @ ComputeDriverKind::Kubernetes] | [driver @ ComputeDriverKind::Vm] => Ok(*driver), + [driver @ ComputeDriverKind::Kubernetes] + | [driver @ ComputeDriverKind::Vm] + | [driver @ ComputeDriverKind::Docker] => Ok(*driver), [ComputeDriverKind::Podman] => Err(Error::config( "compute driver 'podman' is not implemented yet", )), @@ -398,4 +423,13 @@ mod tests { ComputeDriverKind::Vm ); } + + #[test] + fn configured_compute_driver_accepts_docker() { + let config = Config::new(None).with_compute_drivers([ComputeDriverKind::Docker]); + assert_eq!( + configured_compute_driver(&config).unwrap(), + ComputeDriverKind::Docker + ); + } } diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index f81ee9e3c..f11974b2e 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -61,6 +61,12 @@ struct LiveSession { /// Holds a oneshot sender that will deliver the upgraded relay stream. type RelayStreamSender = oneshot::Sender; +impl crate::compute::docker::SupervisorReadiness for SupervisorSessionRegistry { + fn is_supervisor_connected(&self, sandbox_id: &str) -> bool { + Self::is_connected(self, sandbox_id) + } +} + /// Registry of active supervisor sessions and pending relay channels. #[derive(Default)] pub struct SupervisorSessionRegistry { @@ -126,6 +132,14 @@ impl SupervisorSessionRegistry { } } + /// Report whether a live supervisor session is registered for a sandbox. + /// + /// Used by compute drivers that need to surface "supervisor relay ready" + /// through the Ready condition without polling the sandbox runtime. + pub fn is_connected(&self, sandbox_id: &str) -> bool { + self.sessions.lock().unwrap().contains_key(sandbox_id) + } + /// Remove the session for a sandbox. fn remove(&self, sandbox_id: &str) { self.sessions.lock().unwrap().remove(sandbox_id); diff --git a/docs/about/architecture.mdx b/docs/about/architecture.mdx index a88e6f490..6b04d2f00 100644 --- a/docs/about/architecture.mdx +++ b/docs/about/architecture.mdx @@ -22,6 +22,18 @@ The following table describes each component and its role in the system: | **Sandbox** | Isolated runtime that includes container supervision and policy-enforced egress routing. | | **Policy Engine** | Policy definition and enforcement layer for filesystem, network, process, and inference constraints. Defense in depth enforces policies from the application layer down to infrastructure and kernel layers. | +## Compute Backends + +The gateway talks to sandbox runtimes through a compute driver. OpenShell currently supports three backends: + +| Backend | Where it runs | Notes | +|---|---|---| +| **Kubernetes** | In-process in the gateway | Creates sandbox CRDs and lets the sandbox supervisor call back over the gateway's gRPC control plane. | +| **Docker** | In-process in the gateway | Uses the local Docker daemon through Bollard, keeps one container per sandbox alive until delete, and does not publish sandbox ports. | +| **VM** | Separate helper process | Spawns `openshell-driver-vm`, which boots libkrun guests and keeps the VM runtime outside the gateway binary. | + +Regardless of backend, sandbox access is supervisor-initiated. The gateway does not dial sandbox IPs or expose sandbox SSH ports. Each supervisor opens `ConnectSupervisor` back to the gateway, and SSH or exec traffic rides that existing mTLS HTTP/2 connection as relay streams. + ## How a Request Flows Every outbound connection from agent code passes through the same decision path: diff --git a/docs/reference/support-matrix.mdx b/docs/reference/support-matrix.mdx index c5eeee567..ac863b8e3 100644 --- a/docs/reference/support-matrix.mdx +++ b/docs/reference/support-matrix.mdx @@ -39,6 +39,18 @@ The following software must be installed on the host before using the OpenShell | ------------------------------- | --------------- | ----------------------------------------------- | | Docker Desktop or Docker Engine | 28.04 | Must be running before any `openshell` command. | +## Compute Driver Notes + +OpenShell gateway deployments always run inside Docker, but sandbox provisioning can target different compute backends. + +| Backend | Host requirement | Notes | +| --- | --- | --- | +| Kubernetes | Docker plus the bundled single-node cluster | Default backend used by `openshell gateway start`. | +| Docker | Local Docker-compatible daemon reachable from the gateway process | Sandboxes stay as long-lived containers. No sandbox ports are published; access still uses the supervisor relay. | +| VM | Host support for the packaged libkrun runtime | The gateway spawns a separate `openshell-driver-vm` helper. | + +For Docker-backed sandboxes on macOS, the gateway must side-load a Linux `openshell-sandbox` binary into each container. Provide that binary with `--docker-supervisor-bin` or build one locally so the gateway can auto-discover it at `target/aarch64-unknown-linux-gnu/release/openshell-sandbox`. + ## Sandbox Runtime Versions Sandbox container images are maintained in the [openshell-community](https://github.com/nvidia/openshell-community) repository. Refer to that repository for the current list of installed components and their versions. diff --git a/e2e/rust/e2e-docker.sh b/e2e/rust/e2e-docker.sh new file mode 100755 index 000000000..cbff0e178 --- /dev/null +++ b/e2e/rust/e2e-docker.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run the Rust e2e smoke test against a standalone gateway running the +# bundled Docker compute driver. +# +# Unlike the Kubernetes driver (which deploys a k3s cluster) or the VM +# driver (which boots libkrun), the Docker driver runs in-process inside +# the gateway binary and uses the local Docker daemon to run sandbox +# containers. This script: +# +# 1. Builds openshell-gateway, openshell-cli, and a Linux ELF +# openshell-sandbox binary (cross-compiled so it can run inside +# Docker containers on macOS hosts). +# 2. Ensures the supervisor image (openshell/supervisor:dev) exists +# locally — the sandbox containers launch from it, with the +# cross-compiled openshell-sandbox binary bind-mounted over the +# image-provided copy. +# 3. Generates an ephemeral mTLS PKI (CA, server cert, client cert). +# 4. Starts openshell-gateway with --drivers=docker, binding to a +# random free host port. +# 5. Installs the client cert into the CLI gateway config dir and +# runs the Rust smoke test. +# 6. Tears the gateway process down on exit. +# +# Usage: mise run e2e:docker + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +WORKDIR="$(mktemp -d "/tmp/openshell-e2e-docker.XXXXXX")" +GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" +CLI_BIN="${ROOT}/target/debug/openshell" +STATE_DIR="" +GATEWAY_CONFIG_DIR="" +GATEWAY_PID="" +GATEWAY_LOG="${WORKDIR}/gateway.log" + +cleanup() { + local exit_code=$? + if [ -n "${GATEWAY_PID}" ] && kill -0 "${GATEWAY_PID}" 2>/dev/null; then + echo "Stopping openshell-gateway (pid ${GATEWAY_PID})..." + kill "${GATEWAY_PID}" 2>/dev/null || true + wait "${GATEWAY_PID}" 2>/dev/null || true + fi + + # On failure, preserve sandbox container logs for post-mortem + # debugging before removing the containers. + if [ "${exit_code}" -ne 0 ] && command -v docker >/dev/null 2>&1; then + local ids + ids=$(docker ps -aq --filter "label=openshell.ai/managed-by=openshell" 2>/dev/null || true) + if [ -n "${ids}" ]; then + echo "=== sandbox container logs (preserved for debugging) ===" + for id in ${ids}; do + echo "--- container ${id} (inspect) ---" + docker inspect --format '{{.Name}} state={{.State.Status}} exit={{.State.ExitCode}} restarts={{.RestartCount}} error={{.State.Error}}' "${id}" 2>/dev/null || true + echo "--- container ${id} (last 80 log lines) ---" + docker logs --tail 80 "${id}" 2>&1 || true + done + echo "=== end sandbox container logs ===" + fi + fi + + # Remove any lingering sandbox containers the gateway failed to clean + # up. The driver labels its containers with openshell.ai/managed-by. + if command -v docker >/dev/null 2>&1; then + local stale + stale=$(docker ps -aq --filter "label=openshell.ai/managed-by=openshell" 2>/dev/null || true) + if [ -n "${stale}" ]; then + # shellcheck disable=SC2086 + docker rm -f ${stale} >/dev/null 2>&1 || true + fi + fi + + if [ "${exit_code}" -ne 0 ] && [ -f "${GATEWAY_LOG}" ]; then + echo "=== gateway log (preserved for debugging) ===" + cat "${GATEWAY_LOG}" + echo "=== end gateway log ===" + fi + + # Remove gateway CLI config we created so repeated runs don't + # accumulate stale gateway entries. + if [ -n "${GATEWAY_CONFIG_DIR}" ] && [ -d "${GATEWAY_CONFIG_DIR}" ]; then + rm -rf "${GATEWAY_CONFIG_DIR}" + fi + + rm -rf "${WORKDIR}" 2>/dev/null || true +} +trap cleanup EXIT + +# ── Preflight ──────────────────────────────────────────────────────── +if ! command -v docker >/dev/null 2>&1; then + echo "ERROR: docker CLI is required to run e2e:docker" >&2 + exit 2 +fi +if ! docker info >/dev/null 2>&1; then + echo "ERROR: docker daemon is not reachable (docker info failed)" >&2 + exit 2 +fi +if ! command -v openssl >/dev/null 2>&1; then + echo "ERROR: openssl is required to generate ephemeral PKI" >&2 + exit 2 +fi + +# Detect Linux arch of the Docker daemon so we build the matching +# openshell-sandbox binary. +DAEMON_ARCH=$(docker info --format '{{.Architecture}}' 2>/dev/null || true) +case "${DAEMON_ARCH}" in + aarch64|arm64) SUPERVISOR_TARGET="aarch64-unknown-linux-gnu" ;; + x86_64|amd64) SUPERVISOR_TARGET="x86_64-unknown-linux-gnu" ;; + *) + echo "ERROR: unsupported Docker daemon architecture '${DAEMON_ARCH}'" >&2 + exit 2 + ;; +esac +SUPERVISOR_BIN="${ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox" + +# ── Build binaries ─────────────────────────────────────────────────── +# Cap build parallelism to avoid OOM when run alongside a docker build or +# on memory-constrained developer machines. Override with CARGO_BUILD_JOBS. +CARGO_BUILD_JOBS_ARG=() +if [ -n "${CARGO_BUILD_JOBS:-}" ]; then + CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}") +fi + +echo "Building openshell-gateway and openshell-cli..." +cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + -p openshell-server --bin openshell-gateway +cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + -p openshell-cli --features openshell-core/dev-settings + +echo "Cross-compiling openshell-sandbox for ${SUPERVISOR_TARGET}..." +if ! command -v cargo-zigbuild >/dev/null 2>&1; then + cargo install --locked cargo-zigbuild +fi +rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true +cargo zigbuild ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" + +if [ ! -f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 + exit 1 +fi + +# ── Ensure a sandbox base image is available locally ──────────────── +# The bundled openshell-sandbox binary enforces a 'sandbox' user/group +# in the image. Use the community sandbox base image (also what real +# deployments default to). Callers can override with +# OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE if they have a smaller local image +# with the required 'sandbox' user. +SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base:latest}" +if ! docker image inspect "${SANDBOX_IMAGE}" >/dev/null 2>&1; then + echo "Pulling ${SANDBOX_IMAGE}..." + docker pull "${SANDBOX_IMAGE}" +fi + +# ── Generate ephemeral mTLS PKI ────────────────────────────────────── +PKI_DIR="${WORKDIR}/pki" +mkdir -p "${PKI_DIR}" +cd "${PKI_DIR}" + +cat > openssl.cnf <<'EOF' +[req] +distinguished_name = dn +prompt = no +[dn] +CN = openshell-server +[san_server] +subjectAltName = @alt_server +[alt_server] +DNS.1 = localhost +DNS.2 = host.openshell.internal +DNS.3 = host.docker.internal +IP.1 = 127.0.0.1 +IP.2 = ::1 +[san_client] +subjectAltName = DNS:openshell-client +EOF + +openssl req -x509 -newkey rsa:2048 -nodes -days 30 \ + -keyout ca.key -out ca.crt -subj "/CN=openshell-e2e-ca" >/dev/null 2>&1 + +openssl req -newkey rsa:2048 -nodes -keyout server.key -out server.csr \ + -config openssl.cnf >/dev/null 2>&1 +openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial \ + -out server.crt -days 30 -extfile openssl.cnf -extensions san_server >/dev/null 2>&1 + +openssl req -newkey rsa:2048 -nodes -keyout client.key -out client.csr \ + -subj "/CN=openshell-client" >/dev/null 2>&1 +openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial \ + -out client.crt -days 30 -extfile openssl.cnf -extensions san_client >/dev/null 2>&1 + +cd "${ROOT}" + +# ── Pick free ports ────────────────────────────────────────────────── +pick_port() { + python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()' +} +HOST_PORT=$(pick_port) +HEALTH_PORT=$(pick_port) +while [ "${HEALTH_PORT}" = "${HOST_PORT}" ]; do + HEALTH_PORT=$(pick_port) +done + +STATE_DIR="${WORKDIR}/state" +mkdir -p "${STATE_DIR}" + +SSH_HANDSHAKE_SECRET=$(openssl rand -hex 32) + +# Containers started by the docker driver reach the host gateway via +# host.openshell.internal (mapped to host-gateway by the driver). The +# gateway itself binds to 0.0.0.0:${HOST_PORT}. +GATEWAY_ENDPOINT="https://host.openshell.internal:${HOST_PORT}" + +echo "Starting openshell-gateway on port ${HOST_PORT} (health :${HEALTH_PORT})..." +# shellcheck disable=SC2086 +"${GATEWAY_BIN}" \ + --port "${HOST_PORT}" \ + --health-port "${HEALTH_PORT}" \ + --drivers docker \ + --tls-cert "${PKI_DIR}/server.crt" \ + --tls-key "${PKI_DIR}/server.key" \ + --tls-client-ca "${PKI_DIR}/ca.crt" \ + --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" \ + --grpc-endpoint "${GATEWAY_ENDPOINT}" \ + --docker-supervisor-bin "${SUPERVISOR_BIN}" \ + --docker-tls-ca "${PKI_DIR}/ca.crt" \ + --docker-tls-cert "${PKI_DIR}/client.crt" \ + --docker-tls-key "${PKI_DIR}/client.key" \ + --sandbox-image "${SANDBOX_IMAGE}" \ + --sandbox-image-pull-policy IfNotPresent \ + --ssh-handshake-secret "${SSH_HANDSHAKE_SECRET}" \ + --ssh-gateway-host 127.0.0.1 \ + --ssh-gateway-port "${HOST_PORT}" \ + >"${GATEWAY_LOG}" 2>&1 & +GATEWAY_PID=$! + +# ── Install mTLS material for the CLI ──────────────────────────────── +GATEWAY_NAME="openshell-e2e-docker-${HOST_PORT}" +GATEWAY_CONFIG_DIR="${HOME}/.config/openshell/gateways/${GATEWAY_NAME}" +mkdir -p "${GATEWAY_CONFIG_DIR}/mtls" +cp "${PKI_DIR}/ca.crt" "${GATEWAY_CONFIG_DIR}/mtls/ca.crt" +cp "${PKI_DIR}/client.crt" "${GATEWAY_CONFIG_DIR}/mtls/tls.crt" +cp "${PKI_DIR}/client.key" "${GATEWAY_CONFIG_DIR}/mtls/tls.key" + +export OPENSHELL_GATEWAY="${GATEWAY_NAME}" +export OPENSHELL_GATEWAY_ENDPOINT="https://127.0.0.1:${HOST_PORT}" +export OPENSHELL_PROVISION_TIMEOUT=180 + +# ── Wait for gateway readiness ─────────────────────────────────────── +echo "Waiting for gateway to become healthy..." +elapsed=0 +timeout=120 +while [ "${elapsed}" -lt "${timeout}" ]; do + if ! kill -0 "${GATEWAY_PID}" 2>/dev/null; then + echo "ERROR: openshell-gateway exited before becoming healthy" + exit 1 + fi + if "${CLI_BIN}" status >/dev/null 2>&1; then + echo "Gateway healthy after ${elapsed}s." + break + fi + sleep 2 + elapsed=$((elapsed + 2)) +done +if [ "${elapsed}" -ge "${timeout}" ]; then + echo "ERROR: gateway did not become healthy within ${timeout}s" + exit 1 +fi + +# ── Run the smoke test ─────────────────────────────────────────────── +echo "Running e2e smoke test (gateway: ${OPENSHELL_GATEWAY}, endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})..." +cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test smoke -- --nocapture + +echo "Smoke test passed." diff --git a/tasks/test.toml b/tasks/test.toml index f24ea6f2b..ce6e18461 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -52,3 +52,7 @@ run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2 description = "Boot openshell-vm and run smoke e2e (macOS ARM64; pass -- --vm-port=N [--vm-name=NAME] to reuse)" depends = ["build:docker:gateway", "vm:build"] run = "e2e/rust/e2e-vm.sh" + +["e2e:docker"] +description = "Run smoke e2e against a standalone gateway with the Docker compute driver" +run = "e2e/rust/e2e-docker.sh"