diff --git a/Cargo.lock b/Cargo.lock
index d5de42fb3..ec9fc2a81 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3224,6 +3224,7 @@ version = "0.0.0"
dependencies = [
"anyhow",
"axum 0.8.8",
+ "bollard",
"bytes",
"clap",
"futures",
@@ -3257,6 +3258,7 @@ dependencies = [
"serde_json",
"sha2 0.10.9",
"sqlx",
+ "tar",
"tempfile",
"thiserror 2.0.18",
"tokio",
diff --git a/architecture/gateway.md b/architecture/gateway.md
index 5dd2419af..7aed7542e 100644
--- a/architecture/gateway.md
+++ b/architecture/gateway.md
@@ -26,7 +26,7 @@ graph TD
SUP_REG["SupervisorSessionRegistry"]
STORE["Store<br/>(SQLite / Postgres)"]
COMPUTE["ComputeRuntime"]
- DRIVER["ComputeDriver<br/>(kubernetes / vm)"]
+ DRIVER["ComputeDriver<br/>(kubernetes / docker / vm)"]
WATCH_BUS["SandboxWatchBus"]
LOG_BUS["TracingLogBus"]
PLAT_BUS["PlatformEventBus"]
@@ -75,6 +75,7 @@ graph TD
| TLS | `crates/openshell-server/src/tls.rs` | `TlsAcceptor` wrapping rustls with ALPN |
| Persistence | `crates/openshell-server/src/persistence/mod.rs` | `Store` enum (SQLite/Postgres), generic object CRUD, protobuf codec |
| Compute runtime | `crates/openshell-server/src/compute/mod.rs` | `ComputeRuntime`, gateway-owned sandbox lifecycle orchestration over a compute backend |
+| Compute driver: Docker | `crates/openshell-server/src/compute/docker.rs` | In-process Docker create/delete/watch, supervisor side-load, local daemon integration |
| Compute driver: Kubernetes | `crates/openshell-driver-kubernetes/src/driver.rs` | Kubernetes CRD create/delete/watch, pod template translation |
| Compute driver: VM | `crates/openshell-driver-vm/src/driver.rs` | Per-sandbox microVM create/delete/watch, supervisor-only guest boot |
| Sandbox index | `crates/openshell-server/src/sandbox_index.rs` | `SandboxIndex` -- in-memory name/pod-to-id correlation |
@@ -103,6 +104,7 @@ The gateway boots in `cli::run_cli` (`crates/openshell-server/src/cli.rs`) and p
1. Connect to the persistence store (`Store::connect`), which auto-detects SQLite vs Postgres from the URL prefix and runs migrations.
2. Create `ComputeRuntime` with a `ComputeDriver` implementation selected by `OPENSHELL_DRIVERS`:
- `kubernetes` wraps `KubernetesComputeDriver` in `ComputeDriverService`, so the gateway uses the `openshell.compute.v1.ComputeDriver` RPC surface even without transport.
+ - `docker` constructs `DockerComputeDriver` in-process, talks directly to the local Docker daemon through Bollard, and keeps Docker-only configuration (supervisor/TLS bind mounts) local to `openshell-server`.
- `vm` spawns the standalone `openshell-driver-vm` binary as a local compute-driver process, resolves it from `--driver-dir`, conventional libexec install paths, or a sibling of the gateway binary, connects to it over a Unix domain socket, and keeps the libkrun/rootfs runtime out of the gateway binary.
3. Build `ServerState` (shared via `Arc` across all handlers), including a fresh `SupervisorSessionRegistry`.
4. **Spawn background tasks**:
@@ -132,7 +134,12 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d
| `--sandbox-namespace` | `OPENSHELL_SANDBOX_NAMESPACE` | `default` | Kubernetes namespace for sandbox CRDs |
| `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods |
| `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) |
-| `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes` and `vm`. |
+| `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes`, `docker`, and `vm`. |
+| `--docker-supervisor-bin` | `OPENSHELL_DOCKER_SUPERVISOR_BIN` | Sibling `openshell-sandbox` → local cargo build → extracted from `--docker-supervisor-image` | Linux `openshell-sandbox` binary bind-mounted into Docker sandboxes as PID 1 |
+| `--docker-supervisor-image` | `OPENSHELL_DOCKER_SUPERVISOR_IMAGE` | `ghcr.io/nvidia/openshell/supervisor:<tag>` | Image the gateway pulls to extract the Linux supervisor binary when no explicit path or local build is available. The binary is cached under `$XDG_DATA_HOME/openshell/docker-supervisor//openshell-sandbox` and reused across restarts. |
+| `--docker-tls-ca` | `OPENSHELL_DOCKER_TLS_CA` | None | CA cert bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/ca.crt` for gateway mTLS |
+| `--docker-tls-cert` | `OPENSHELL_DOCKER_TLS_CERT` | None | Client cert bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/tls.crt` for gateway mTLS |
+| `--docker-tls-key` | `OPENSHELL_DOCKER_TLS_KEY` | None | Client private key bind-mounted into Docker sandboxes at `/etc/openshell/tls/client/tls.key` for gateway mTLS |
| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state |
| `--driver-dir` | `OPENSHELL_DRIVER_DIR` | unset | Override directory for `openshell-driver-vm`. When unset, the gateway searches `~/.local/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling binary. |
| `--vm-krun-log-level` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun log level for VM helper processes |
@@ -599,6 +606,17 @@ The Helm chart template is at `deploy/helm/openshell/templates/statefulset.yaml`
The gateway reaches the sandbox exclusively through the supervisor-initiated `ConnectSupervisor` session, so the driver never returns sandbox network endpoints.
+### Docker Driver
+
+`DockerComputeDriver` (`crates/openshell-server/src/compute/docker.rs`) is built directly into the gateway. It connects to the local Docker daemon with Bollard and provisions one long-lived container per sandbox.
+
+- **Create**: Pulls the requested image according to `sandbox_image_pull_policy`, creates a labeled container, bind-mounts a Linux `openshell-sandbox` binary read-only at `/opt/openshell/bin/openshell-sandbox`, and starts that supervisor as PID 1. No sandbox ports are published.
+- **Persistence**: The Docker driver does not create a separate workspace volume. `/sandbox` lives on the container writable layer, so data persists across gateway restarts as long as the same container remains.
+- **Gateway callback**: When `OPENSHELL_GRPC_ENDPOINT` points at `localhost` or another loopback address, the driver rewrites it to `host.openshell.internal` inside the container and injects `host-gateway` aliases so the supervisor can still open its outbound `ConnectSupervisor` stream.
+- **TLS**: For `https://` gateway endpoints, the driver requires `--docker-tls-ca`, `--docker-tls-cert`, and `--docker-tls-key`. These files are bind-mounted read-only into `/etc/openshell/tls/client`, and the driver sets `OPENSHELL_TLS_CA`, `OPENSHELL_TLS_CERT`, and `OPENSHELL_TLS_KEY` to those paths.
+- **Limits**: V1 supports only `cpu_limit` and `memory_limit`, mapped to Docker `NanoCpus` and `Memory`. GPU requests, resource requests, `agent_socket_path`, and non-empty `platform_config` are rejected as failed preconditions.
+- **Watch stream**: The driver polls Docker for OpenShell-managed containers, emits snapshot diffs and deletions, and rebuilds its state from labels after gateway restart. Containers running under Docker restart policy `unless-stopped` come back after daemon restart without any inbound port setup.
+
### VM Driver
`VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket.
diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs
index 3217a783d..061feffe5 100644
--- a/crates/openshell-core/src/config.rs
+++ b/crates/openshell-core/src/config.rs
@@ -15,6 +15,7 @@ use std::str::FromStr;
pub enum ComputeDriverKind {
Kubernetes,
Vm,
+ Docker,
Podman,
}
@@ -24,6 +25,7 @@ impl ComputeDriverKind {
match self {
Self::Kubernetes => "kubernetes",
Self::Vm => "vm",
+ Self::Docker => "docker",
Self::Podman => "podman",
}
}
@@ -42,9 +44,10 @@ impl FromStr for ComputeDriverKind {
match value.trim().to_ascii_lowercase().as_str() {
"kubernetes" => Ok(Self::Kubernetes),
"vm" => Ok(Self::Vm),
+ "docker" => Ok(Self::Docker),
"podman" => Ok(Self::Podman),
other => Err(format!(
- "unsupported compute driver '{other}'. expected one of: kubernetes, vm, podman"
+ "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman"
)),
}
}
@@ -385,12 +388,16 @@ mod tests {
"podman".parse::().unwrap(),
ComputeDriverKind::Podman
);
+ assert_eq!(
+ "docker".parse::().unwrap(),
+ ComputeDriverKind::Docker
+ );
}
#[test]
fn compute_driver_kind_rejects_unknown_values() {
- let err = "docker".parse::().unwrap_err();
- assert!(err.contains("unsupported compute driver 'docker'"));
+ let err = "firecracker".parse::().unwrap_err();
+ assert!(err.contains("unsupported compute driver 'firecracker'"));
}
#[test]
diff --git a/crates/openshell-sandbox/src/procfs.rs b/crates/openshell-sandbox/src/procfs.rs
index a6dd379b8..1ce91dd20 100644
--- a/crates/openshell-sandbox/src/procfs.rs
+++ b/crates/openshell-sandbox/src/procfs.rs
@@ -576,9 +576,12 @@ mod tests {
}
/// An unlinked executable whose filename contains non-UTF-8 bytes must
- /// still strip exactly one kernel-added `" (deleted)"` suffix. We operate
- /// on raw bytes via `OsStrExt`, so invalid UTF-8 is not a reason to skip
- /// the strip and return a path that downstream `stat()` calls will reject.
+ /// still resolve to its original path. Some kernels append a literal
+ /// `" (deleted)"` suffix to `/proc/<pid>/exe` after unlink while others
+ /// do not for this edge case, so the assertion has to tolerate both.
+ ///
+ /// When the suffix is present, we still need to strip exactly one copy
+ /// while operating on raw bytes via `OsStrExt`.
#[cfg(target_os = "linux")]
#[test]
fn binary_path_strips_suffix_for_non_utf8_filename() {
@@ -603,13 +606,10 @@ mod tests {
wait_for_child_exec(pid, &exe_path);
std::fs::remove_file(&exe_path).unwrap();
- // Sanity: raw readlink ends with " (deleted)" and is not valid UTF-8.
+ // Sanity: the raw readlink remains non-UTF-8 after unlink.
let raw = std::fs::read_link(format!("/proc/{pid}/exe")).unwrap();
let raw_bytes = raw.as_os_str().as_bytes();
- assert!(
- raw_bytes.ends_with(b" (deleted)"),
- "kernel should append ' (deleted)' to unlinked exe readlink"
- );
+ let kernel_appended_deleted_suffix = raw_bytes.ends_with(b" (deleted)");
assert!(
std::str::from_utf8(raw_bytes).is_err(),
"test precondition: raw readlink must contain non-UTF-8 bytes"
@@ -619,12 +619,19 @@ mod tests {
binary_path(pid).expect("binary_path should succeed for non-UTF-8 unlinked path");
assert_eq!(
resolved, exe_path,
- "binary_path must strip exactly one ' (deleted)' suffix for non-UTF-8 paths"
- );
- assert!(
- !resolved.as_os_str().as_bytes().ends_with(b" (deleted)"),
- "stripped path must not end with ' (deleted)'"
+ "binary_path must resolve non-UTF-8 unlinked paths back to the original filename"
);
+ if kernel_appended_deleted_suffix {
+ assert!(
+ !resolved.as_os_str().as_bytes().ends_with(b" (deleted)"),
+ "stripped path must not end with ' (deleted)'"
+ );
+ } else {
+ assert_eq!(
+ raw, exe_path,
+ "kernels that omit the deleted suffix should report the original unlinked path"
+ );
+ }
let _ = child.kill();
let _ = child.wait();
diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml
index b2524ff0b..5af790092 100644
--- a/crates/openshell-server/Cargo.toml
+++ b/crates/openshell-server/Cargo.toml
@@ -67,6 +67,7 @@ sqlx = { workspace = true }
reqwest = { workspace = true }
uuid = { workspace = true }
url = { workspace = true }
+bollard = { version = "0.20" }
hmac = "0.12"
sha2 = "0.10"
hex = "0.4"
@@ -74,6 +75,8 @@ russh = "0.57"
rand = "0.9"
petname = "2"
ipnet = "2"
+tar = "0.4"
+tempfile = "3"
[features]
dev-settings = ["openshell-core/dev-settings"]
@@ -81,7 +84,6 @@ dev-settings = ["openshell-core/dev-settings"]
[dev-dependencies]
hyper-rustls = { version = "0.27", default-features = false, features = ["native-tokio", "http1", "tls12", "logging", "ring", "webpki-tokio"] }
rcgen = { version = "0.13", features = ["crypto", "pem"] }
-tempfile = "3"
tokio-tungstenite = { workspace = true }
futures-util = "0.3"
wiremock = "0.6"
diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs
index 0b64de803..9f664ebee 100644
--- a/crates/openshell-server/src/cli.rs
+++ b/crates/openshell-server/src/cli.rs
@@ -11,7 +11,7 @@ use std::path::PathBuf;
use tracing::info;
use tracing_subscriber::EnvFilter;
-use crate::compute::VmComputeConfig;
+use crate::compute::{DockerComputeConfig, VmComputeConfig};
use crate::{run_server, tracing_bus::TracingLogBus};
/// `OpenShell` gateway process - gRPC and HTTP server with protocol multiplexing.
@@ -165,6 +165,33 @@ struct Args {
#[arg(long, env = "OPENSHELL_VM_TLS_KEY")]
vm_tls_key: Option,
+    /// Linux `openshell-sandbox` binary bind-mounted into Docker sandboxes.
+    ///
+    /// When unset the gateway falls back to (in order) a sibling
+    /// `openshell-sandbox` next to the gateway binary, a local cargo build,
+    /// or extracting the binary from `--docker-supervisor-image`.
+    #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_BIN")]
+    docker_supervisor_bin: Option<PathBuf>,
+
+    /// Image the Docker driver pulls to extract the Linux
+    /// `openshell-sandbox` binary when no explicit `--docker-supervisor-bin`
+    /// override or local build is available. Defaults to
+    /// `ghcr.io/nvidia/openshell/supervisor:<tag>`.
+    #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_IMAGE")]
+    docker_supervisor_image: Option<String>,
+
+    /// CA certificate bind-mounted into Docker sandboxes for gateway mTLS.
+    #[arg(long, env = "OPENSHELL_DOCKER_TLS_CA")]
+    docker_tls_ca: Option<PathBuf>,
+
+    /// Client certificate bind-mounted into Docker sandboxes for gateway mTLS.
+    #[arg(long, env = "OPENSHELL_DOCKER_TLS_CERT")]
+    docker_tls_cert: Option<PathBuf>,
+
+    /// Client private key bind-mounted into Docker sandboxes for gateway mTLS.
+    #[arg(long, env = "OPENSHELL_DOCKER_TLS_KEY")]
+    docker_tls_key: Option<PathBuf>,
+
/// Disable TLS entirely — listen on plaintext HTTP.
/// Use this when the gateway sits behind a reverse proxy or tunnel
/// (e.g. Cloudflare Tunnel) that terminates TLS at the edge.
@@ -283,6 +310,14 @@ async fn run_from_args(args: Args) -> Result<()> {
guest_tls_key: args.vm_tls_key,
};
+ let docker_config = DockerComputeConfig {
+ supervisor_bin: args.docker_supervisor_bin,
+ supervisor_image: args.docker_supervisor_image,
+ guest_tls_ca: args.docker_tls_ca,
+ guest_tls_cert: args.docker_tls_cert,
+ guest_tls_key: args.docker_tls_key,
+ };
+
if args.disable_tls {
info!("TLS disabled — listening on plaintext HTTP");
} else if args.disable_gateway_auth {
@@ -291,7 +326,7 @@ async fn run_from_args(args: Args) -> Result<()> {
info!(bind = %config.bind_address, "Starting OpenShell server");
- run_server(config, vm_config, tracing_log_bus)
+ run_server(config, vm_config, docker_config, tracing_log_bus)
.await
.into_diagnostic()
}
diff --git a/crates/openshell-server/src/compute/docker.rs b/crates/openshell-server/src/compute/docker.rs
new file mode 100644
index 000000000..d5a868209
--- /dev/null
+++ b/crates/openshell-server/src/compute/docker.rs
@@ -0,0 +1,1983 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Bundled Docker compute driver.
+
+use bollard::Docker;
+use bollard::errors::Error as BollardError;
+use bollard::models::{
+ ContainerCreateBody, ContainerSummary, ContainerSummaryStateEnum, HostConfig, Mount,
+ MountTypeEnum, RestartPolicy, RestartPolicyNameEnum,
+};
+use bollard::query_parameters::{
+ CreateContainerOptionsBuilder, CreateImageOptions, DownloadFromContainerOptionsBuilder,
+ ListContainersOptionsBuilder, RemoveContainerOptionsBuilder,
+};
+use bytes::Bytes;
+use futures::{Stream, StreamExt};
+use openshell_core::proto::compute::v1::{
+ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse,
+ DriverCondition, DriverSandbox, DriverSandboxStatus, DriverSandboxTemplate,
+ GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse,
+ ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse,
+ ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent,
+ WatchSandboxesEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent,
+ compute_driver_server::ComputeDriver, watch_sandboxes_event,
+};
+use openshell_core::{Config, Error, Result as CoreResult};
+use std::collections::HashMap;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+use std::pin::Pin;
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::sync::{broadcast, mpsc};
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{Request, Response, Status};
+use tracing::{info, warn};
+use url::{Host, Url};
+
+// Capacity of the broadcast channel created in `DockerComputeDriver::new`.
+const WATCH_BUFFER: usize = 128;
+// NOTE(review): presumably the base delay and the backoff ceiling used by the
+// Docker poll loop; the loop itself is not visible from here — confirm.
+const WATCH_POLL_INTERVAL: Duration = Duration::from_secs(2);
+const WATCH_POLL_MAX_BACKOFF: Duration = Duration::from_secs(30);
+
+// Container label keys (and the managed-by value) in the `openshell.ai/`
+// namespace.
+// NOTE(review): presumably stamped on containers at create time and used to
+// find OpenShell-managed containers again — confirm against the create/list paths.
+const MANAGED_BY_LABEL_KEY: &str = "openshell.ai/managed-by";
+const MANAGED_BY_LABEL_VALUE: &str = "openshell";
+const SANDBOX_ID_LABEL_KEY: &str = "openshell.ai/sandbox-id";
+const SANDBOX_NAME_LABEL_KEY: &str = "openshell.ai/sandbox-name";
+const SANDBOX_NAMESPACE_LABEL_KEY: &str = "openshell.ai/sandbox-namespace";
+
+// In-container path for the `openshell-sandbox` supervisor binary.
+const SUPERVISOR_MOUNT_PATH: &str = "/opt/openshell/bin/openshell-sandbox";
+// Parent directory of the three TLS mount paths below; compiled for tests only.
+#[cfg(test)]
+const TLS_MOUNT_DIR: &str = "/etc/openshell/tls/client";
+// In-container paths for the client CA certificate, certificate and private key.
+const TLS_CA_MOUNT_PATH: &str = "/etc/openshell/tls/client/ca.crt";
+const TLS_CERT_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.crt";
+const TLS_KEY_MOUNT_PATH: &str = "/etc/openshell/tls/client/tls.key";
+// NOTE(review): presumably the workload command handed to the supervisor so the
+// container stays alive; usage is outside this view — confirm.
+const SANDBOX_COMMAND: &str = "sleep infinity";
+// Host aliases.
+// NOTE(review): presumably injected into containers so a loopback gateway
+// endpoint remains reachable from inside the sandbox — confirm against the
+// endpoint-rewrite code.
+const HOST_OPENSHELL_INTERNAL: &str = "host.openshell.internal";
+const HOST_DOCKER_INTERNAL: &str = "host.docker.internal";
+
+/// Default image holding the Linux `openshell-sandbox` binary. The gateway
+/// pulls this image and extracts the binary to a host-side cache when no
+/// explicit `--docker-supervisor-bin` override or local build is available.
+const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor";
+
+/// Image tag baked in at compile time to pair the gateway with a matching
+/// supervisor image. Mirrors the pattern used by `openshell-bootstrap`:
+/// defaults to `"dev"`; CI overrides with a release version via the
+/// `OPENSHELL_IMAGE_TAG` env var during `cargo build`.
+const DEFAULT_DOCKER_SUPERVISOR_IMAGE_TAG: &str = match option_env!("OPENSHELL_IMAGE_TAG") {
+ Some(tag) => tag,
+ None => "dev",
+};
+
+/// Path to the supervisor binary inside the `openshell/supervisor` image.
+const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/usr/local/bin/openshell-sandbox";
+
+/// Build the fallback supervisor image reference,
+/// `ghcr.io/nvidia/openshell/supervisor:<tag>`, where `<tag>` is the
+/// compile-time `DEFAULT_DOCKER_SUPERVISOR_IMAGE_TAG`. Used when no
+/// supervisor binary override is provided.
+pub fn default_docker_supervisor_image() -> String {
+    [
+        DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO,
+        DEFAULT_DOCKER_SUPERVISOR_IMAGE_TAG,
+    ]
+    .join(":")
+}
+
+/// Queried by the Docker driver to decide when a sandbox's supervisor
+/// relay is live. Implementations return `true` once a sandbox has an
+/// active `ConnectSupervisor` session registered.
+///
+/// The driver cannot observe the supervisor's SSH socket directly (it
+/// lives inside the container), so it leans on this signal to flip the
+/// Ready condition from `DependenciesNotReady` to `True`.
+///
+/// The `Send + Sync + 'static` bounds let an implementation be shared
+/// behind an `Arc` with the driver's spawned background task.
+pub trait SupervisorReadiness: Send + Sync + 'static {
+ /// Report whether the sandbox identified by `sandbox_id` currently has a
+ /// registered supervisor session.
+ fn is_supervisor_connected(&self, sandbox_id: &str) -> bool;
+}
+
+/// Gateway-local configuration for the bundled Docker compute driver.
+///
+/// Every field is optional; `Default` yields a config with nothing
+/// overridden.
+#[derive(Debug, Clone, Default)]
+pub struct DockerComputeConfig {
+    /// Optional override for the Linux `openshell-sandbox` binary mounted into containers.
+    pub supervisor_bin: Option<PathBuf>,
+
+    /// Optional override for the image the gateway pulls to extract the
+    /// Linux `openshell-sandbox` binary when no explicit binary path or
+    /// local build is available. Defaults to
+    /// `ghcr.io/nvidia/openshell/supervisor:<tag>`.
+    pub supervisor_image: Option<String>,
+
+    /// Host-side CA certificate for Docker sandbox mTLS.
+    pub guest_tls_ca: Option<PathBuf>,
+
+    /// Host-side client certificate for Docker sandbox mTLS.
+    pub guest_tls_cert: Option<PathBuf>,
+
+    /// Host-side private key for Docker sandbox mTLS.
+    pub guest_tls_key: Option<PathBuf>,
+}
+
+/// Bundle of the three TLS file paths (CA, certificate, key) the driver keeps
+/// together once guest TLS is configured.
+// NOTE(review): presumably these are the host-side files taken from
+// `DockerComputeConfig::guest_tls_*` — confirm against `docker_guest_tls_paths`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct DockerGuestTlsPaths {
+ /// CA certificate path.
+ pub(crate) ca: PathBuf,
+ /// Client certificate path.
+ pub(crate) cert: PathBuf,
+ /// Private key path.
+ pub(crate) key: PathBuf,
+}
+
+/// Resolved settings the driver carries after construction.
+///
+/// Populated once in `DockerComputeDriver::new` from the gateway `Config`,
+/// the resolved supervisor binary and TLS paths, and the Docker daemon's
+/// reported version.
+#[derive(Debug, Clone)]
+struct DockerDriverRuntimeConfig {
+    /// Copied from `Config::sandbox_image`.
+    default_image: String,
+    /// Copied from `Config::sandbox_image_pull_policy`.
+    image_pull_policy: String,
+    /// Copied from `Config::grpc_endpoint`; validated as non-blank in `new`.
+    grpc_endpoint: String,
+    /// Copied from `Config::sandbox_ssh_socket_path`.
+    ssh_socket_path: String,
+    /// Copied from `Config::ssh_handshake_secret`.
+    ssh_handshake_secret: String,
+    /// Copied from `Config::ssh_handshake_skew_secs`.
+    ssh_handshake_skew_secs: u64,
+    /// Copied from `Config::log_level`.
+    log_level: String,
+    /// Supervisor binary path produced by `resolve_supervisor_bin`.
+    supervisor_bin: PathBuf,
+    /// TLS file paths produced by `docker_guest_tls_paths`; `None` when that
+    /// helper yields no paths.
+    guest_tls: Option<DockerGuestTlsPaths>,
+    /// Version string reported by the Docker daemon, or `"unknown"` when the
+    /// daemon did not report one.
+    daemon_version: String,
+}
+
+/// Compute driver backed by the local Docker daemon.
+///
+/// Cloning is cheap for the shared parts: the Docker client and the readiness
+/// probe sit behind `Arc`, and the broadcast sender is itself a handle. `new`
+/// relies on this to hand a clone to its background poll task.
+#[derive(Clone)]
+pub struct DockerComputeDriver {
+    /// Shared Bollard client connected with local defaults.
+    docker: Arc<Docker>,
+    /// Settings resolved at construction time.
+    config: DockerDriverRuntimeConfig,
+    /// Sending half of the watch-event broadcast channel.
+    // NOTE(review): item type restored as `WatchSandboxesEvent`, the only event
+    // type this file imports — confirm.
+    events: broadcast::Sender<WatchSandboxesEvent>,
+    /// Probe for whether a sandbox's supervisor session is connected.
+    supervisor_readiness: Arc<dyn SupervisorReadiness>,
+}
+
+/// Optional CPU and memory caps for a sandbox container.
+///
+/// `Default` leaves both limits unset.
+// NOTE(review): the integer type was restored as `i64` to match the
+// `nano_cpus` / `memory` fields of bollard's `HostConfig` — confirm.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+struct DockerResourceLimits {
+    /// CPU limit in Docker "nano CPUs"; `None` means no limit.
+    nano_cpus: Option<i64>,
+    /// Memory limit in bytes; `None` means no limit.
+    memory_bytes: Option<i64>,
+}
+
+/// Boxed, pinned stream yielding watch events or gRPC `Status` errors.
+type WatchStream =
+    Pin<Box<dyn Stream<Item = Result<WatchSandboxesEvent, Status>> + Send + 'static>>;
+
+impl DockerComputeDriver {
+    /// Connect to the local Docker daemon and build a ready-to-use driver.
+    ///
+    /// Queries the daemon version, resolves the supervisor binary for the
+    /// daemon's architecture and the guest TLS paths, then spawns a detached
+    /// task that runs `poll_loop` on a clone of the driver.
+    ///
+    /// # Errors
+    ///
+    /// Returns a config error when `config.grpc_endpoint` is blank and an
+    /// execution error when the Docker client cannot be created or the
+    /// daemon version query fails. Failures from `resolve_supervisor_bin`
+    /// and `docker_guest_tls_paths` are propagated unchanged.
+    pub async fn new(
+        config: &Config,
+        docker_config: &DockerComputeConfig,
+        supervisor_readiness: Arc<dyn SupervisorReadiness>,
+    ) -> CoreResult<Self> {
+        if config.grpc_endpoint.trim().is_empty() {
+            return Err(Error::config(
+                "grpc_endpoint is required when using the docker compute driver",
+            ));
+        }
+
+        let docker = Docker::connect_with_local_defaults()
+            .map_err(|err| Error::execution(format!("failed to create Docker client: {err}")))?;
+        // The version query is also the first real round-trip to the daemon.
+        let version = docker.version().await.map_err(|err| {
+            Error::execution(format!("failed to query Docker daemon version: {err}"))
+        })?;
+        // A missing architecture is passed to the normalizer as an empty string.
+        let daemon_arch = normalize_docker_arch(version.arch.as_deref().unwrap_or_default());
+        let supervisor_bin = resolve_supervisor_bin(&docker, docker_config, &daemon_arch).await?;
+        let guest_tls = docker_guest_tls_paths(config, docker_config)?;
+
+        let driver = Self {
+            docker: Arc::new(docker),
+            config: DockerDriverRuntimeConfig {
+                default_image: config.sandbox_image.clone(),
+                image_pull_policy: config.sandbox_image_pull_policy.clone(),
+                grpc_endpoint: config.grpc_endpoint.clone(),
+                ssh_socket_path: config.sandbox_ssh_socket_path.clone(),
+                ssh_handshake_secret: config.ssh_handshake_secret.clone(),
+                ssh_handshake_skew_secs: config.ssh_handshake_skew_secs,
+                log_level: config.log_level.clone(),
+                supervisor_bin,
+                guest_tls,
+                daemon_version: version.version.unwrap_or_else(|| "unknown".to_string()),
+            },
+            // Only the sender is kept; the initial receiver is dropped here.
+            events: broadcast::channel(WATCH_BUFFER).0,
+            supervisor_readiness,
+        };
+
+        // Detached on purpose: the join handle is dropped, and the poll task
+        // works on its own clone of the driver.
+        let poll_driver = driver.clone();
+        tokio::spawn(async move {
+            poll_driver.poll_loop().await;
+        });
+
+        Ok(driver)
+    }
+
+    /// Describe this driver: fixed name `"docker"`, the daemon version as the
+    /// driver version, the configured default image, and no GPU support.
+    fn capabilities(&self) -> GetCapabilitiesResponse {
+        let DockerDriverRuntimeConfig {
+            daemon_version,
+            default_image,
+            ..
+        } = &self.config;
+
+        GetCapabilitiesResponse {
+            driver_name: String::from("docker"),
+            driver_version: daemon_version.clone(),
+            default_image: default_image.clone(),
+            supports_gpu: false,
+        }
+    }
+
+    /// Check that `sandbox` only asks for things this driver can provide.
+    ///
+    /// A missing `spec` or `spec.template` is an invalid-argument error. A
+    /// blank image, a GPU request, a non-blank `agent_socket_path`, or a
+    /// non-empty `platform_config` is a failed precondition; they are tested
+    /// in that order and the first match wins. Any error from
+    /// `docker_resource_limits` is returned as-is.
+    fn validate_sandbox(&self, sandbox: &DriverSandbox) -> Result<(), Status> {
+        let Some(spec) = sandbox.spec.as_ref() else {
+            return Err(Status::invalid_argument("sandbox.spec is required"));
+        };
+        let Some(template) = spec.template.as_ref() else {
+            return Err(Status::invalid_argument(
+                "sandbox.spec.template is required",
+            ));
+        };
+
+        let has_platform_config = matches!(
+            template.platform_config.as_ref(),
+            Some(platform) if !platform.fields.is_empty()
+        );
+
+        // First unsupported feature found, if any.
+        let rejection = if template.image.trim().is_empty() {
+            Some("docker sandboxes require a template image")
+        } else if spec.gpu {
+            Some("docker compute driver does not support gpu sandboxes")
+        } else if !template.agent_socket_path.trim().is_empty() {
+            Some("docker compute driver does not support template.agent_socket_path")
+        } else if has_platform_config {
+            Some("docker compute driver does not support template.platform_config")
+        } else {
+            None
+        };
+        if let Some(message) = rejection {
+            return Err(Status::failed_precondition(message));
+        }
+
+        // Called only for its validation; the computed limits are discarded.
+        docker_resource_limits(template)?;
+        Ok(())
+    }
+
+ async fn get_sandbox_snapshot(
+ &self,
+ sandbox_id: &str,
+ sandbox_name: &str,
+ ) -> Result