diff --git a/Cargo.lock b/Cargo.lock
index 4b29a0c7f..b67ffe5eb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -402,6 +402,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd307490d624467aa6f74b0eabb77633d1f758a7b25f12bceb0b22e08d9726f6"
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
[[package]]
name = "base64"
version = "0.21.7"
@@ -808,6 +814,27 @@ version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
+[[package]]
+name = "const_format"
+version = "0.2.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e"
+dependencies = [
+ "const_format_proc_macros",
+ "konst",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
[[package]]
name = "constant_time_eq"
version = "0.4.2"
@@ -1166,6 +1193,37 @@ dependencies = [
"syn 1.0.109",
]
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.117",
+]
+
[[package]]
name = "dialoguer"
version = "0.11.0"
@@ -1633,6 +1691,18 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "getset"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912"
+dependencies = [
+ "proc-macro-error2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
[[package]]
name = "ghash"
version = "0.5.1"
@@ -1837,6 +1907,15 @@ dependencies = [
"itoa",
]
+[[package]]
+name = "http-auth"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "150fa4a9462ef926824cf4519c84ed652ca8f4fbae34cb8af045b5cbcaf98822"
+dependencies = [
+ "memchr",
+]
+
[[package]]
name = "http-body"
version = "1.0.1"
@@ -2349,6 +2428,21 @@ dependencies = [
"thiserror 1.0.69",
]
+[[package]]
+name = "jwt"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6204285f77fe7d9784db3fdc449ecce1a0114927a51d5a41c4c7a292011c015f"
+dependencies = [
+ "base64 0.13.1",
+ "crypto-common 0.1.7",
+ "digest 0.10.7",
+ "hmac",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+]
+
[[package]]
name = "k8s-openapi"
version = "0.21.1"
@@ -2362,6 +2456,21 @@ dependencies = [
"serde_json",
]
+[[package]]
+name = "konst"
+version = "0.2.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb"
+dependencies = [
+ "konst_macro_rules",
+]
+
+[[package]]
+name = "konst_macro_rules"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37"
+
[[package]]
name = "kube"
version = "0.90.0"
@@ -2966,6 +3075,60 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "oci-client"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b74df13319e08bc386d333d3dc289c774c88cc543cae31f5347db07b5ec2172"
+dependencies = [
+ "bytes",
+ "chrono",
+ "futures-util",
+ "http",
+ "http-auth",
+ "jwt",
+ "lazy_static",
+ "oci-spec",
+ "olpc-cjson",
+ "regex",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "unicase",
+]
+
+[[package]]
+name = "oci-spec"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308"
+dependencies = [
+ "const_format",
+ "derive_builder",
+ "getset",
+ "regex",
+ "serde",
+ "serde_json",
+ "strum 0.27.2",
+ "strum_macros 0.27.2",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "olpc-cjson"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "696183c9b5fe81a7715d074fd632e8bd46f4ccc0231a3ed7fc580a80de5f7083"
+dependencies = [
+ "serde",
+ "serde_json",
+ "unicode-normalization",
+]
+
[[package]]
name = "once_cell"
version = "1.21.4"
@@ -3095,14 +3258,21 @@ name = "openshell-driver-vm"
version = "0.0.0"
dependencies = [
"clap",
+ "flate2",
"futures",
"libc",
"libloading",
"miette",
"nix",
+ "oci-client",
"openshell-core",
"prost-types",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
"tar",
+ "tempfile",
+ "thiserror 2.0.18",
"tokio",
"tokio-stream",
"tonic",
@@ -3996,7 +4166,7 @@ dependencies = [
"lru",
"paste",
"stability",
- "strum",
+ "strum 0.26.3",
"unicode-segmentation",
"unicode-truncate",
"unicode-width 0.1.14",
@@ -4112,12 +4282,14 @@ dependencies = [
"sync_wrapper",
"tokio",
"tokio-rustls",
+ "tokio-util",
"tower 0.5.3",
"tower-http 0.6.8",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
+ "wasm-streams",
"web-sys",
"webpki-roots 1.0.6",
]
@@ -5102,9 +5274,15 @@ version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
dependencies = [
- "strum_macros",
+ "strum_macros 0.26.4",
]
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+
[[package]]
name = "strum_macros"
version = "0.26.4"
@@ -5118,6 +5296,18 @@ dependencies = [
"syn 2.0.117",
]
+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
[[package]]
name = "subtle"
version = "2.6.1"
@@ -5764,6 +5954,12 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
[[package]]
name = "unicode-bidi"
version = "0.3.18"
@@ -6035,6 +6231,19 @@ dependencies = [
"wasmparser",
]
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
[[package]]
name = "wasmparser"
version = "0.244.0"
diff --git a/architecture/vm-driver.md b/architecture/vm-driver.md
new file mode 100644
index 000000000..be3a6c6ec
--- /dev/null
+++ b/architecture/vm-driver.md
@@ -0,0 +1,271 @@
+# VM Compute Driver
+
+> Status: Experimental. The VM compute driver is a second-generation
+> compute backend for OpenShell sandboxes. Kubernetes remains the default.
+
+## Overview
+
+`openshell-driver-vm` is an in-process compute driver that runs each
+sandbox as a libkrun microVM on the host. Unlike the Kubernetes driver,
+it has no orchestrator dependency — the driver is a single binary that
+exposes the `ComputeDriver` gRPC service and manages VMs directly.
+
+A sandbox spec can optionally include `template.image`, an OCI image
+reference. When set, the driver treats the image as the **sandbox
+payload** (the user's container filesystem), not the guest OS. The fixed
+libkrun guest rootfs still boots the control plane (init script,
+supervisor, SSH); the OCI image is mounted as an overlay and the
+supervisor `pivot_root`s into it before launching the image entrypoint.
+
+## OCI container execution model
+
+```mermaid
+flowchart TB
+ subgraph host["Host"]
+ driver["openshell-driver-vm"]
+ subgraph oci["OCI manager"]
+ pull["oci-client: pull manifest, config, layers"]
+ flatten["flatten layers (apply whiteouts)"]
+ inject["inject sandbox user, /sandbox, /tmp, /etc stubs"]
+ build["build squashfs via mksquashfs (zstd)"]
+ pull --> flatten --> inject --> build
+ end
+ cache[("<state>/oci-cache/<br/>blobs/, fs/<digest>.<arch>.squashfs,<br/>meta/*.json")]
+ statedir[("Per-sandbox state dir<br/>sandbox-state.raw (ext4 upper + workdir)<br/>rootfs-console.log")]
+ driver --> oci --> cache
+ driver --> statedir
+ end
+
+ driver -- "krun_add_disk3 × 2 + set_exec env" --> guest
+
+ subgraph guest["Guest VM"]
+ direction TB
+ vda["/dev/vda = RO base squashfs"]
+ vdb["/dev/vdb = sandbox-state.raw"]
+ base["/base (ro)"]
+ st["/state (ext4)"]
+ vda -- "mount ro" --> base
+ vdb -- "mkfs.ext4 + mount" --> st
+
+ overlay["/state/merged<br/>overlay(lower=/base, upper=/state/upper,<br/>work=/state/work)"]
+ workspace["/state/workspace<br/>bind-mounted over /sandbox"]
+ base --> overlay
+ st --> overlay
+ st --> workspace
+ workspace --> overlay
+
+ pivot["pivot_root /state/merged<br/>supervisor sees overlay as /"]
+ overlay --> pivot
+
+ supervisor["openshell-sandbox --workdir <OCI workdir> -- <OCI argv><br/>policy, Landlock, seccomp, SSH, OCSF logging"]
+ pivot --> supervisor
+ end
+```
+
+### Host pipeline
+
+`crates/openshell-driver-vm/src/oci/` owns the host pipeline. The
+top-level entrypoint is `oci::prepare(puller, cache, build_opts,
+image_ref, env_overrides)`:
+
+| Module | Responsibility |
+|---|---|
+| `client.rs` | Anonymous pull via `oci-client` with a platform resolver pinned to `linux/amd64` or `linux/arm64`. Normalizes the OCI image config into `ImageConfig`. |
+| `flatten.rs` | Applies OCI layer tars in order with whiteout handling (`.wh.*`, `.wh..wh..opq`). Rejects absolute/parent-traversal paths. Dispatches on media type (`tar`, `tar+gzip`). |
+| `compat.rs` | Injects `sandbox:10001:10001` into `/etc/passwd` + `/etc/group`, ensures `/sandbox` (0755) and `/tmp` (1777) exist, writes placeholder `/etc/hosts` and `/etc/resolv.conf`. Idempotent. Picks best shell (`/bin/sh` → `/sbin/nologin` → `/bin/false`). |
+| `fs_image.rs` | Shells out to `mksquashfs` with explicit binary path (no `$PATH` reliance), zstd by default. |
+| `cache.rs` | Content-addressed layout `blobs/ + fs/<digest>.<arch>.squashfs + meta/<digest>.<arch>.json + tmp/`. Atomic writes; idempotent `lookup()` + `install_fs_image()`. |
+| `metadata.rs` | `LaunchMetadata::build` — argv = `Entrypoint + Cmd` (precedence), workdir fallback `/sandbox`, env merge `OCI < template < spec`. `to_guest_env_vars()` packs into `OPENSHELL_OCI_ARGC/ARGV_<i>/ENV_COUNT/ENV_<i>/WORKDIR`. |
+| `pipeline.rs` | End-to-end orchestrator. On cache hit, zero network I/O. On miss: pull → flatten → inject → build → install. |
+
+Cache is keyed by `(manifest digest, platform)`. Repeated launches of
+the same image skip pull and rebuild entirely — the driver just attaches
+the cached squashfs to the VM.
+
+```mermaid
+flowchart LR
+ req["CreateSandbox<br/>template.image=<ref>"] --> resolve[effective_image_ref]
+ resolve --> pull["oci-client pull<br/>manifest digest"]
+ pull --> lookup{"cache.lookup(digest, platform)"}
+ lookup -- hit --> attach[attach cached squashfs<br/>+ per-sandbox state disk]
+ lookup -- miss --> layers[fetch layers]
+ layers --> flat[flatten + whiteout]
+ flat --> compat[compat inject]
+ compat --> mksquash[mksquashfs zstd]
+ mksquash --> install[atomic install_fs_image<br/>write metadata]
+ install --> attach
+ attach --> launch[launch microVM]
+```
+
+### Guest init and pivot
+
+`crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh` is the
+guest's PID 1. OCI mode is gated on `OPENSHELL_OCI_ARGC` being set in
+the guest environ (delivered via libkrun `set_exec`).
+
+```mermaid
+flowchart TD
+ boot([PID 1: init boots]) --> mountfs[Mount /proc, /sys, /dev, /tmp, /run]
+ mountfs --> net[Bring up eth0 + DHCP]
+ net --> gate{OPENSHELL_OCI_ARGC set?}
+ gate -- No --> legacy[exec openshell-sandbox --workdir /sandbox<br/>legacy guest-rootfs boot]
+ gate -- Yes --> resolve[Resolve base + state disks<br/>by /sys/block/vd*/serial]
+ resolve --> mntbase[mount -o ro base → /base]
+ mntbase --> fmt{state disk<br/>formatted?}
+ fmt -- No --> mkfs[mkfs.ext4 state disk]
+ fmt -- Yes --> mntstate
+ mkfs --> mntstate[mount state → /state]
+ mntstate --> mkdirs[mkdir /state/upper /state/work<br/>/state/merged /state/workspace]
+ mkdirs --> overlay[mount -t overlay overlay<br/>lowerdir=/base,upperdir=/state/upper,<br/>workdir=/state/work /state/merged]
+ overlay --> bindws[bind-mount /state/workspace<br/>over /state/merged/sandbox]
+ bindws --> resolv[Synthesize /etc/resolv.conf<br/>if image lacks one]
+ resolv --> tls[Copy $OPENSHELL_TLS_CA →<br/>/state/merged/opt/openshell/tls/ca.crt]
+ tls --> copysup[Copy supervisor binary into<br/>/state/merged/opt/openshell/bin/]
+ copysup --> bindps[bind-mount /proc /sys /dev<br/>into /state/merged]
+ bindps --> pivot[pivot_root /state/merged<br/>umount -l /.old_root]
+ pivot --> translate[Translate OPENSHELL_OCI_ENV_* →<br/>OPENSHELL_CONTAINER_ENV_*<br/>set OPENSHELL_CONTAINER_MODE=1]
+ translate --> execsup[exec openshell-sandbox<br/>--workdir $OCI_WORKDIR -- $OCI_ARGV]
+```
+
+`oci_launch_supervisor` steps:
+
+1. Resolves the RO base device (`block_id=oci-base`) and state device
+ (`block_id=sandbox-state`) by walking `/sys/block/vd*/serial`. Falls
+ back to `/dev/vda` / `/dev/vdb` when serial lookup is unavailable;
+ `OPENSHELL_VM_OCI_BASE_DEVICE` / `OPENSHELL_VM_STATE_DEVICE` short-
+ circuit the lookup for tests and operator debugging.
+2. Mounts the RO base at `/base`.
+3. Formats the state device with ext4 on first boot, mounts at `/state`.
+4. Creates `/state/upper`, `/state/work`, `/state/merged`, and
+ `/state/workspace`.
+5. Mounts overlay
+ `lowerdir=/base,upperdir=/state/upper,workdir=/state/work` at
+ `/state/merged`.
+6. Bind-mounts `/state/workspace` over the image's `/sandbox` so the
+ workdir is writable on the state disk.
+7. Synthesizes `/etc/resolv.conf` if the image didn't ship one.
+8. Copies the gateway-issued TLS CA (if `$OPENSHELL_TLS_CA` is set)
+ into `/opt/openshell/tls/ca.crt` inside the overlay so post-pivot
+ SSL trust paths stay valid.
+9. Copies the supervisor binary into the upper layer (reaches the state
+ disk, not the RO base).
+10. Bind-mounts `/proc`, `/sys`, `/dev` into the overlay.
+11. Bind-mounts `/state/merged` onto itself, `pivot_root`s into it, and
+ lazy-unmounts the old root.
+12. Translates `OPENSHELL_OCI_ENV_<i>` → `OPENSHELL_CONTAINER_ENV_<i>`,
+ sets `OPENSHELL_CONTAINER_MODE=1`, and unsets the OCI source vars.
+13. Reconstructs argv from `OPENSHELL_OCI_ARGV_<i>` and execs
+ `openshell-sandbox --workdir "$OCI_WORKDIR" -- <OCI argv>`.
+
+### Supervisor clean-env mode
+
+`crates/openshell-sandbox/src/container_env.rs` gates on
+`OPENSHELL_CONTAINER_MODE=1`. When active, the supervisor calls
+`Command::env_clear()` on the child and applies only the documented
+allowlist:
+
+- `HOME=/sandbox`, `PATH=<default PATH>`, `TERM=xterm`
+- Container env from `OPENSHELL_CONTAINER_ENV_<i>` (OCI + template/spec
+ merge)
+- `OPENSHELL_SANDBOX=1` (applied last — images cannot override the
+ marker)
+- Provider env, proxy env, TLS env from policy (layered on top by the
+ existing spawn path)
+
+Control-plane vars (`OPENSHELL_SSH_HANDSHAKE_SECRET`, driver internals,
+etc.) never reach the child process. When `OPENSHELL_CONTAINER_MODE` is
+unset, the supervisor keeps its historical env-inheritance behavior.
+
+## Storage: shared RO base + per-sandbox CoW
+
+The overlay design replaces an earlier "unpack fresh tar per sandbox"
+model that's still described in the initial plan:
+
+```mermaid
+flowchart TB
+ subgraph shared["Shared (host, per-image)"]
+ base[("<state>/oci-cache/fs/<br/><digest>.<plat>.squashfs<br/>(read-only, never GC'd per sandbox)")]
+ end
+ subgraph persandbox["Per-sandbox state dir"]
+ raw[("sandbox-state.raw<br/>sparse 16 GiB ext4")]
+ upper["/state/upper<br/>overlay upper"]
+ work["/state/work<br/>overlay workdir"]
+ ws["/state/workspace<br/>bind-mounted over /sandbox"]
+ raw --> upper
+ raw --> work
+ raw --> ws
+ end
+ subgraph view["Sandbox runtime view"]
+ merged["/ (post pivot_root)"]
+ end
+ base -- lowerdir --> merged
+ upper -- upperdir --> merged
+ work -- workdir --> merged
+ ws -- bind /sandbox --> merged
+```
+
+- **Base**: one squashfs per `(manifest digest, platform)`, shared
+ across every sandbox that uses the image. Never deleted by the
+ per-sandbox delete path.
+- **Upper + workdir**: per-sandbox ext4 on `sandbox-state.raw`. Sparse
+ 16 GiB default, grows on first write. Deleted with the sandbox state
+ dir on `DeleteSandbox`.
+- **Workspace**: `/state/workspace` bind-mounted over the image's
+ `/sandbox`. Persists alongside the state disk.
+
+Cold start for a repeat launch of the same image is near-instant: a
+block attach and two mounts; no registry round-trip, no layer
+flattening, no squashfs build.
+
+GC of the RO base cache is out of scope for v1. Operators must manage
+`<state>/oci-cache/fs/*` and `<state>/oci-cache/blobs/**` manually if
+they need to reclaim space.
+
+## Driver configuration
+
+| Flag / env var | Meaning |
+|---|---|
+| `--default-image` / `OPENSHELL_VM_DRIVER_DEFAULT_IMAGE` | Image used when a sandbox spec omits `template.image`. Advertised via `GetCapabilities.default_image`. Empty string disables defaulting — sandboxes without an image fall through to the legacy (non-OCI) guest-rootfs supervisor. |
+| `--mksquashfs-bin` / `OPENSHELL_VM_MKSQUASHFS` | Path to the `mksquashfs` binary. Required for OCI sandboxes. Unset → OCI requests are rejected with `FailedPrecondition`. |
+| `OPENSHELL_VM_DRIVER_STATE_DIR` | Root for per-sandbox state and `oci-cache/`. |
+
+`GetCapabilities` now reports:
+
+```json
+{
+ "driver_name": "openshell-driver-vm",
+ "driver_version": "<openshell version>",
+ "default_image": "<value of --default-image>",
+ "supports_gpu": false
+}
+```
+
+## v1 scope and assumptions
+
+- Public OCI registries only. No authentication.
+- Linux images only. `linux/amd64` or `linux/arm64` matching the host.
+- One image per sandbox. No init containers or sidecars.
+- The entrypoint always runs as `sandbox:sandbox` (UID/GID 10001). The
+ OCI `User` field is ignored in v1.
+- `template.agent_socket_path`, `template.platform_config`, and
+ `template.resources` are still rejected by the VM driver.
+- Sandbox lifetime is the entrypoint lifetime: when the OCI entrypoint
+ exits, the sandbox transitions to exited/error.
+- GPU is unsupported.
+- Squashfs is the fs-image format. erofs is a candidate for later.
+- No automatic cache GC.
+
+## Related files
+
+- `crates/openshell-driver-vm/src/driver.rs` — gRPC surface +
+ sandbox lifecycle.
+- `crates/openshell-driver-vm/src/runtime.rs` — libkrun launch, disk
+ + vsock wiring.
+- `crates/openshell-driver-vm/src/ffi.rs` — `libkrun` symbol loader.
+- `crates/openshell-driver-vm/src/state_disk.rs` — sparse state disk
+ create/grow + secure import socket dir.
+- `crates/openshell-driver-vm/src/oci/` — OCI pipeline.
+- `crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh` —
+ guest init + `oci_launch_supervisor`.
+- `crates/openshell-sandbox/src/container_env.rs` — supervisor
+ clean-env baseline for container mode.
diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml
index 368716ef9..8e90d8607 100644
--- a/crates/openshell-driver-vm/Cargo.toml
+++ b/crates/openshell-driver-vm/Cargo.toml
@@ -36,6 +36,16 @@ libc = "0.2"
libloading = "0.8"
tar = "0.4"
zstd = "0.13"
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+sha2 = "0.10"
+flate2 = "1"
+tempfile = "3"
+oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] }
+
+[dev-dependencies]
+tempfile = "3"
[lints]
workspace = true
diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
index 70dda5acb..ed6f433b2 100644
--- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
+++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
@@ -72,6 +72,170 @@ tcp_probe() {
fi
}
+resolve_block_device_by_serial() {
+    # Print the /dev node of the virtio-blk disk whose serial equals $1 and
+    # return 0; return 1 (printing nothing) when no vd* device matches.
+    #
+    # libkrun's `krun_add_disk3` exposes the caller-supplied block_id as the
+    # virtio-blk serial, which Linux surfaces at /sys/block/<dev>/serial, so
+    # resolving disks this way keeps the guest tolerant to attach-order
+    # changes.
+    local wanted="$1"
+    local sysdir found
+    for sysdir in /sys/block/vd*; do
+        # Skip anything that is not a directory (e.g. the literal pattern when
+        # the glob matches nothing) or that has no readable serial attribute.
+        if [ ! -d "$sysdir" ] || [ ! -r "$sysdir/serial" ]; then
+            continue
+        fi
+        found=$(cat "$sysdir/serial" 2>/dev/null || true)
+        if [ "$found" != "$wanted" ]; then
+            continue
+        fi
+        printf '/dev/%s\n' "${sysdir##*/}"
+        return 0
+    done
+    return 1
+}
+
+oci_launch_supervisor() {
+    # Enter OCI overlay mode: mount the shared read-only squashfs base plus a
+    # per-sandbox ext4 upper, overlay them, pivot_root into the merged view,
+    # then exec the supervisor post-pivot so container paths like /sandbox and
+    # /tmp are the real paths from the supervisor's POV.
+    #
+    # Environment consumed:
+    #   OPENSHELL_VM_OCI_BASE_DEVICE, OPENSHELL_VM_STATE_DEVICE
+    #       optional explicit block devices (skip serial lookup)
+    #   OPENSHELL_OCI_ARGC, OPENSHELL_OCI_ARGV_<i>
+    #       command passed to the supervisor after `--`
+    #   OPENSHELL_OCI_ENV_COUNT, OPENSHELL_OCI_ENV_<i>
+    #       re-exported as OPENSHELL_CONTAINER_ENV_COUNT / _ENV_<i>
+    #   OPENSHELL_OCI_WORKDIR
+    #       supervisor --workdir (defaults to /sandbox)
+    #   OPENSHELL_TLS_CA
+    #       optional CA file mirrored into the merged tree
+    #
+    # Never returns: exits 1 on a fatal setup error, otherwise replaces this
+    # process with the supervisor via exec.
+
+    # Prefer block-ID resolution so the mount points don't silently break if
+    # libkrun ever changes virtio-blk attach order. Env var overrides are kept
+    # for operator escape hatches and test harnesses.
+    local base_dev="${OPENSHELL_VM_OCI_BASE_DEVICE:-}"
+    local state_dev="${OPENSHELL_VM_STATE_DEVICE:-}"
+
+    if [ -z "$base_dev" ]; then
+        base_dev=$(resolve_block_device_by_serial "oci-base" || true)
+    fi
+    if [ -z "$state_dev" ]; then
+        state_dev=$(resolve_block_device_by_serial "sandbox-state" || true)
+    fi
+
+    # Fall back to attach-order defaults only when serial lookup returns nothing
+    # (older guest kernels or missing /sys/block/<dev>/serial).
+    if [ -z "$base_dev" ]; then
+        ts "WARNING: could not resolve RO base by serial=oci-base; falling back to /dev/vda"
+        base_dev=/dev/vda
+    fi
+    if [ -z "$state_dev" ]; then
+        ts "WARNING: could not resolve state disk by serial=sandbox-state; falling back to /dev/vdb"
+        state_dev=/dev/vdb
+    fi
+
+    if [ ! -b "$base_dev" ]; then
+        ts "ERROR: OCI base device $base_dev not found"
+        exit 1
+    fi
+    if [ ! -b "$state_dev" ]; then
+        ts "ERROR: OCI state device $state_dev not found"
+        exit 1
+    fi
+
+    ts "OCI block devices resolved: base=$base_dev state=$state_dev"
+
+    mkdir -p /base /state
+    if ! mount -o ro "$base_dev" /base 2>/dev/null; then
+        ts "ERROR: failed to mount read-only base $base_dev at /base"
+        exit 1
+    fi
+
+    # Format the state disk only when blkid finds no existing signature, so a
+    # previously used disk keeps its contents.
+    if ! blkid "$state_dev" >/dev/null 2>&1; then
+        ts "formatting sandbox state disk $state_dev"
+        mkfs.ext4 -F -q -L openshell-sandbox-state "$state_dev" >/dev/null 2>&1 || {
+            ts "ERROR: mkfs.ext4 failed on $state_dev"
+            exit 1
+        }
+    fi
+    if ! mount -o noatime "$state_dev" /state 2>/dev/null; then
+        ts "ERROR: failed to mount state disk $state_dev at /state"
+        exit 1
+    fi
+
+    mkdir -p /state/upper /state/work /state/merged /state/workspace
+    if ! mount -t overlay overlay \
+        -o "lowerdir=/base,upperdir=/state/upper,workdir=/state/work" \
+        /state/merged 2>/dev/null; then
+        ts "ERROR: failed to mount overlay at /state/merged"
+        exit 1
+    fi
+
+    # The image's /sandbox is RO (it lives in the base); bind the writable
+    # workspace over it so the container process can write to /sandbox.
+    # Checked explicitly, like the other mounts, so a failure is reported
+    # instead of leaving the workdir on the wrong filesystem.
+    mkdir -p /state/merged/sandbox
+    if ! mount --bind /state/workspace /state/merged/sandbox; then
+        ts "ERROR: failed to bind-mount /state/workspace over /state/merged/sandbox"
+        exit 1
+    fi
+
+    # Synthesize /etc/resolv.conf inside the image if the image does not
+    # provide one; reuse the guest's DHCP-populated one. Best effort.
+    if [ ! -s /state/merged/etc/resolv.conf ] && [ -s /etc/resolv.conf ]; then
+        mkdir -p /state/merged/etc
+        cp /etc/resolv.conf /state/merged/etc/resolv.conf 2>/dev/null || true
+    fi
+
+    # Mirror TLS CA bundle into the merged view so SSL trust survives the pivot.
+    if [ -n "${OPENSHELL_TLS_CA:-}" ] && [ -f "$OPENSHELL_TLS_CA" ]; then
+        mkdir -p /state/merged/opt/openshell/tls
+        cp "$OPENSHELL_TLS_CA" /state/merged/opt/openshell/tls/ca.crt 2>/dev/null || true
+    fi
+
+    # Supervisor binary must be reachable post-pivot. Always install the
+    # guest's own copy: the image is untrusted content, and an executable it
+    # ships at this path must never be what gets exec'd below. Removing the
+    # target first also avoids writing through a symlink supplied by the
+    # image. Writes land on the state disk (overlay upper), not the RO base.
+    mkdir -p /state/merged/opt/openshell/bin
+    rm -rf /state/merged/opt/openshell/bin/openshell-sandbox
+    if ! cp /opt/openshell/bin/openshell-sandbox \
+        /state/merged/opt/openshell/bin/openshell-sandbox; then
+        ts "ERROR: failed to install supervisor binary into overlay"
+        exit 1
+    fi
+    chmod 0755 /state/merged/opt/openshell/bin/openshell-sandbox
+
+    # Ensure the kernel pseudo-filesystems are available after pivot. Still
+    # best effort, but a failure is logged rather than silently ignored.
+    local pseudo
+    for pseudo in proc sys dev; do
+        mkdir -p "/state/merged/$pseudo"
+        mount --bind "/$pseudo" "/state/merged/$pseudo" 2>/dev/null \
+            || ts "WARNING: failed to bind-mount /$pseudo into /state/merged"
+    done
+
+    # pivot_root requires the new root to be a mount point distinct from the
+    # current root, so bind-mount /state/merged onto itself. The bind must be
+    # recursive: a plain --bind stacks a new mount that does not carry the
+    # /sandbox, /proc, /sys and /dev submounts created above, so they would
+    # be missing from the new root.
+    if ! mount --rbind /state/merged /state/merged; then
+        ts "ERROR: failed to bind-mount /state/merged onto itself"
+        exit 1
+    fi
+    mkdir -p /state/merged/.old_root
+    cd /state/merged || {
+        ts "ERROR: cannot enter /state/merged"
+        exit 1
+    }
+    if ! pivot_root . .old_root; then
+        ts "ERROR: pivot_root into /state/merged failed"
+        exit 1
+    fi
+    cd /
+    # Detach the old guest root; lazy so busy mounts do not block the boot.
+    umount -l /.old_root 2>/dev/null || true
+    rmdir /.old_root 2>/dev/null || true
+
+    # Translate OCI metadata env into the supervisor's container-mode contract.
+    local env_count="${OPENSHELL_OCI_ENV_COUNT:-0}"
+    export OPENSHELL_CONTAINER_ENV_COUNT="$env_count"
+    local idx=0
+    local src_var
+    while [ "$idx" -lt "$env_count" ]; do
+        src_var="OPENSHELL_OCI_ENV_$idx"
+        # ${!src_var} is bash indirect expansion: the value of the variable
+        # whose name is stored in src_var.
+        export "OPENSHELL_CONTAINER_ENV_$idx=${!src_var:-}"
+        unset "$src_var"
+        idx=$((idx + 1))
+    done
+    export OPENSHELL_CONTAINER_MODE=1
+
+    local argc="${OPENSHELL_OCI_ARGC:-0}"
+    if [ "$argc" -lt 1 ]; then
+        ts "ERROR: OCI image has no runnable command (argc=0)"
+        exit 1
+    fi
+    local -a argv=()
+    idx=0
+    while [ "$idx" -lt "$argc" ]; do
+        src_var="OPENSHELL_OCI_ARGV_$idx"
+        argv+=("${!src_var:-}")
+        unset "$src_var"
+        idx=$((idx + 1))
+    done
+
+    local workdir="${OPENSHELL_OCI_WORKDIR:-/sandbox}"
+    unset OPENSHELL_OCI_ARGC OPENSHELL_OCI_ENV_COUNT OPENSHELL_OCI_WORKDIR
+
+    ts "OCI overlay ready; exec'ing supervisor (argc=$argc workdir=$workdir)"
+    exec /opt/openshell/bin/openshell-sandbox --workdir "$workdir" -- "${argv[@]}"
+}
+
rewrite_openshell_endpoint_if_needed() {
local endpoint="${OPENSHELL_ENDPOINT:-}"
[ -n "$endpoint" ] || return 0
@@ -184,5 +348,13 @@ export USER=sandbox
rewrite_openshell_endpoint_if_needed
+# OCI image mode: if the driver staged an OCI payload via krun set_exec env,
+# prepare the overlay rootfs, pivot_root, and exec the supervisor post-pivot.
+# Otherwise fall through to the default guest rootfs supervisor boot.
+if [ -n "${OPENSHELL_OCI_ARGC:-}" ]; then
+ ts "OCI image mode: OPENSHELL_OCI_ARGC=${OPENSHELL_OCI_ARGC}"
+ oci_launch_supervisor
+fi
+
ts "starting openshell-sandbox supervisor"
exec /opt/openshell/bin/openshell-sandbox --workdir /sandbox
diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs
index 3d3fbf4b6..14634da2d 100644
--- a/crates/openshell-driver-vm/src/driver.rs
+++ b/crates/openshell-driver-vm/src/driver.rs
@@ -65,6 +65,13 @@ pub struct VmDriverConfig {
pub guest_tls_ca: Option,
pub guest_tls_cert: Option,
pub guest_tls_key: Option,
+ /// Default OCI image used when the sandbox spec omits `template.image`.
+ /// Empty string means "no default" — sandboxes without an image will
+ /// fall back to the historical non-OCI guest rootfs supervisor.
+ pub default_image: String,
+ /// Path to the `mksquashfs` binary. When unset, OCI-image sandboxes
+ /// are rejected with `FailedPrecondition`.
+ pub mksquashfs_bin: Option,
}
impl Default for VmDriverConfig {
@@ -82,6 +89,8 @@ impl Default for VmDriverConfig {
guest_tls_ca: None,
guest_tls_cert: None,
guest_tls_key: None,
+ default_image: String::new(),
+ mksquashfs_bin: None,
}
}
}
@@ -173,12 +182,22 @@ struct SandboxRecord {
process: Arc>,
}
-#[derive(Debug, Clone)]
+#[derive(Clone)]
pub struct VmDriver {
config: VmDriverConfig,
launcher_bin: PathBuf,
registry: Arc>>,
events: broadcast::Sender,
+ /// Shared OCI cache and puller for this driver process.
+ /// Populated once per platform; `None` when the host arch is unsupported.
+ oci: Option>,
+}
+
+/// Lazily-initialized OCI state attached to the driver.
+pub struct VmOci {
+ pub puller: crate::oci::OciPuller,
+ pub cache: crate::oci::CacheLayout,
+ pub platform: crate::oci::Platform,
}
impl VmDriver {
@@ -207,11 +226,25 @@ impl VmDriver {
};
let (events, _) = broadcast::channel(WATCH_BUFFER);
+
+ let oci = crate::oci::Platform::host().map(|platform| {
+ let cache = crate::oci::CacheLayout::new(config.state_dir.join("oci-cache"));
+ // Errors here are surfaced lazily at first sandbox-create; the
+ // driver still starts so non-OCI sandboxes continue to work.
+ let _ = cache.ensure_dirs();
+ Arc::new(VmOci {
+ puller: crate::oci::OciPuller::new(platform),
+ cache,
+ platform,
+ })
+ });
+
Ok(Self {
config,
launcher_bin,
registry: Arc::new(Mutex::new(HashMap::new())),
events,
+ oci,
})
}
@@ -220,7 +253,7 @@ impl VmDriver {
GetCapabilitiesResponse {
driver_name: DRIVER_NAME.to_string(),
driver_version: openshell_core::VERSION.to_string(),
- default_image: String::new(),
+ default_image: self.config.default_image.clone(),
supports_gpu: false,
}
}
@@ -261,6 +294,9 @@ impl VmDriver {
})?;
}
+ let oci_launch = self.resolve_oci_launch(sandbox, &state_dir).await?;
+ let is_oci = oci_launch.is_some();
+
let console_output = state_dir.join("rootfs-console.log");
let mut command = Command::new(&self.launcher_bin);
command.kill_on_drop(true);
@@ -282,7 +318,15 @@ impl VmDriver {
command
.arg("--vm-port")
.arg(format!("{ssh_port}:{GUEST_SSH_PORT}"));
- for env in build_guest_environment(sandbox, &self.config) {
+ if let Some(oci) = oci_launch.as_ref() {
+ command.arg("--vm-ro-base-disk").arg(&oci.base_disk_path);
+ command.arg("--vm-state-disk").arg(&oci.state_disk_path);
+ }
+ let mut guest_env = build_guest_environment(sandbox, &self.config, is_oci);
+ if let Some(oci) = oci_launch.as_ref() {
+ guest_env.extend(oci.guest_env_vars.iter().cloned());
+ }
+ for env in guest_env {
command.arg("--vm-env").arg(env);
}
@@ -433,6 +477,76 @@ impl VmDriver {
snapshots
}
+ /// Run the OCI pipeline for this sandbox if `template.image` (or the
+ /// driver's default image) is set, and materialize the per-sandbox state
+ /// disk. Returns `None` for legacy non-OCI sandboxes.
+ async fn resolve_oci_launch(
+ &self,
+ sandbox: &Sandbox,
+ state_dir: &Path,
+ ) -> Result