diff --git a/.claude/launch.json b/.claude/launch.json
index 9ccce1cb4..0d123c94e 100644
--- a/.claude/launch.json
+++ b/.claude/launch.json
@@ -6,6 +6,24 @@
       "runtimeExecutable": "bash",
       "runtimeArgs": ["-c", "source ~/.nvm/nvm.sh && cd frontend && npm run dev"],
       "port": 5173
+    },
+    {
+      "name": "backend",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "-c",
+        "CUDA_VISIBLE_DEVICES='' uv run daydream-scope --port 8033"
+      ],
+      "port": 8033
+    },
+    {
+      "name": "scope-cloud",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "-c",
+        "CUDA_VISIBLE_DEVICES='' SCOPE_CLOUD_MODE=livepeer SCOPE_CLOUD_APP_ID='daydream/scope-livepeer-pr-971--preview/ws' uv run daydream-scope"
+      ],
+      "port": 8000
     }
   ]
 }
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
new file mode 100644
index 000000000..012aa0094
--- /dev/null
+++ b/.github/workflows/eval.yml
@@ -0,0 +1,65 @@
+name: Agent Evals
+
+# Manual-dispatch only. These evals hit the live Anthropic API, so we do NOT
+# run them on push/pull_request — they cost money and are inherently noisy.
+on:
+  workflow_dispatch:
+    inputs:
+      case:
+        description: "Case name to run (blank = all cases)"
+        required: false
+        default: ""
+      runs:
+        description: "Samples per case"
+        required: false
+        default: "5"
+      model:
+        description: "Model id override (blank = default)"
+        required: false
+        default: ""
+      fail_threshold:
+        description: "Overall pass-rate threshold (0-100; blank = no gate)"
+        required: false
+        default: ""
+
+jobs:
+  evals:
+    runs-on: ubuntu-latest
+    name: Run Scope agent evals
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          version: "0.9.11"
+
+      - name: Install dependencies
+        run: uv sync --group dev
+
+      - name: Run evals
+        shell: bash
+        # Pass inputs via env rather than inline expressions so values can't inject shell code.
+        env:
+          EVAL_RUNS: ${{ inputs.runs }}
+          EVAL_CASE: ${{ inputs.case }}
+          EVAL_MODEL: ${{ inputs.model }}
+          EVAL_FAIL_THRESHOLD: ${{ inputs.fail_threshold }}
+        run: |
+          args=(--runs "$EVAL_RUNS")
+          if [ -n "$EVAL_CASE" ]; then args+=(--case "$EVAL_CASE"); fi
+          if [ -n "$EVAL_MODEL" ]; then args+=(--model "$EVAL_MODEL"); fi
+          if [ -n "$EVAL_FAIL_THRESHOLD" ]; then args+=(--fail-threshold "$EVAL_FAIL_THRESHOLD"); fi
+          uv run python -m evals "${args[@]}"
+
+      - name: Upload artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-artifacts
+          path: evals/outputs/
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
index e89385653..629a0789d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,6 @@ notes/
 .cursor/
 .specstory/
 *.local*
+
+# Eval harness artifacts
+evals/outputs/
diff --git a/evals/README.md b/evals/README.md
new file mode 100644
index 000000000..173130a4e
--- /dev/null
+++ b/evals/README.md
@@ -0,0 +1,138 @@
+# Scope Agent Eval Harness
+
+Measures how often the agentic workflow builder produces a workflow that
+matches the user's intent from a single natural-language prompt.
+
+Each **case** = one prompt + structural checks. The runner drives the real
+agent via in-process ASGI (no uvicorn, no port) N times and prints a
+pass-rate table per case.
+
+## Quickstart
+
+```bash
+# Install deps (one-time):
+uv sync --group dev
+
+# Ensure an Anthropic key is set:
+export ANTHROPIC_API_KEY=sk-ant-...
+
+# Run everything, 5 samples per case (default):
+uv run python -m evals
+
+# Run just one case, 1 sample (fast smoke):
+uv run python -m evals --case starter-ltx-text-to-video --runs 1
+
+# Cheaper iteration:
+uv run python -m evals --model claude-haiku-4-5
+
+# Enforce a bar in CI-like mode:
+uv run python -m evals --runs 10 --fail-threshold 90
+```
+
+Artifacts land in `evals/outputs/<case>/r<NN>/`:
+
+- `proposal.json` — the full graph the agent proposed.
+- `meta.json` — pass/fail, failures, rationale, wall time.
+- `trace.jsonl` — every SSE event the agent emitted (one per line).
+
+## Authoring a case
+
+Drop a file in `evals/cases/my-case.yaml`:
+
+```yaml
+name: my-case
+description: one-line explanation of what good looks like
+prompt: |
+  A natural-language prompt — as if a user typed it into the agent chat.
+runs: 5
+expect:
+  # Each entry is a single-key mapping: {check_name: argument}.
+  - pipelines_include: [longlive]
+  - wire_present: { kind: vace_to_pipeline }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+```
+
+### Available checks
+
+Registered in [`grader.py`](grader.py):
+
+| Check | Argument | Passes when… |
+| ----- | -------- | ------------ |
+| `pipelines_equal` | `[ids]` | Pipeline nodes' `pipeline_id`s exactly equal the set. |
+| `pipelines_include` | `[ids]` | Pipeline nodes include every id in the list (extras ok). |
+| `pipelines_count_at_least` | `int` | At least N pipeline nodes exist (any ids). Good for vague prompts. |
+| `lora_count_at_least` | `int` | Total LoRA entries across `lora` UI nodes ≥ N. |
+| `wire_present` | `{kind, …}` | An edge of the named kind exists. See below. |
+| `node_present` | `{type, count?, min_items?}` | ≥ `count` UI nodes of `type`. For `prompt_list`, `min_items` asserts list length. |
+| `no_validator_errors` | _(any)_ | `_validate_proposal()` returns zero errors on the graph. |
+| `bad_handle_prefix` | `"parameter:"` | (Forbid) No edge handle starts with the prefix. |
+| `orphan_sinks` | _(any)_ | (Forbid) Every top-level `sink` node has at least one incoming top-level `stream` edge. Catches cases where the agent tacks on an extra sink that isn't wired to anything. |
+
+`wire_present` kinds:
+
+| Kind | Extra args | Matches |
+| ---- | ---------- | ------- |
+| `slider_to_pipeline_param` | `target_handle: "param:noise_scale"` | UI-value node → pipeline's `targetHandle`. |
+| `vace_to_pipeline` | — | VACE UI node → pipeline's `param:__vace`. |
+| `image_to_vace` | — | Image (or value) node → VACE node's `param:ref_image`/`first_frame`/`last_frame`. |
+| `prompt_to_pipeline` | — | Any source → pipeline's `param:__prompt`. |
+| `lora_to_pipeline` | — | LoRA node → pipeline's `param:__loras`. |
+| `prompt_list_to_pipeline` | — | `prompt_list` UI node → pipeline's `param:__prompt`. |
+| `trigger_to_prompt_list` | — | Value source → `prompt_list`'s `param:trigger`/`param:cycle`. |
+| `pipeline_to_record` | — | A pipeline's stream output → a `record` UI node. |
+
+Adding a new check type = adding a function to `grader.py` and registering
+it in `CHECKS`. The YAML format picks it up automatically.
+
+### Case tone: precise vs. vague
+
+Real users send prompts across a wide range of specificity. Cases should
+cover that range:
+
+- **Precise** (`complex-krea-prompt-switch-record`) — the prompt names the
+  pipeline, exact counts, specific behaviors. Graders assert the precise
+  structure: `pipelines_include: [krea-realtime-video]`,
+  `node_present: { type: prompt_list, min_items: 5 }`, specific wires.
+- **Vague** (`vague-capture-moments`) — the prompt says what the user
+  wants to *do*, not how. Graders assert only what the intent clearly
+  implies (`pipelines_count_at_least: 1`, `node_present: { type: record }`).
+  The agent gets latitude on everything else; the eval measures whether
+  it makes reasonable choices.
+
+Prefer more vague cases as pass-rate on precise ones improves — vague
+ones surface filling-the-gaps failures that don't show up when every
+detail is spelled out.
+
+## Pytest integration
+
+A single smoke test at `tests/test_evals_smoke.py` runs one case under
+`@pytest.mark.eval`. Default `pytest` skips it (pytest-ini addopts
+`-m 'not eval'`). To include it:
+
+```bash
+uv run pytest -m eval
+```
+
+This only verifies the harness wires up end-to-end — it doesn't enforce
+pass-rates. For pass-rate enforcement, use `python -m evals`.
+
+## CI
+
+There is a `.github/workflows/eval.yml` that runs on manual dispatch only
+(`workflow_dispatch`). It is **not** hooked into `pull_request` or `push`
+— LLM evals cost money and are inherently noisy at the edges. Gate launch
+decisions on the number, not on PR green.
+
+## Design notes
+
+- The driver uses `httpx.ASGITransport` + `asgi-lifespan` so we hit the
+  real `/api/v1/agent/chat` endpoint without spawning a server. This is
+  the same endpoint the frontend uses, so behavior is identical to
+  production.
+- Each case spins up an isolated `AgentSession`; no cross-case
+  contamination. Conversation history does not leak between runs.
+- Grading is deterministic and structural. No LLM-as-judge in v1.
+- Model/provider overrides flow through the on-disk agent config file so
+  runs respect the same resolution order the server uses.
diff --git a/evals/__init__.py b/evals/__init__.py
new file mode 100644
index 000000000..42cc95f24
--- /dev/null
+++ b/evals/__init__.py
@@ -0,0 +1,10 @@
+"""Eval harness for the Scope agentic workflow builder.
+
+Each "case" is a YAML file in ``evals/cases/`` describing a natural-language
+prompt, how many times to sample the model, and structural checks to run on
+the resulting workflow proposal. The runner drives the real agent via an
+in-process ASGI transport and grades proposals deterministically.
+
+This package is NOT imported by the running server; it is only exercised by
+``python -m evals`` (CLI) and the opt-in ``pytest -m eval`` smoke test.
+"""
diff --git a/evals/__main__.py b/evals/__main__.py
new file mode 100644
index 000000000..dda39d6e0
--- /dev/null
+++ b/evals/__main__.py
@@ -0,0 +1,10 @@
+"""``python -m evals`` entry point."""
+
+from __future__ import annotations
+
+import sys
+
+from .runner import main
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/evals/case.py b/evals/case.py
new file mode 100644
index 000000000..8ccd4b5de
--- /dev/null
+++ b/evals/case.py
@@ -0,0 +1,104 @@
+"""YAML → Case dataclass loader for the eval harness.
+
+A case file looks like::
+
+    name: starter-mythical-creature
+    description: |
+      Reproduces the Mythical Creature teaching starter.
+    prompt: |
+      I want a slime creature ...
+    runs: 5
+    expect:
+      - pipelines_equal: [longlive]
+      - wire_present: { kind: vace_to_pipeline }
+    forbid:
+      - bad_handle_prefix: "parameter:"
+
+Each entry under ``expect`` / ``forbid`` is a single-key mapping whose key is
+the name of a check in :mod:`evals.grader` and whose value is the check
+argument. We deliberately keep the format flat and declarative so adding a
+case is just dropping a new YAML file.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+@dataclass
+class CheckSpec:
+    """One graded check: ``(name, arg)`` where ``name`` resolves to a function
+    in :mod:`evals.grader`."""
+
+    name: str  # check name, taken from the single key of a YAML entry
+    arg: Any  # that key's value, i.e. the check argument
+
+
+@dataclass
+class Case:
+    name: str  # YAML ``name``, falling back to the file stem
+    prompt: str  # required, non-blank natural-language prompt
+    description: str = ""
+    runs: int = 5  # samples per case; the loader requires >= 1
+    expect: list[CheckSpec] = field(default_factory=list)  # from YAML ``expect``
+    forbid: list[CheckSpec] = field(default_factory=list)  # from YAML ``forbid``
+    source_path: Path | None = None  # file the case was loaded from
+    # When true, the case passes iff the agent did NOT emit a
+    # ``workflow_proposal`` SSE event. Used for runtime-tweak cases where
+    # the right tool is ``update_parameters`` and re-proposing the graph
+    # is the regression we want to catch.
+    forbid_proposal: bool = False
+
+
+def _parse_check_list(raw: list[Any], context: str) -> list[CheckSpec]:
+    """Build a ``CheckSpec`` from each ``{check_name: argument}`` entry of ``raw``."""
+    specs: list[CheckSpec] = []
+    for position, item in enumerate(raw or []):
+        where = f"{context}[{position}]"  # location prefix used in error messages
+        if not (isinstance(item, dict) and len(item) == 1):
+            raise ValueError(f"{where} must be a single-key mapping, got: {item!r}")
+        # Exactly one pair exists here; the length check above guarantees it.
+        check_name, check_arg = next(iter(item.items()))
+        if not isinstance(check_name, str):
+            raise ValueError(f"{where} check name must be a string")
+        specs.append(CheckSpec(name=check_name, arg=check_arg))
+    return specs
+
+
+def load_case(path: Path) -> Case:
+    """Load a single case YAML file into a :class:`Case`.
+
+    Raises ``ValueError`` on a non-mapping file or an invalid ``prompt``/``runs``."""
+    # Case files contain non-ASCII text; never rely on the platform's default codec.
+    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    if not isinstance(data, dict):
+        raise ValueError(f"{path}: expected a mapping at top level")
+    prompt = data.get("prompt")
+    if not isinstance(prompt, str) or not prompt.strip():
+        raise ValueError(f"{path}: 'prompt' is required and must be a non-empty string")
+    runs = data.get("runs", 5)
+    # bool is a subclass of int, so ``runs: true`` would otherwise pass as 1 run.
+    if isinstance(runs, bool) or not isinstance(runs, int) or runs < 1:
+        raise ValueError(f"{path}: 'runs' must be a positive integer")
+    return Case(
+        name=str(data.get("name") or path.stem),
+        prompt=prompt,
+        description=str(data.get("description") or ""),
+        runs=runs,
+        expect=_parse_check_list(data.get("expect") or [], f"{path}:expect"),
+        forbid=_parse_check_list(data.get("forbid") or [], f"{path}:forbid"),
+        source_path=path,
+        forbid_proposal=bool(data.get("forbid_proposal", False)),
+    )
+
+
+def discover_cases(cases_dir: Path) -> list[Case]:
+    """Load every YAML case file in ``cases_dir``, in sorted path order."""
+    wanted_suffixes = (".yaml", ".yml")
+    entries = sorted(cases_dir.iterdir())
+    case_files = [e for e in entries if e.suffix in wanted_suffixes and e.is_file()]
+    return [load_case(case_file) for case_file in case_files]
diff --git a/evals/cases/complex-krea-prompt-switch-record.yaml b/evals/cases/complex-krea-prompt-switch-record.yaml
new file mode 100644
index 000000000..d088bc90c
--- /dev/null
+++ b/evals/cases/complex-krea-prompt-switch-record.yaml
@@ -0,0 +1,29 @@
+name: complex-krea-prompt-switch-record
+description: |
+  Multi-concept request that tests: picking a specific pipeline (krea), wiring
+  a reference image via VACE, using a prompt_list with ≥5 items driven by a
+  button/trigger, and wiring the output into a record node. Verbatim phrasing
+  of the kind a user would type.
+prompt: |
+  Make a krea workflow that allows me to supply a reference image, switch
+  between 5 prompts with a button press, and record the output.
+runs: 5
+expect:
+  - pipelines_include: [krea-realtime-video]
+  # Reference image path: krea supports VACE, so the reference image flows
+  # image -> vace -> pipeline's param:__vace aggregate.
+  - wire_present: { kind: image_to_vace }
+  - wire_present: { kind: vace_to_pipeline }
+  # Prompt switching: prompt_list node with at least 5 entries, its output
+  # feeding the pipeline's aggregate prompt input.
+  - node_present: { type: prompt_list, min_items: 5 }
+  - wire_present: { kind: prompt_list_to_pipeline }
+  - wire_present: { kind: trigger_to_prompt_list }
+  # Recording: at least one record node wired to the pipeline's stream output.
+  - node_present: { type: record }
+  - wire_present: { kind: pipeline_to_record }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/complex-pipeline-name-respect.yaml b/evals/cases/complex-pipeline-name-respect.yaml
new file mode 100644
index 000000000..9b9f06090
--- /dev/null
+++ b/evals/cases/complex-pipeline-name-respect.yaml
@@ -0,0 +1,16 @@
+name: complex-pipeline-name-respect
+description: |
+  User explicitly names krea. Agent must pick krea-realtime-video, not
+  substitute a different pipeline. Pressure-tests the "honor the user's
+  pipeline name" rule added to CORE PRINCIPLES.
+prompt: |
+  Give me a krea workflow for my webcam with a slider for noise_scale.
+runs: 5
+expect:
+  - pipelines_include: [krea-realtime-video]
+  - wire_present: { kind: slider_to_pipeline_param, target_handle: "param:noise_scale" }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/complex-reference-image-no-invented-handles.yaml b/evals/cases/complex-reference-image-no-invented-handles.yaml
new file mode 100644
index 000000000..e6f3771c1
--- /dev/null
+++ b/evals/cases/complex-reference-image-no-invented-handles.yaml
@@ -0,0 +1,19 @@
+name: complex-reference-image-no-invented-handles
+description: |
+  Asks for reference-image conditioning on a VACE-capable pipeline. The
+  only correct route is image → vace → pipeline.param:__vace — NOT an
+  invented param:i2v_image / param:ref handle. The backend validator
+  catches invented handles, but we assert the positive VACE path here
+  so the agent can't silently skip reference-image support either.
+prompt: |
+  Set up longlive with my webcam and a reference image I can swap out.
+runs: 5
+expect:
+  - pipelines_include: [longlive]
+  - wire_present: { kind: image_to_vace }
+  - wire_present: { kind: vace_to_pipeline }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/layout-nodes-spaced.yaml b/evals/cases/layout-nodes-spaced.yaml
new file mode 100644
index 000000000..07a6abdb2
--- /dev/null
+++ b/evals/cases/layout-nodes-spaced.yaml
@@ -0,0 +1,27 @@
+name: layout-nodes-spaced
+description: |
+  Regression guard for node positioning. Observed failure mode: the agent
+  placed trigger/subgraph/image/slider UI nodes at x=0 and x=320, which
+  collides with the frontend's top-level auto-layout strip (sources at x=50,
+  pipelines at x=350). Result: nodes visually stacked on top of each other
+  on the canvas even though edges were correct.
+
+  This case exercises the failure surface (many UI-state nodes alongside
+  multiple top-level nodes) and grades that NO two nodes overlap — relying
+  on either the agent placing them correctly OR the server-side
+  ``_reflow_ui_nodes`` safety net kicking in.
+prompt: |
+  Build me a longlive workflow where I can control three things live: a
+  noise_scale slider, a num_steps slider, and a prompt_list with at least
+  3 prompts that I can switch between with separate trigger buttons. Wire
+  them all up so I can tweak each one while it's running.
+runs: 5
+expect:
+  - pipelines_include: [longlive]
+  - node_present: { type: slider }
+  - node_present: { type: prompt_list, min_items: 3 }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/runtime-tweak-no-repropose.yaml b/evals/cases/runtime-tweak-no-repropose.yaml
new file mode 100644
index 000000000..ae9092251
--- /dev/null
+++ b/evals/cases/runtime-tweak-no-repropose.yaml
@@ -0,0 +1,19 @@
+name: runtime-tweak-no-repropose
+description: |
+  Regression guard for "I should never have to reload a workflow just to
+  change a parameter." Observed failure: after the user asks to tweak a
+  single runtime-settable param on an already-running pipeline, the agent
+  calls ``propose_workflow`` to rebuild the entire graph, forcing the user
+  to approve and reload. The right tool is ``update_parameters``, which is
+  a silent live-tweak.
+
+  We frame the prompt so the only reasonable interpretation is "change this
+  one number on the running graph" — no new nodes, no rewiring. The grader
+  then forbids ``workflow_proposal`` entirely for this case via the
+  ``forbid_proposal`` Case field.
+prompt: |
+  My longlive workflow is already loaded and streaming. I just want to
+  change noise_scale to 0.85. Don't rebuild or reload the workflow —
+  just update that parameter live.
+runs: 5
+forbid_proposal: true
diff --git a/evals/cases/starter-dissolving-sunflower.yaml b/evals/cases/starter-dissolving-sunflower.yaml
new file mode 100644
index 000000000..249fe043f
--- /dev/null
+++ b/evals/cases/starter-dissolving-sunflower.yaml
@@ -0,0 +1,23 @@
+name: starter-dissolving-sunflower
+description: |
+  Reproduces the "Dissolving Sunflower" teaching starter: a camera feed runs
+  through video-depth-anything first, then into longlive with a dissolve
+  LoRA and VACE reference, then out through a passthrough pipeline.
+prompt: |
+  Build me a depth-driven dissolve effect on my camera. Chain
+  video-depth-anything into longlive into passthrough. On the longlive node,
+  load the dissolve LoRA ("daydream-scope-dissolve.safetensors") at weight
+  around 1.5, and enable VACE with the depth output used as the reference
+  video. The main prompt should describe a "dissolving sunflower in
+  abstract particles".
+runs: 5
+expect:
+  - pipelines_include: [video-depth-anything, longlive, passthrough]
+  - lora_count_at_least: 1
+  - wire_present: { kind: vace_to_pipeline }
+  - wire_present: { kind: prompt_to_pipeline }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/starter-ltx-text-to-video.yaml b/evals/cases/starter-ltx-text-to-video.yaml
new file mode 100644
index 000000000..eb46ad524
--- /dev/null
+++ b/evals/cases/starter-ltx-text-to-video.yaml
@@ -0,0 +1,19 @@
+name: starter-ltx-text-to-video
+description: |
+  Reproduces the "LTX 2.3" teaching starter: a single ltx2 pipeline running
+  in text-to-video mode driven by a prompt.
+prompt: |
+  I want a simple text-to-video workflow using the ltx2 pipeline. Just one
+  pipeline node, wired straight to an output sink. Set up the prompt input
+  so I can type what I want to generate — for now, have it read
+  "a majestic lion striding across an open savannah". No LoRAs, no VACE,
+  no camera input needed; ltx2 is generating from text.
+runs: 5
+expect:
+  - pipelines_equal: [ltx2]
+  - wire_present: { kind: prompt_to_pipeline }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/starter-mythical-creature.yaml b/evals/cases/starter-mythical-creature.yaml
new file mode 100644
index 000000000..61a89fd2d
--- /dev/null
+++ b/evals/cases/starter-mythical-creature.yaml
@@ -0,0 +1,24 @@
+name: starter-mythical-creature
+description: |
+  Reproduces the "Mythical Creature" teaching starter: longlive pipeline with
+  two style LoRAs (acid-lime + dissolve) and VACE enabled, plus a slider for
+  noise_scale that the user can tweak live.
+prompt: |
+  I want to turn my webcam feed into a morphing slime creature. Use the
+  longlive pipeline. Load two LoRAs I already have by filename —
+  "diffslime_acidzlime-000016.safetensors" and
+  "daydream-scope-dissolve.safetensors" — both at weight ~1.5 in permanent
+  merge mode. Turn VACE on so I can feed the input video back in as a
+  reference. Give me a live slider wired to noise_scale so I can tweak it
+  while it's running.
+runs: 5
+expect:
+  - pipelines_include: [longlive]
+  - lora_count_at_least: 2
+  - wire_present: { kind: slider_to_pipeline_param, target_handle: "param:noise_scale" }
+  - wire_present: { kind: lora_to_pipeline }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/vague-capture-moments.yaml b/evals/cases/vague-capture-moments.yaml
new file mode 100644
index 000000000..0e7e5a353
--- /dev/null
+++ b/evals/cases/vague-capture-moments.yaml
@@ -0,0 +1,22 @@
+name: vague-capture-moments
+description: |
+  Deliberately vague prompt — user says what they want to *do*, not which
+  pipeline or what kind of node graph. Graders only check the bits that the
+  intent clearly implies: camera/webcam source, *some* pipeline, and a way
+  to capture output. This pressure-tests the agent's ability to fill gaps.
+prompt: |
+  I want to play around with my webcam and capture anything cool that happens.
+  Set up something simple I can tweak live.
+runs: 5
+expect:
+  # Intent is clear: there must be at least one generative pipeline. We do
+  # NOT assert *which* pipeline — the agent gets to pick.
+  - pipelines_count_at_least: 1
+  # Intent is clear: a way to record / capture output.
+  - node_present: { type: record }
+  - wire_present: { kind: pipeline_to_record }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/cases/vague-save-the-output.yaml b/evals/cases/vague-save-the-output.yaml
new file mode 100644
index 000000000..e839a33e0
--- /dev/null
+++ b/evals/cases/vague-save-the-output.yaml
@@ -0,0 +1,20 @@
+name: vague-save-the-output
+description: |
+  "Save" / "keep what I make" / "capture" all mean "add a record node".
+  This exercises the recording completeness-check item added to the
+  SYSTEM_PROMPT. Uses natural phrasing ("saves whatever I make") rather
+  than the literal word "record" to test intent-matching, not keyword-
+  matching.
+prompt: |
+  Set me up with a passthrough pipeline on my camera and make sure it
+  saves whatever I make.
+runs: 5
+expect:
+  - pipelines_include: [passthrough]
+  - node_present: { type: record }
+  - wire_present: { kind: pipeline_to_record }
+  - no_validator_errors: true
+forbid:
+  - bad_handle_prefix: "parameter:"
+  - orphan_sinks: true
+  - overlapping_nodes: true
diff --git a/evals/driver.py b/evals/driver.py
new file mode 100644
index 000000000..cb48da2ac
--- /dev/null
+++ b/evals/driver.py
@@ -0,0 +1,148 @@
+"""In-process driver for the Scope agent.
+
+Hits ``POST /api/v1/agent/chat`` via ``httpx.ASGITransport`` + ``asgi-lifespan``
+so no uvicorn server or port is needed. Parses the SSE stream, captures every
+event as a structured trace, and pulls out the final ``workflow_proposal``
+payload if the agent produced one.
+
+Contract::
+
+    result = await run_case(app, prompt, model_override=..., provider_override=...)
+    result.proposal  # dict | None — the `graph` from the workflow_proposal SSE
+    result.trace     # list[{event, data}] — every SSE event, in order
+    result.error     # str | None — provider/transport error if any
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
+from typing import Any
+
+import httpx
+from asgi_lifespan import LifespanManager
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class DriveResult:
+    proposal: dict | None = None  # the 'graph' from workflow_proposal SSE
+    proposal_id: str | None = None  # 'proposal_id' from that same event
+    rationale: str = ""  # 'rationale' from that same event, "" if absent
+    trace: list[dict] = field(default_factory=list)  # every SSE event, in order
+    error: str | None = None  # HTTP/transport failure or 'error' event messages
+    session_id: str | None = None  # x-agent-session-id response header
+
+
+async def _parse_sse_stream(resp: httpx.Response) -> AsyncIterator[dict]:
+    """Yield one ``{event, data}`` dict per ``data:`` line of the SSE body."""
+    event_name: str | None = None
+    async for raw in resp.aiter_lines():
+        if not raw:
+            event_name = None  # a blank line terminates the current event
+            continue
+        field_name, colon, rest = raw.rstrip("\r").partition(":")
+        if colon and field_name == "event":
+            event_name = rest.strip()
+        elif colon and field_name == "data":
+            text = rest.strip()
+            try:
+                parsed = json.loads(text)
+            except Exception:  # non-JSON payloads are kept verbatim under ``_raw``
+                parsed = {"_raw": text}
+            yield {"event": event_name or "message", "data": parsed}
+
+
+async def run_case(
+    app: Any,
+    prompt: str,
+    *,
+    model_override: str | None = None,
+    provider_override: str | None = None,
+) -> DriveResult:
+    """Drive one agent turn with ``prompt`` and return the captured result.
+
+    ``app`` is the FastAPI app instance (usually ``scope.server.app.app``).
+    The request body carries only ``message`` — no session id is sent, so the
+    server is expected to mint a fresh session per call (no cross-case leakage).
+    """
+    # Apply provider/model overrides by writing to the config file on disk
+    # (that's what the app reads). We rely on the caller to have scoped this
+    # via EnvOverride if they want to reset it after.
+    if model_override or provider_override:
+        _patch_agent_config(model=model_override, provider=provider_override)
+
+    result = DriveResult()
+    transport = httpx.ASGITransport(app=app)
+    try:
+        # Scope's startup runs plugin installs, pipeline registration, WebRTC
+        # setup, and OSC init — way past asgi-lifespan's 5s default. Give it
+        # plenty of headroom; a cold first run on CI can take >30s.
+        async with LifespanManager(app, startup_timeout=180, shutdown_timeout=30):
+            async with httpx.AsyncClient(
+                transport=transport,
+                base_url="http://scope-eval.local",
+                timeout=httpx.Timeout(300.0, connect=10.0),
+            ) as client:
+                async with client.stream(
+                    "POST",
+                    "/api/v1/agent/chat",
+                    json={"message": prompt},
+                ) as resp:
+                    if resp.status_code != 200:
+                        body = await resp.aread()
+                        result.error = (
+                            f"HTTP {resp.status_code}: "
+                            f"{body.decode('utf-8', errors='replace')[:500]}"
+                        )
+                        return result
+                    result.session_id = resp.headers.get("x-agent-session-id")
+                    async for evt in _parse_sse_stream(resp):
+                        result.trace.append(evt)
+                        name = evt["event"]
+                        data = evt["data"]
+                        if name == "workflow_proposal":
+                            # First proposal wins — agent should only emit one.
+                            if result.proposal is None:
+                                result.proposal = data.get("graph")
+                                result.proposal_id = data.get("proposal_id")
+                                result.rationale = data.get("rationale") or ""
+                        elif name == "error":
+                            # Don't short-circuit — the turn_end still arrives
+                            # and the trace is useful for debugging.
+                            msg = data.get("message") or str(data)
+                            result.error = (result.error or "") + msg + "\n"
+                        elif name == "turn_end":
+                            # Agent finished. We don't need more events.
+                            break
+    except Exception as e:
+        logger.exception("driver transport error")
+        result.error = f"{type(e).__name__}: {e}"
+    return result
+
+
+def _patch_agent_config(*, model: str | None, provider: str | None) -> None:
+    """Persist a provider and/or model override to the on-disk agent config.
+
+    Loads the current config with the server's own helper, swaps in whichever
+    overrides were given, and saves it back. Only ``provider``, ``model`` and
+    ``base_url`` are carried over into the rebuilt config. Safe to call
+    repeatedly.
+    """
+    from scope.server.agent_state import (
+        AgentConfig,
+        load_agent_config,
+        save_agent_config,
+    )
+
+    current = load_agent_config()
+    if provider or model:
+        current = AgentConfig(
+            provider=provider or current.provider,  # type: ignore[arg-type]
+            model=model or current.model,
+            base_url=current.base_url,
+        )
+    save_agent_config(current)
diff --git a/evals/grader.py b/evals/grader.py
new file mode 100644
index 000000000..8a041b203
--- /dev/null
+++ b/evals/grader.py
@@ -0,0 +1,586 @@
+"""Structural graders for workflow proposals.
+
+Each check is a pure function ``(graph, arg) -> CheckResult`` registered in
+:data:`CHECKS`. The YAML case format references checks by name (see
+:mod:`evals.case`), so adding a new check is: write a function, register it,
+reference it from a case file.
+
+We intentionally favor simple boolean-with-reason checks over complex
+"structural equivalence" comparisons — the three canonical failure modes
+we're trying to catch (missing VACE wire, unwired prompt, missing slider
+for a called-out parameter) are all detectable with trivial traversals.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any
+
+# Re-use the backend validator so a regression there immediately shows up
+# here too.
+from scope.server.agent_tool_impls import (
+    _derive_pipeline_handles,
+    _validate_proposal,
+)
+
+
+@dataclass
+class CheckResult:
+    """Outcome of one grader check: a pass/fail flag plus a short reason."""
+
+    # True when the check's assertion holds for the graph.
+    ok: bool
+    # Human-readable explanation of the outcome.
+    detail: str
+
+    @classmethod
+    def ok_(cls, detail: str = "") -> CheckResult:
+        """Build a passing result; ``detail`` is optional context."""
+        return cls(True, detail)
+
+    @classmethod
+    def fail(cls, detail: str) -> CheckResult:
+        """Build a failing result carrying the reason in ``detail``."""
+        return cls(False, detail)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _pipeline_ids(graph: dict) -> list[str]:
+    """List the ``pipeline_id`` of every top-level pipeline node, in order.
+
+    Nodes without a truthy ``pipeline_id`` are skipped; duplicates are kept.
+    """
+    ids: list[str] = []
+    for node in graph.get("nodes") or []:
+        if node.get("type") != "pipeline":
+            continue
+        pid = node.get("pipeline_id")
+        if pid:
+            ids.append(pid)
+    return ids
+
+
+def _pipeline_node_ids(graph: dict) -> set[str]:
+    """Collect the node ``id`` of every top-level pipeline node.
+
+    Pipeline nodes with a missing or falsy ``id`` are ignored.
+    """
+    found: set[str] = set()
+    for node in graph.get("nodes") or []:
+        if node.get("type") == "pipeline" and node.get("id"):
+            found.add(node["id"])
+    return found
+
+
+def _ui_nodes(graph: dict) -> list[dict]:
+    """Return ``ui_state.nodes``, or ``[]`` when either level is absent/empty."""
+    ui_state = graph.get("ui_state") or {}
+    return ui_state.get("nodes") or []
+
+
+def _ui_edges(graph: dict) -> list[dict]:
+    """Return ``ui_state.edges``, or ``[]`` when either level is absent/empty."""
+    ui_state = graph.get("ui_state") or {}
+    return ui_state.get("edges") or []
+
+
+def _ui_node_type(graph: dict, node_id: str) -> str | None:
+    """Look up the ``type`` of the first ui_state node whose id is ``node_id``.
+
+    Returns ``None`` when no ui_state node has that id.
+    """
+    match = next((n for n in _ui_nodes(graph) if n.get("id") == node_id), None)
+    return None if match is None else match.get("type")
+
+
+# Top-level (backend) graph helpers. The backend graph only accepts these
+# four node types; anything else lives in ui_state (see SYSTEM_PROMPT's
+# GRAPH SHAPE section).
+_TOP_LEVEL_TYPES = {"source", "pipeline", "sink", "record"}
+
+
+def _top_level_nodes(graph: dict) -> list[dict]:
+    """Return the backend graph's ``nodes`` list, or ``[]`` when absent/empty."""
+    nodes = graph.get("nodes")
+    return nodes if nodes else []
+
+
+def _top_level_edges(graph: dict) -> list[dict]:
+    """Return the backend graph's ``edges`` list, or ``[]`` when absent/empty."""
+    edges = graph.get("edges")
+    return edges if edges else []
+
+
+def _top_level_node_type(graph: dict, node_id: str) -> str | None:
+    """Look up the ``type`` of the first top-level node whose id is ``node_id``.
+
+    Returns ``None`` when the backend graph has no node with that id.
+    """
+    match = next(
+        (n for n in _top_level_nodes(graph) if n.get("id") == node_id), None
+    )
+    return None if match is None else match.get("type")
+
+
+def _nodes_of_type(graph: dict, want_type: str) -> list[dict]:
+    """Return every node of ``want_type`` from the place that type lives.
+
+    Types listed in ``_TOP_LEVEL_TYPES`` (source/pipeline/sink/record) are
+    looked up in the backend graph's ``nodes``; any other type (slider,
+    vace, prompt_list, ...) is looked up in ``ui_state.nodes``.
+    """
+    if want_type in _TOP_LEVEL_TYPES:
+        pool = _top_level_nodes(graph)
+    else:
+        pool = _ui_nodes(graph)
+    return [node for node in pool if node.get("type") == want_type]
+
+
+# ---------------------------------------------------------------------------
+# Checks — for both expect and forbid, `ok=True` means "assertion holds".
+# Forbid checks are phrased as "ok if absent", so the runner never inverts.
+# ---------------------------------------------------------------------------
+
+
+def pipelines_equal(graph: dict, arg: Any) -> CheckResult:
+    """Assert the graph's set of pipeline ids is exactly the expected set.
+
+    ``arg`` is an iterable of pipeline ids, or a single id given as a bare
+    string; ``None`` or empty means "no pipelines expected". The comparison
+    is set-based, so duplicates on either side are ignored. On failure the
+    detail lists the ``missing`` and/or ``extra`` ids.
+    """
+    # A bare string is one id; set("abc") would split it into characters.
+    want = {arg} if isinstance(arg, str) and arg else set(arg or [])
+    got = set(_pipeline_ids(graph))
+    if got == want:
+        return CheckResult.ok_(f"pipelines={sorted(got)}")
+    missing = sorted(want - got)
+    extra = sorted(got - want)
+    parts = []
+    if missing:
+        parts.append(f"missing={missing}")
+    if extra:
+        parts.append(f"extra={extra}")
+    return CheckResult.fail(", ".join(parts) or f"got={sorted(got)}")
+
+
+def pipelines_include(graph: dict, arg: Any) -> CheckResult:
+    """Assert every expected pipeline id appears in the graph (extras allowed).
+
+    ``arg`` is an iterable of pipeline ids, or a single id given as a bare
+    string; ``None`` or empty expects nothing and always passes. On failure
+    the detail lists the ``missing`` ids and what the graph actually has.
+    """
+    # A bare string is one id; set("abc") would split it into characters.
+    want = {arg} if isinstance(arg, str) and arg else set(arg or [])
+    got = set(_pipeline_ids(graph))
+    missing = sorted(want - got)
+    if missing:
+        return CheckResult.fail(f"missing={missing}, got={sorted(got)}")
+    return CheckResult.ok_(f"got={sorted(got)}")
+
+
+def pipelines_count_at_least(graph: dict, arg: Any) -> CheckResult:
+    """Assert the graph has at least ``int(arg)`` pipeline nodes.
+
+    The check does not pin which pipelines are used, which suits vague
+    prompts where the agent is free to choose. Repeated pipeline ids each
+    count towards the total.
+    """
+    required = int(arg)
+    found = _pipeline_ids(graph)
+    count = len(found)
+    if count < required:
+        return CheckResult.fail(f"need >= {required} pipeline(s), got {count}")
+    return CheckResult.ok_(f"{count} pipeline(s): {sorted(set(found))}")
+
+
+def lora_count_at_least(graph: dict, arg: Any) -> CheckResult:
+    """Assert the ui_state holds at least ``int(arg)`` LoRA entries.
+
+    Each ``lora`` ui_state node contributes the length of its
+    ``data.loras`` list, with a minimum of one so that a lora node with no
+    listed entries still counts once.
+    """
+    required = int(arg)
+    lora_nodes = [n for n in _ui_nodes(graph) if n.get("type") == "lora"]
+    node_count = len(lora_nodes)
+    total = sum(
+        max(1, len((n.get("data") or {}).get("loras") or [])) for n in lora_nodes
+    )
+    if total < required:
+        return CheckResult.fail(
+            f"need >= {required}, found {total} across {node_count} lora node(s)"
+        )
+    return CheckResult.ok_(f"{total} lora entr(ies) across {node_count} node(s)")
+
+
+def no_validator_errors(graph: dict, _arg: Any) -> CheckResult:
+    """Re-run the backend proposal validator and fail on any ``error`` issue.
+
+    A proposal that reached the harness presumably passed validation once
+    already; re-asserting here is meant to surface a silent regression in
+    the validator itself.
+
+    The live pipeline registry is not available in the harness, so every
+    pipeline id in the graph gets a handle table derived from a permissive
+    stand-in schema: all capability flags on, no config properties.
+    Issues of other severities are ignored.
+    """
+    handles: dict[str, dict] = {
+        pid: _derive_pipeline_handles(
+            pid,
+            # Fresh dict per pipeline, in case the helper mutates its input.
+            {
+                "supports_prompts": True,
+                "supports_vace": True,
+                "supports_lora": True,
+                "produces_video": True,
+                "config_schema": {"properties": {}},
+            },
+        )
+        for pid in set(_pipeline_ids(graph))
+    }
+    errors = [
+        issue
+        for issue in _validate_proposal(graph, handles)
+        if issue.get("severity") == "error"
+    ]
+    if not errors:
+        return CheckResult.ok_("0 validator errors")
+    first = errors[0].get("message", "")
+    return CheckResult.fail(f"{len(errors)} validator error(s); first: {first}")
+
+
+# ---------------------------------------------------------------------------
+# wire_present — one check with a `kind` discriminator.
+# ---------------------------------------------------------------------------
+
+
+# ui_state node types that wire_present accepts as a "value-producing" edge
+# source (used by slider_to_pipeline_param, image_to_vace and
+# trigger_to_prompt_list).
+_VALUE_SOURCE_TYPES = {
+    "slider",
+    "knobs",
+    "primitive",
+    "trigger",
+    "control",
+    "subgraph",
+    "math",
+}
+
+
+def _edges_into(
+    graph: dict, target_id: str, target_handle: str | None = None
+) -> list[dict]:
+    """Return the ui_state edges whose ``target`` is ``target_id``.
+
+    When ``target_handle`` is given, only edges with that exact
+    ``targetHandle`` are kept. Edge order follows ``ui_state.edges``.
+    """
+    return [
+        edge
+        for edge in _ui_edges(graph)
+        if edge.get("target") == target_id
+        and (target_handle is None or edge.get("targetHandle") == target_handle)
+    ]
+
+
+def _edges_into_any_pipeline(graph: dict, target_handle: str) -> list[dict]:
+    """Return ui_state edges that land on ``target_handle`` of any pipeline.
+
+    A pipeline is any top-level node of type ``pipeline``; the edge's
+    ``target`` must be one of their ids.
+    """
+    pipeline_ids = _pipeline_node_ids(graph)
+    return [
+        edge
+        for edge in _ui_edges(graph)
+        if edge.get("target") in pipeline_ids
+        and edge.get("targetHandle") == target_handle
+    ]
+
+
+def wire_present(graph: dict, arg: Any) -> CheckResult:
+    """Assert that a specific kind of wiring exists in the proposal.
+
+    ``arg`` is a mapping with a required ``kind`` discriminator:
+
+    - ``slider_to_pipeline_param`` (needs ``target_handle``): a ui_state
+      edge from a value-producing node into that handle on any pipeline.
+    - ``vace_to_pipeline``: a ``vace`` node wired into ``param:__vace``.
+    - ``image_to_vace``: an ``image`` or value-producing node wired into a
+      vace node's ``param:ref_image`` / ``param:first_frame`` /
+      ``param:last_frame``.
+    - ``prompt_to_pipeline`` / ``lora_to_pipeline``: any ui_state edge into
+      a pipeline's ``param:__prompt`` / ``param:__loras``; the source type
+      is not checked.
+    - ``pipeline_to_record``: a stream edge from a pipeline into a record
+      node, either as a top-level edge or as a ui_state edge.
+    - ``prompt_list_to_pipeline``: a ``prompt_list`` node wired into
+      ``param:__prompt``.
+    - ``trigger_to_prompt_list``: a value-producing node wired into a
+      prompt_list's ``param:trigger`` or ``param:cycle``.
+
+    Returns a passing result naming the first matching edge, or a failing
+    result explaining what is missing. A malformed ``arg`` or an unknown
+    ``kind`` is reported as a failure rather than raised.
+    """
+    if not isinstance(arg, dict) or "kind" not in arg:
+        return CheckResult.fail(f"wire_present needs {{kind: ...}}, got {arg!r}")
+    kind = arg["kind"]
+
+    if kind == "slider_to_pipeline_param":
+        target_handle = arg.get("target_handle")
+        if not target_handle:
+            return CheckResult.fail("slider_to_pipeline_param needs target_handle")
+        hits = _edges_into_any_pipeline(graph, target_handle)
+        if not hits:
+            return CheckResult.fail(
+                f"no ui_state edge targets a pipeline's {target_handle}"
+            )
+        # Source must be a value-producing UI node type.
+        for e in hits:
+            src_t = _ui_node_type(graph, e.get("source"))
+            if src_t in _VALUE_SOURCE_TYPES:
+                return CheckResult.ok_(f"{src_t}({e.get('source')}) -> {target_handle}")
+        return CheckResult.fail(
+            f"edge(s) into {target_handle} exist but none originate from "
+            f"a value-producing node (types: {sorted(_VALUE_SOURCE_TYPES)})"
+        )
+
+    if kind == "vace_to_pipeline":
+        hits = _edges_into_any_pipeline(graph, "param:__vace")
+        if not hits:
+            return CheckResult.fail("no edge targets pipeline's param:__vace")
+        for e in hits:
+            if _ui_node_type(graph, e.get("source")) == "vace":
+                return CheckResult.ok_(f"vace({e.get('source')}) -> param:__vace")
+        return CheckResult.fail(
+            "param:__vace edge exists but source is not a vace node"
+        )
+
+    if kind == "image_to_vace":
+        vace_handles = {"param:ref_image", "param:first_frame", "param:last_frame"}
+        for e in _ui_edges(graph):
+            tgt_t = _ui_node_type(graph, e.get("target"))
+            if tgt_t == "vace" and e.get("targetHandle") in vace_handles:
+                src_t = _ui_node_type(graph, e.get("source"))
+                # Accept either a dedicated 'image' node or a generic value
+                # source (primitive holding a path).
+                if src_t in {"image"} | _VALUE_SOURCE_TYPES:
+                    return CheckResult.ok_(
+                        f"{src_t}({e.get('source')}) -> vace.{e.get('targetHandle')}"
+                    )
+        return CheckResult.fail(
+            "no edge into a vace node's ref_image/first_frame/last_frame"
+        )
+
+    if kind == "prompt_to_pipeline":
+        hits = _edges_into_any_pipeline(graph, "param:__prompt")
+        if hits:
+            return CheckResult.ok_(f"{len(hits)} edge(s) -> param:__prompt")
+        return CheckResult.fail("no edge targets pipeline's param:__prompt")
+
+    if kind == "lora_to_pipeline":
+        hits = _edges_into_any_pipeline(graph, "param:__loras")
+        if hits:
+            return CheckResult.ok_(f"{len(hits)} edge(s) -> param:__loras")
+        return CheckResult.fail("no edge targets pipeline's param:__loras")
+
+    if kind == "pipeline_to_record":
+        # A record node is a top-level node type; the canonical wiring is a
+        # top-level stream edge `pipeline -> record`. We also accept a
+        # ui_state-shaped edge from a pipeline to a record node, since
+        # either is permissible at the schema level.
+        pipe_ids = _pipeline_node_ids(graph)
+
+        # Top-level form: {"from": <pipe>, "to_node": <rec>, "kind": "stream"}.
+        for e in _top_level_edges(graph):
+            if e.get("from") not in pipe_ids:
+                continue
+            if _top_level_node_type(graph, e.get("to_node")) != "record":
+                continue
+            if e.get("kind") != "stream":
+                continue
+            return CheckResult.ok_(
+                f"pipeline({e.get('from')}) -> record({e.get('to_node')}) "
+                f"(top-level stream edge)"
+            )
+
+        # ui_state form (less common but legal for composed graphs).
+        for e in _ui_edges(graph):
+            if e.get("source") not in pipe_ids:
+                continue
+            # The record node normally lives in the top-level graph, just
+            # like the pipeline ids above, so resolve the target there as
+            # well as in ui_state. Looking only in ui_state would never
+            # match a top-level record node.
+            target_id = e.get("target")
+            target_types = (
+                _top_level_node_type(graph, target_id),
+                _ui_node_type(graph, target_id),
+            )
+            if "record" not in target_types:
+                continue
+            sh = e.get("sourceHandle") or ""
+            if isinstance(sh, str) and sh.startswith("stream:"):
+                return CheckResult.ok_(
+                    f"pipeline({e.get('source')}) -> record({e.get('target')}) "
+                    f"via ui_state {sh}"
+                )
+        return CheckResult.fail(
+            "no stream edge (top-level or ui_state) wires a pipeline "
+            "output into a record node"
+        )
+
+    if kind == "prompt_list_to_pipeline":
+        # prompt_list node's param:prompt output → pipeline's param:__prompt.
+        hits = _edges_into_any_pipeline(graph, "param:__prompt")
+        if not hits:
+            return CheckResult.fail("no edge targets pipeline's param:__prompt")
+        for e in hits:
+            if _ui_node_type(graph, e.get("source")) == "prompt_list":
+                return CheckResult.ok_(
+                    f"prompt_list({e.get('source')}) -> param:__prompt"
+                )
+        return CheckResult.fail(
+            "param:__prompt edge exists but source is not a prompt_list node"
+        )
+
+    if kind == "trigger_to_prompt_list":
+        # Some value source → prompt_list's param:trigger (or param:cycle).
+        accepted = {"param:trigger", "param:cycle"}
+        for e in _ui_edges(graph):
+            if _ui_node_type(graph, e.get("target")) != "prompt_list":
+                continue
+            if e.get("targetHandle") not in accepted:
+                continue
+            src_t = _ui_node_type(graph, e.get("source"))
+            if src_t in _VALUE_SOURCE_TYPES:
+                return CheckResult.ok_(
+                    f"{src_t}({e.get('source')}) -> prompt_list.{e.get('targetHandle')}"
+                )
+        return CheckResult.fail(
+            "no edge from a value-producing source into a prompt_list's "
+            "param:trigger or param:cycle"
+        )
+
+    return CheckResult.fail(f"unknown wire_present kind: {kind!r}")
+
+
+def node_present(graph: dict, arg: Any) -> CheckResult:
+    """Assert at least ``count`` nodes of a given type exist.
+
+    ``arg`` is a mapping such as ``{type: "record", count: 1, min_items: 5}``:
+
+    - ``type`` (required): node type to look for. source/pipeline/sink/record
+      are searched in the backend graph, every other type in ui_state.
+    - ``count`` (default 1): minimum number of matching nodes.
+    - ``min_items`` (optional): when given, at least one matching node must
+      carry a ``data.promptListItems`` list with at least this many entries.
+      Intended for ``prompt_list`` nodes; the check itself does not
+      restrict the type.
+    """
+    if not (isinstance(arg, dict) and "type" in arg):
+        return CheckResult.fail(f"node_present needs {{type: ...}}, got {arg!r}")
+    want_type = arg["type"]
+    want_count = int(arg.get("count", 1))
+    min_items = arg.get("min_items")
+
+    matches = _nodes_of_type(graph, want_type)
+    found = len(matches)
+    if found < want_count:
+        return CheckResult.fail(
+            f"need >= {want_count} node(s) of type {want_type!r}, got {found}"
+        )
+    if min_items is None:
+        return CheckResult.ok_(f"{found} {want_type} node(s)")
+
+    threshold = int(min_items)
+    # Non-list promptListItems values are ignored rather than measured.
+    item_lists = ((n.get("data") or {}).get("promptListItems") or [] for n in matches)
+    longest = max(
+        (len(items) for items in item_lists if isinstance(items, list)), default=0
+    )
+    if longest < threshold:
+        return CheckResult.fail(
+            f"{want_type} exists but longest promptListItems is {longest}, "
+            f"need >= {threshold}"
+        )
+    return CheckResult.ok_(
+        f"{found} {want_type} node(s); longest list has {longest} item(s)"
+    )
+
+
+# ---------------------------------------------------------------------------
+# forbid checks
+# ---------------------------------------------------------------------------
+
+
+def bad_handle_prefix(graph: dict, arg: Any) -> CheckResult:
+    """Forbid check: pass only when no ui_state edge handle starts with ``arg``.
+
+    Both ``sourceHandle`` and ``targetHandle`` of every ui_state edge are
+    inspected; non-string handles are skipped. The first offending edge is
+    named in the failure detail.
+    """
+    prefix = str(arg)
+    handles = (
+        (edge, side, edge.get(side))
+        for edge in _ui_edges(graph)
+        for side in ("sourceHandle", "targetHandle")
+    )
+    for edge, side, handle in handles:
+        if isinstance(handle, str) and handle.startswith(prefix):
+            return CheckResult.fail(
+                f"edge {edge.get('id', '?')} {side}={handle!r} starts with forbidden {prefix!r}"
+            )
+    return CheckResult.ok_(f"no edge handle starts with {prefix!r}")
+
+
+def overlapping_nodes(graph: dict, _arg: Any) -> CheckResult:
+    """Forbid check: fail if any two predicted canvas rectangles overlap.
+
+    Observed failure: the agent picks UI-node positions such as (0,0),
+    (0,80), (320,40) that look tidy on their own but collide with the strip
+    where the frontend auto-lays-out top-level nodes. The server-side
+    ``_reflow_ui_nodes`` is expected to reassign such positions, so this
+    check acts as a regression detector for either new agent layout
+    patterns or a reflow bug.
+
+    Rectangles are predicted as follows:
+
+    - Top-level nodes: 200x60, one column per type (source, pipeline, sink,
+      record at x = 50, 350, 650, 950), rows 160 apart starting at y = 50.
+    - ui_state nodes: 240 wide at their stored ``position``; 280 tall for
+      image/vace/subgraph, 140 tall otherwise. A missing coordinate is
+      treated as 0; a non-numeric one fails the check.
+
+    Rectangles that merely touch along an edge do not count as overlapping.
+    """
+    # These sizes are intended to mirror the server-side reflow. They are
+    # duplicated on purpose: if the two sets drift apart, the eval is where
+    # that should show up.
+    fe_start_x, fe_start_y = 50, 50
+    fe_column_gap, fe_row_gap = 300, 100
+    fe_node_w, fe_node_h = 200, 60
+
+    ui_node_w = 240
+    ui_node_h_default, ui_node_h_tall = 140, 280
+    tall_types = {"image", "vace", "subgraph"}
+
+    column_of = {"source": 0, "pipeline": 1, "sink": 2, "record": 3}
+
+    def intersects(
+        first: tuple[float, float, float, float],
+        second: tuple[float, float, float, float],
+    ) -> bool:
+        ax, ay, aw, ah = first
+        bx, by, bw, bh = second
+        separated = ax + aw <= bx or bx + bw <= ax or ay + ah <= by or by + bh <= ay
+        return not separated
+
+    # Group top-level node ids by layout column, keeping first-seen order.
+    columns: dict[int, list[str]] = {}
+    for node in _top_level_nodes(graph):
+        col = column_of.get(node.get("type"))
+        if col is not None and node.get("id"):
+            columns.setdefault(col, []).append(node["id"])
+
+    boxes: list[tuple[tuple[float, float, float, float], str]] = []
+    for col, ids in columns.items():
+        left = float(fe_start_x + col * fe_column_gap)
+        for row, nid in enumerate(ids):
+            top = float(fe_start_y + row * (fe_node_h + fe_row_gap))
+            boxes.append(((left, top, float(fe_node_w), float(fe_node_h)), f"top:{nid}"))
+
+    # ui_state rectangles sit wherever the agent (or reflow) placed them.
+    for node in _ui_nodes(graph):
+        pos = node.get("position") or {}
+        try:
+            x = float(pos.get("x", 0))
+            y = float(pos.get("y", 0))
+        except (TypeError, ValueError):
+            return CheckResult.fail(
+                f"ui node {node.get('id')!r} has invalid position {pos!r}"
+            )
+        if node.get("type") in tall_types:
+            height = ui_node_h_tall
+        else:
+            height = ui_node_h_default
+        boxes.append(
+            ((x, y, float(ui_node_w), float(height)), f"ui:{node.get('id') or '?'}")
+        )
+
+    # Report the first colliding pair in list order.
+    for idx, (rect_a, label_a) in enumerate(boxes):
+        for rect_b, label_b in boxes[idx + 1 :]:
+            if intersects(rect_a, rect_b):
+                return CheckResult.fail(f"{label_a} overlaps {label_b}")
+
+    return CheckResult.ok_(f"no overlaps among {len(boxes)} node(s)")
+
+
+def orphan_sinks(graph: dict, _arg: Any) -> CheckResult:
+    """Forbid check: every top-level sink must have an incoming stream edge.
+
+    Observed failure: the agent sometimes emits an extra ``sink`` node that
+    nothing is wired to. Such a graph passes validation (a disconnected
+    sink is not illegal) but leaves a dead element on the canvas.
+
+    A sink counts as wired when some top-level edge has ``kind == "stream"``
+    and ``to_node`` equal to the sink's id. A graph with no sinks passes;
+    asserting that a sink exists is left to other checks.
+    """
+    sink_ids = [n.get("id") for n in _top_level_nodes(graph) if n.get("type") == "sink"]
+    if not sink_ids:
+        return CheckResult.ok_("no sinks to inspect")
+
+    # Targets of every top-level stream edge (a list, so ids need not hash).
+    stream_targets = [
+        e.get("to_node") for e in _top_level_edges(graph) if e.get("kind") == "stream"
+    ]
+    orphans: list[str] = [str(sid) for sid in sink_ids if sid not in stream_targets]
+
+    if not orphans:
+        return CheckResult.ok_(f"all {len(sink_ids)} sink(s) wired")
+    return CheckResult.fail(
+        f"{len(orphans)}/{len(sink_ids)} sink(s) have no incoming stream edge: "
+        f"{orphans}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Registry
+# ---------------------------------------------------------------------------
+
+
+# Name -> check function. ``run_check`` looks checks up here by name, so a
+# new check must be added to this mapping before it can be used.
+CHECKS: dict[str, Callable[[dict, Any], CheckResult]] = {
+    "pipelines_equal": pipelines_equal,
+    "pipelines_include": pipelines_include,
+    "pipelines_count_at_least": pipelines_count_at_least,
+    "lora_count_at_least": lora_count_at_least,
+    "no_validator_errors": no_validator_errors,
+    "wire_present": wire_present,
+    "node_present": node_present,
+    "bad_handle_prefix": bad_handle_prefix,
+    "orphan_sinks": orphan_sinks,
+    "overlapping_nodes": overlapping_nodes,
+}
+
+
+def run_check(name: str, graph: dict, arg: Any) -> CheckResult:
+    """Run the registered check ``name`` against ``graph`` and never raise.
+
+    An unregistered name yields a failing result. Any exception raised by
+    the check is converted into a failing result that names the exception
+    type, so one buggy check cannot abort the whole run.
+    """
+    if name not in CHECKS:
+        return CheckResult.fail(f"unknown check: {name!r}")
+    check = CHECKS[name]
+    try:
+        outcome = check(graph, arg)
+    except Exception as exc:  # defensive — a buggy check must not kill the run
+        return CheckResult.fail(f"{type(exc).__name__} in check {name}: {exc}")
+    return outcome
diff --git a/evals/runner.py b/evals/runner.py
new file mode 100644
index 000000000..863db3021
--- /dev/null
+++ b/evals/runner.py
@@ -0,0 +1,343 @@
+"""Runner: execute cases, grade proposals, print a summary, dump artifacts."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+from time import perf_counter
+
+from .case import Case, discover_cases, load_case
+from .driver import DriveResult, run_case
+from .grader import run_check
+
+logger = logging.getLogger("evals")
+
+
+EVALS_ROOT = Path(__file__).resolve().parent
+DEFAULT_CASES_DIR = EVALS_ROOT / "cases"
+DEFAULT_OUTPUT_DIR = EVALS_ROOT / "outputs"
+
+
+@dataclass
+class RunResult:
+    """Graded outcome of a single run (one sample) of one case."""
+
+    # Name of the case this run belongs to.
+    case_name: str
+    # Index of the run within its case (run_cases numbers runs from 1).
+    run_index: int
+    # True when grading produced no failures.
+    passed: bool
+    # One human-readable reason per failed check or driver problem.
+    failures: list[str] = field(default_factory=list)
+    # Raw driver output for this run.
+    drive: DriveResult | None = None
+    # Wall-clock time spent driving the agent, in seconds.
+    wall_seconds: float = 0.0
+
+
+@dataclass
+class CaseSummary:
+    """All runs of one case, with derived pass statistics."""
+
+    case: Case
+    runs: list[RunResult]
+
+    @property
+    def pass_count(self) -> int:
+        """Number of runs that passed."""
+        return len([run for run in self.runs if run.passed])
+
+    @property
+    def rate_pct(self) -> float:
+        """Pass rate as a percentage; ``0.0`` when there are no runs."""
+        total = len(self.runs)
+        return 100.0 * self.pass_count / total if total else 0.0
+
+    @property
+    def grouped_failures(self) -> list[str]:
+        """One ``r<index>: reason; reason`` label per failed run with reasons."""
+        labels: list[str] = []
+        for run in self.runs:
+            if run.passed or not run.failures:
+                continue
+            labels.append(f"r{run.run_index}: {'; '.join(run.failures)}")
+        return labels
+
+
+# ---------------------------------------------------------------------------
+# Grading
+# ---------------------------------------------------------------------------
+
+
+def _grade(case: Case, drive: DriveResult) -> tuple[bool, list[str]]:
+    """Grade one driven run; returns ``(passed, failure_reasons)``.
+
+    - A driver error with no proposal fails the run for any kind of case.
+    - ``forbid_proposal`` cases (runtime tweaks) pass only when the agent
+      emitted no workflow_proposal; no graph checks are run for them.
+    - Otherwise a proposal is required, and every ``expect`` and ``forbid``
+      check is run against it. Forbid checks are themselves phrased as
+      "ok if absent", so their result is not inverted here.
+    """
+    proposal = drive.proposal
+
+    # Shared guard: nothing to grade if the driver failed before proposing.
+    if drive.error and proposal is None:
+        return False, [f"driver error: {drive.error.strip()[:200]}"]
+
+    if case.forbid_proposal:
+        # Rebuilding the whole graph for a parameter change is a regression.
+        if proposal is None:
+            return True, []
+        return False, [
+            "forbid_proposal: agent emitted workflow_proposal when it "
+            "should have used update_parameters instead"
+        ]
+
+    if proposal is None:
+        return False, [
+            "no workflow_proposal SSE event seen — agent likely gave a "
+            "text-only response or failed before proposing"
+        ]
+
+    failures: list[str] = []
+
+    def _apply(specs, prefix: str) -> None:
+        # Run each spec and record a "name(arg): detail" line when it fails.
+        for spec in specs:
+            res = run_check(spec.name, proposal, spec.arg)
+            if res.ok:
+                continue
+            arg = spec.arg
+            shown = arg if isinstance(arg, str) else json.dumps(arg, default=str)
+            failures.append(f"{prefix}{spec.name}({shown}): {res.detail}")
+
+    _apply(case.expect, "")
+    _apply(case.forbid, "forbid.")
+
+    return (not failures), failures
+
+
+# ---------------------------------------------------------------------------
+# Artifact writing
+# ---------------------------------------------------------------------------
+
+
+def _write_artifacts(
+    output_dir: Path, case_name: str, run_index: int, run: RunResult
+) -> None:
+    """Dump one run's artifacts under ``<output_dir>/<case_name>/r<NN>/``.
+
+    Three files are written: ``proposal.json`` (the proposed graph, or
+    ``{}``), ``meta.json`` (pass/fail, failures and driver metadata) and
+    ``trace.jsonl`` (one JSON object per event in the driver trace). The
+    directory is created when missing. Values that are not JSON-native are
+    stringified.
+    """
+    run_dir = output_dir / case_name / f"r{run_index:02d}"
+    run_dir.mkdir(parents=True, exist_ok=True)
+    drive = run.drive or DriveResult()
+
+    proposal_text = json.dumps(drive.proposal or {}, indent=2, default=str)
+    (run_dir / "proposal.json").write_text(proposal_text)
+
+    meta = {
+        "case": case_name,
+        "run_index": run_index,
+        "passed": run.passed,
+        "failures": run.failures,
+        "rationale": drive.rationale,
+        "proposal_id": drive.proposal_id,
+        "session_id": drive.session_id,
+        "wall_seconds": round(run.wall_seconds, 3),
+        "error": drive.error,
+    }
+    (run_dir / "meta.json").write_text(json.dumps(meta, indent=2, default=str))
+
+    # SSE trace as JSONL for easy grepping.
+    with (run_dir / "trace.jsonl").open("w") as fh:
+        fh.writelines(json.dumps(evt, default=str) + "\n" for evt in drive.trace)
+
+
+# ---------------------------------------------------------------------------
+# Orchestration
+# ---------------------------------------------------------------------------
+
+
+async def _run_single(
+    app,
+    case: Case,
+    run_index: int,
+    *,
+    model_override: str | None,
+    provider_override: str | None,
+) -> RunResult:
+    """Drive the agent once with the case prompt, then grade the result.
+
+    ``wall_seconds`` on the returned result covers only the driver call,
+    not grading.
+    """
+    started = perf_counter()
+    drive = await run_case(
+        app,
+        case.prompt,
+        model_override=model_override,
+        provider_override=provider_override,
+    )
+    elapsed = perf_counter() - started
+
+    verdict, reasons = _grade(case, drive)
+    return RunResult(
+        case_name=case.name,
+        run_index=run_index,
+        passed=verdict,
+        failures=reasons,
+        drive=drive,
+        wall_seconds=elapsed,
+    )
+
+
+async def run_cases(
+    cases: list[Case],
+    *,
+    runs_override: int | None = None,
+    model_override: str | None = None,
+    provider_override: str | None = None,
+    output_dir: Path = DEFAULT_OUTPUT_DIR,
+) -> list[CaseSummary]:
+    """Run every case sequentially and return one summary per case.
+
+    Each case is sampled ``runs_override`` times when that is a non-zero
+    value, otherwise ``case.runs`` times. Runs are numbered from 1, and
+    artifacts for each run are written to ``output_dir`` as soon as the run
+    has been graded.
+    """
+    # Import here so a plain ``python -m evals --help`` doesn't pay the
+    # Scope import cost.
+    from scope.server.app import app  # noqa: PLC0415
+
+    summaries: list[CaseSummary] = []
+    for case in cases:
+        total = runs_override or case.runs
+        results: list[RunResult] = []
+        for idx in range(1, total + 1):
+            logger.info(f"[{case.name}] run {idx}/{total}...")
+            result = await _run_single(
+                app,
+                case,
+                idx,
+                model_override=model_override,
+                provider_override=provider_override,
+            )
+            _write_artifacts(output_dir, case.name, idx, result)
+            results.append(result)
+            if result.passed:
+                status, detail = "PASS", ""
+            else:
+                status = "FAIL"
+                detail = f" — {'; '.join(result.failures)[:160]}"
+            logger.info(
+                f"[{case.name}] run {idx}/{total} {status} "
+                f"({result.wall_seconds:.1f}s){detail}"
+            )
+        summaries.append(CaseSummary(case=case, runs=results))
+    return summaries
+
+
+# ---------------------------------------------------------------------------
+# Printing
+# ---------------------------------------------------------------------------
+
+
+def print_summary(summaries: list[CaseSummary], output_dir: Path) -> tuple[int, int]:
+    """Print the per-case results table and return ``(total_pass, total_runs)``.
+
+    The table has one row per case (runs, passes, pass rate, and failure
+    labels truncated to 200 characters), followed by an overall row and the
+    artifact location.
+    """
+    # The name column is as wide as the longest case name, and at least 4.
+    name_w = max([4, *(len(s.case.name) for s in summaries)])
+
+    header = f"{'case'.ljust(name_w)}  runs  pass  rate   failures"
+    print(header)
+
+    total_pass = 0
+    total_runs = 0
+    for summary in summaries:
+        run_count = len(summary.runs)
+        total_pass += summary.pass_count
+        total_runs += run_count
+        columns = [
+            summary.case.name.ljust(name_w),
+            f"{run_count:>4}",
+            f"{summary.pass_count:>4}",
+            f"{summary.rate_pct:>4.0f}%",
+            "; ".join(summary.grouped_failures)[:200],
+        ]
+        print("  ".join(columns))
+
+    print("─" * max(len(header), 60))
+    overall_rate = 100.0 * total_pass / total_runs if total_runs else 0.0
+    print(
+        f"{'overall'.ljust(name_w)}  {total_runs:>4}  {total_pass:>4}  "
+        f"{overall_rate:>4.0f}%"
+    )
+    print(f"\nArtifacts: {output_dir}/<case>/<run>/{{proposal.json,trace.jsonl}}")
+    return total_pass, total_runs
+
+
+# ---------------------------------------------------------------------------
+# Entry points
+# ---------------------------------------------------------------------------
+
+
+def _resolve_cases(cases_dir: Path, selected: list[str] | None) -> list[Case]:
+    """Load the selected cases from ``cases_dir``, or all cases when none given.
+
+    Each entry of ``selected`` is a case name with or without a ``.yaml`` /
+    ``.yml`` extension. A bare name is tried as ``<name>.yaml`` first and
+    then ``<name>.yml``. Blank entries are ignored, so a selection made only
+    of blanks (for example ``--case ""``) means "all cases".
+
+    Raises:
+        FileNotFoundError: a selected case has no matching file.
+    """
+    names = [s for s in selected or [] if s.strip()]
+    if not names:
+        return discover_cases(cases_dir)
+    out: list[Case] = []
+    for name in names:
+        if name.endswith((".yaml", ".yml")):
+            candidates = [cases_dir / name]
+        else:
+            candidates = [cases_dir / f"{name}.yaml", cases_dir / f"{name}.yml"]
+        found = next((c for c in candidates if c.exists()), None)
+        if found is None:
+            # Report the primary (.yaml) spelling, as before.
+            raise FileNotFoundError(f"no such case: {candidates[0]}")
+        out.append(load_case(found))
+    return out
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point for ``python -m evals``; returns the process exit code.
+
+    Exit codes:
+      0 — the run completed and no threshold was violated.
+      1 — ``--fail-threshold`` was given and the overall pass rate is below it.
+      2 — a selected case file does not exist, or no cases were found.
+
+    ``argv`` defaults to ``sys.argv[1:]`` (argparse behaviour) when ``None``.
+    """
+    import argparse
+
+    # Log level comes from EVALS_LOG_LEVEL, defaulting to INFO.
+    logging.basicConfig(
+        level=os.environ.get("EVALS_LOG_LEVEL", "INFO"),
+        format="%(asctime)s %(levelname)s %(message)s",
+    )
+    p = argparse.ArgumentParser(prog="python -m evals")
+    p.add_argument(
+        "--case",
+        action="append",
+        default=[],
+        help="Case name (with or without .yaml). Repeatable. Omit for all cases.",
+    )
+    p.add_argument("--runs", type=int, default=None, help="Override runs per case.")
+    p.add_argument("--model", default=None, help="Override model id.")
+    p.add_argument("--provider", default=None, help="Override provider.")
+    p.add_argument(
+        "--cases-dir",
+        default=str(DEFAULT_CASES_DIR),
+        help="Directory containing case YAMLs.",
+    )
+    p.add_argument(
+        "--output-dir",
+        default=str(DEFAULT_OUTPUT_DIR),
+        help="Where to write per-run artifacts.",
+    )
+    p.add_argument(
+        "--fail-threshold",
+        type=float,
+        default=None,
+        help="Exit non-zero if overall pass-rate < this percentage.",
+    )
+    args = p.parse_args(argv)
+
+    cases_dir = Path(args.cases_dir)
+    output_dir = Path(args.output_dir)
+    # Create the artifact directory up front, before any case is resolved.
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    try:
+        cases = _resolve_cases(cases_dir, args.case)
+    except FileNotFoundError as e:
+        print(f"error: {e}", file=sys.stderr)
+        return 2
+
+    if not cases:
+        print(f"no cases found in {cases_dir}", file=sys.stderr)
+        return 2
+
+    summaries = asyncio.run(
+        run_cases(
+            cases,
+            runs_override=args.runs,
+            model_override=args.model,
+            provider_override=args.provider,
+            output_dir=output_dir,
+        )
+    )
+    print()
+    total_pass, total_runs = print_summary(summaries, output_dir)
+
+    # The gate applies only when a threshold was given and at least one run
+    # happened; with zero runs the exit code stays 0.
+    if args.fail_threshold is not None and total_runs:
+        rate = 100.0 * total_pass / total_runs
+        if rate < args.fail_threshold:
+            print(
+                f"\nFAIL: overall {rate:.1f}% < threshold {args.fail_threshold:.1f}%",
+                file=sys.stderr,
+            )
+            return 1
+    return 0
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index cf289cc1d..06e6cac4f 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -9,6 +9,8 @@ import { CloudProvider } from "./lib/cloudContext";
 import { CloudStatusProvider } from "./hooks/useCloudStatus";
 import { OnboardingProvider } from "./contexts/OnboardingContext";
 import { BillingProvider } from "./contexts/BillingContext";
+import { AgentProvider } from "./contexts/AgentContext";
+import { AgentDrawer } from "./components/agent/AgentDrawer";
 import {
   handleOAuthCallback,
   initElectronAuthListener,
@@ -115,7 +117,18 @@ function App() {
                 <ServerInfoProvider>
                   <CloudProvider wsUrl={CLOUD_WS_URL} apiKey={CLOUD_KEY}>
                     <OnboardingProvider>
-                      <StreamPage />
+                      <AgentProvider>
+                        {/* Row: main app (StreamPage) + optional agent
+                            drawer. Drawer is a flex sibling, not a fixed
+                            overlay, so the graph/perform panels resize to
+                            fill remaining width and nothing is obscured. */}
+                        <div className="flex h-screen min-h-0 w-screen overflow-hidden">
+                          <div className="flex-1 min-w-0 h-full overflow-hidden">
+                            <StreamPage />
+                          </div>
+                          <AgentDrawer />
+                        </div>
+                      </AgentProvider>
                     </OnboardingProvider>
                   </CloudProvider>
                 </ServerInfoProvider>
diff --git a/frontend/src/components/Header.tsx b/frontend/src/components/Header.tsx
index c3927d901..9661c2f94 100644
--- a/frontend/src/components/Header.tsx
+++ b/frontend/src/components/Header.tsx
@@ -70,7 +70,7 @@ export function Header({
   const [settingsOpen, setSettingsOpen] = useState(false);
   const [pluginsOpen, setPluginsOpen] = useState(false);
   const [initialTab, setInitialTab] = useState<
-    "general" | "account" | "api-keys" | "loras" | "osc" | "billing"
+    "general" | "account" | "api-keys" | "agent" | "loras" | "osc" | "billing"
   >("general");
   const [initialPluginPath, setInitialPluginPath] = useState("");
   const [pluginsInitialTab, setPluginsInitialTab] = useState<
diff --git a/frontend/src/components/SettingsDialog.tsx b/frontend/src/components/SettingsDialog.tsx
index 83cc6fcef..e0fed58be 100644
--- a/frontend/src/components/SettingsDialog.tsx
+++ b/frontend/src/components/SettingsDialog.tsx
@@ -2,6 +2,7 @@ import { useState, useEffect } from "react";
 import { Dialog, DialogContent } from "./ui/dialog";
 import { Tabs, TabsContent, TabsList, TabsTrigger } from "./ui/tabs";
 import { AccountTab } from "./settings/AccountTab";
+import { AgentProviderTab } from "./settings/AgentProviderTab";
 import { ApiKeysTab } from "./settings/ApiKeysTab";
 import { GeneralTab } from "./settings/GeneralTab";
 import { ReportBugDialog } from "./ReportBugDialog";
@@ -24,6 +25,7 @@ interface SettingsDialogProps {
     | "account"
     | "billing"
     | "api-keys"
+    | "agent"
     | "loras"
     | "osc"
     | "dmx"
@@ -160,6 +162,12 @@ export function SettingsDialog({
             >
               API Keys
             </TabsTrigger>
+            <TabsTrigger
+              value="agent"
+              className="w-full justify-start px-3 py-2 hover:bg-muted/50 data-[state=active]:bg-muted"
+            >
+              Agent
+            </TabsTrigger>
             <TabsTrigger
               value="loras"
               className="w-full justify-start px-3 py-2 hover:bg-muted/50 data-[state=active]:bg-muted"
@@ -210,6 +218,9 @@ export function SettingsDialog({
             <TabsContent value="api-keys" className="mt-0">
               <ApiKeysTab isActive={open && activeTab === "api-keys"} />
             </TabsContent>
+            <TabsContent value="agent" className="mt-0">
+              <AgentProviderTab isActive={open && activeTab === "agent"} />
+            </TabsContent>
             <TabsContent value="loras" className="mt-0">
               <LoRAsTab
                 loraFiles={loraFiles}
diff --git a/frontend/src/components/agent/AgentDrawer.tsx b/frontend/src/components/agent/AgentDrawer.tsx
new file mode 100644
index 000000000..c2b210099
--- /dev/null
+++ b/frontend/src/components/agent/AgentDrawer.tsx
@@ -0,0 +1,197 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { X, Trash2, StopCircle } from "lucide-react";
+import { useAgent } from "@/contexts/AgentContext";
+import { Button } from "@/components/ui/button";
+import { ChatTranscript } from "./ChatTranscript";
+import { Composer } from "./Composer";
+
+const DRAWER_WIDTH_KEY = "scope:agent:drawer:width";
+const MIN_WIDTH = 320;
+const MAX_WIDTH = 900;
+const DEFAULT_WIDTH = 440;
+
+// Keeps a drawer width inside [MIN_WIDTH, MAX_WIDTH].
+const clampWidth = (value: number): number =>
+  Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, value));
+
+// Reads the persisted drawer width, falling back to DEFAULT_WIDTH when the
+// entry is missing or not a number, or when storage itself is unavailable.
+function readStoredWidth(): number {
+  try {
+    const stored = localStorage.getItem(DRAWER_WIDTH_KEY);
+    const parsed = stored ? parseInt(stored, 10) : NaN;
+    return Number.isFinite(parsed) ? clampWidth(parsed) : DEFAULT_WIDTH;
+  } catch {
+    // Accessing localStorage can throw (e.g. SecurityError when storage is
+    // blocked); the drawer must still render, so use the default width.
+    return DEFAULT_WIDTH;
+  }
+}
+
+/**
+ * Resizable side panel hosting the agent chat: header (provider badge, stop,
+ * clear and close buttons), config/API-key banners, transcript and composer.
+ *
+ * Renders nothing while the drawer is closed. The chosen width is clamped to
+ * [MIN_WIDTH, MAX_WIDTH] and persisted to localStorage on a best-effort basis.
+ */
+export function AgentDrawer() {
+  const {
+    drawerOpen,
+    setDrawerOpen,
+    messages,
+    isStreaming,
+    config,
+    configError,
+    sendMessage,
+    abort,
+    resetSession,
+    decideProposal,
+    pendingProposal,
+  } = useAgent();
+
+  const [width, setWidth] = useState<number>(readStoredWidth);
+  const draggingRef = useRef(false);
+
+  useEffect(() => {
+    try {
+      localStorage.setItem(DRAWER_WIDTH_KEY, String(width));
+    } catch {
+      // Persisting the width is best-effort (storage may be blocked or
+      // full); a failed write must not break the drawer.
+    }
+  }, [width]);
+
+  const onDragStart = useCallback(
+    (e: React.MouseEvent) => {
+      e.preventDefault();
+      draggingRef.current = true;
+      const startX = e.clientX;
+      const startWidth = width;
+      const onMove = (me: MouseEvent) => {
+        if (!draggingRef.current) return;
+        // The handle is on the drawer's left edge, so moving the pointer
+        // left (smaller clientX) makes the drawer wider.
+        setWidth(clampWidth(startWidth + (startX - me.clientX)));
+      };
+      const onUp = () => {
+        draggingRef.current = false;
+        window.removeEventListener("mousemove", onMove);
+        window.removeEventListener("mouseup", onUp);
+      };
+      window.addEventListener("mousemove", onMove);
+      window.addEventListener("mouseup", onUp);
+    },
+    [width]
+  );
+
+  if (!drawerOpen) return null;
+
+  // A key is required for every provider except self-hosted; only warn once
+  // the config has loaded without error and no key source is reported.
+  const needsKey =
+    !!config &&
+    !configError &&
+    config.key_sources[config.provider] == null &&
+    config.provider !== "self_hosted";
+
+  return (
+    <div
+      className="relative h-full shrink-0 bg-[#0f0f0f] border-l border-[rgba(255,255,255,0.08)] flex flex-col"
+      style={{ width }}
+      role="complementary"
+      aria-label="Scope Agent"
+    >
+      {/* Resize handle on the drawer's LEFT edge: dragging it leftward,
+          toward the graph, widens the drawer. */}
+      <div
+        role="separator"
+        aria-orientation="vertical"
+        onMouseDown={onDragStart}
+        className="absolute top-0 left-0 bottom-0 w-1 -ml-0.5 cursor-col-resize hover:bg-[rgba(255,255,255,0.12)]"
+        title="Drag to resize"
+      />
+
+      {/* Header */}
+      <div className="flex items-center justify-between px-4 py-3 border-b border-[rgba(255,255,255,0.08)]">
+        <div className="flex items-center gap-2">
+          <div className="text-sm font-medium text-[#fafafa]">Scope Agent</div>
+          {config && (
+            <span className="text-[10px] uppercase tracking-wide text-[#8c8c8d] rounded px-1.5 py-0.5 border border-[rgba(255,255,255,0.08)]">
+              {config.provider === "anthropic"
+                ? `Claude • ${config.model}`
+                : config.provider === "openai_compatible"
+                  ? `OpenAI • ${config.model}`
+                  : `Local • ${config.model}`}
+            </span>
+          )}
+        </div>
+        <div className="flex items-center gap-1">
+          {isStreaming && (
+            <Button
+              variant="ghost"
+              size="sm"
+              onClick={abort}
+              title="Stop current response"
+            >
+              <StopCircle className="h-4 w-4" />
+            </Button>
+          )}
+          <Button
+            variant="ghost"
+            size="sm"
+            onClick={() => {
+              if (confirm("Clear this agent conversation?")) resetSession();
+            }}
+            title="New conversation"
+            disabled={messages.length === 0}
+          >
+            <Trash2 className="h-4 w-4" />
+          </Button>
+          <Button
+            variant="ghost"
+            size="sm"
+            onClick={() => setDrawerOpen(false)}
+            title="Close"
+          >
+            <X className="h-4 w-4" />
+          </Button>
+        </div>
+      </div>
+
+      {/* Banner: missing key / config error */}
+      {configError && (
+        <div className="px-4 py-2 text-xs bg-red-900/30 text-red-300 border-b border-red-900/40">
+          Failed to load agent config: {configError}
+        </div>
+      )}
+      {needsKey && (
+        <div className="px-4 py-2 text-xs bg-amber-900/20 text-amber-300 border-b border-amber-900/30">
+          No API key configured for{" "}
+          {config?.provider === "anthropic" ? "Anthropic" : "OpenAI-compatible"}
+          . Open Settings → API Keys to add one.
+        </div>
+      )}
+
+      {/* Transcript */}
+      <ChatTranscript
+        messages={messages}
+        pendingProposal={pendingProposal}
+        onDecide={decideProposal}
+      />
+
+      {/* Composer */}
+      <Composer
+        onSend={sendMessage}
+        disabled={isStreaming || (needsKey && messages.length === 0)}
+        placeholder={
+          needsKey && messages.length === 0
+            ? "Add an API key in Settings to start."
+            : isStreaming
+              ? "Agent is working…"
+              : "Ask the agent…"
+        }
+      />
+    </div>
+  );
+}
diff --git a/frontend/src/components/agent/ChatTranscript.tsx b/frontend/src/components/agent/ChatTranscript.tsx
new file mode 100644
index 000000000..11976ec21
--- /dev/null
+++ b/frontend/src/components/agent/ChatTranscript.tsx
@@ -0,0 +1,86 @@
+import { useEffect, useRef } from "react";
+import type { AgentMessage, AgentProposal } from "@/contexts/AgentContext";
+import { MessageBubble } from "./MessageBubble";
+import { WorkflowProposalCard } from "./WorkflowProposalCard";
+
+interface ChatTranscriptProps {
+  messages: AgentMessage[];
+  pendingProposal: AgentProposal | null;
+  onDecide: (approved: boolean, reason?: string) => Promise<void>;
+}
+
+// Distance from the bottom, in px, within which the view counts as pinned.
+const STICKY_THRESHOLD_PX = 48;
+
+// Example requests listed in the empty state.
+const EXAMPLE_PROMPTS = [
+  `• "Hyperrealistic scene with 3–5 switchable prompts"`,
+  `• "It's not recognizing depth well"`,
+  `• "Help me record what I'm seeing"`,
+];
+
+/**
+ * Scrollable list of agent messages, followed by the pending workflow
+ * proposal card while that proposal has no decision yet. With no messages
+ * and no pending proposal it shows an introductory empty state instead.
+ */
+export function ChatTranscript({
+  messages,
+  pendingProposal,
+  onDecide,
+}: ChatTranscriptProps) {
+  const containerRef = useRef<HTMLDivElement | null>(null);
+  const pinnedToBottomRef = useRef<boolean>(true);
+
+  // Remember whether the user is (nearly) at the bottom so new content only
+  // auto-scrolls when they have not scrolled up to read history.
+  const handleScroll = (event: React.UIEvent<HTMLDivElement>) => {
+    const { scrollHeight, scrollTop, clientHeight } = event.currentTarget;
+    const distanceFromBottom = scrollHeight - scrollTop - clientHeight;
+    pinnedToBottomRef.current = distanceFromBottom < STICKY_THRESHOLD_PX;
+  };
+
+  useEffect(() => {
+    const container = containerRef.current;
+    if (pinnedToBottomRef.current && container) {
+      container.scrollTop = container.scrollHeight;
+    }
+  }, [messages, pendingProposal]);
+
+  const isEmpty = messages.length === 0 && !pendingProposal;
+  if (isEmpty) {
+    return (
+      <div className="flex-1 overflow-y-auto p-6 text-sm text-[#8c8c8d]">
+        <p className="mb-2 text-[#b0b0b0] font-medium">
+          Tell me what you want to build.
+        </p>
+        <p className="mb-4 leading-relaxed">
+          I can pick pipelines, compose workflows, and tune parameters by
+          watching the output.
+        </p>
+        <ul className="space-y-1.5 text-xs">
+          {EXAMPLE_PROMPTS.map(prompt => (
+            <li key={prompt} className="leading-relaxed">
+              {prompt}
+            </li>
+          ))}
+        </ul>
+      </div>
+    );
+  }
+
+  return (
+    <div
+      ref={containerRef}
+      onScroll={handleScroll}
+      className="flex-1 overflow-y-auto p-4 space-y-3"
+    >
+      {messages.map(message => (
+        <MessageBubble key={message.id} message={message} />
+      ))}
+      {pendingProposal && !pendingProposal.decision && (
+        <WorkflowProposalCard proposal={pendingProposal} onDecide={onDecide} />
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/agent/Composer.tsx b/frontend/src/components/agent/Composer.tsx
new file mode 100644
index 000000000..294f8beb6
--- /dev/null
+++ b/frontend/src/components/agent/Composer.tsx
@@ -0,0 +1,68 @@
+import { useRef, useState, type KeyboardEvent } from "react";
+import { Send } from "lucide-react";
+import { Button } from "@/components/ui/button";
+
+interface ComposerProps {
+  onSend: (text: string) => Promise<void>;
+  disabled?: boolean;
+  placeholder?: string;
+}
+
+/**
+ * Message input for the agent drawer: a two-row textarea plus a Send button.
+ *
+ * The draft is trimmed before it is handed to `onSend`; empty drafts and
+ * sends while `disabled` are ignored. When `placeholder` is omitted a default
+ * is derived from `disabled`.
+ */
+export function Composer({ onSend, disabled, placeholder }: ComposerProps) {
+  const [value, setValue] = useState("");
+  const textareaRef = useRef<HTMLTextAreaElement>(null);
+
+  // Clears the draft up front so the textarea is empty while `onSend` runs,
+  // then returns focus to the textarea once `onSend` resolves.
+  const send = async () => {
+    const text = value.trim();
+    if (!text || disabled) return;
+    setValue("");
+    await onSend(text);
+    textareaRef.current?.focus();
+  };
+
+  const onKeyDown = (e: KeyboardEvent<HTMLTextAreaElement>) => {
+    // Enter sends (with or without Cmd/Ctrl); Shift+Enter inserts a newline.
+    if (e.key !== "Enter" || e.shiftKey) return;
+    // An IME uses Enter to commit the in-progress composition (CJK input);
+    // that keystroke must not submit. keyCode 229 covers browsers that report
+    // the commit keydown without setting `isComposing`.
+    if (e.nativeEvent.isComposing || e.keyCode === 229) return;
+    e.preventDefault();
+    void send();
+  };
+
+  return (
+    <div className="border-t border-[rgba(255,255,255,0.08)] p-3 flex items-end gap-2">
+      <textarea
+        ref={textareaRef}
+        value={value}
+        onChange={e => setValue(e.target.value)}
+        onKeyDown={onKeyDown}
+        rows={2}
+        placeholder={
+          placeholder ?? (disabled ? "Agent is working…" : "Ask the agent…")
+        }
+        className="flex-1 resize-none rounded-md bg-[#141414] border border-[rgba(255,255,255,0.08)] px-3 py-2 text-sm text-[#e6e6e6] placeholder:text-[#595959] focus:outline-none focus:border-[#1f6feb] disabled:opacity-60"
+        disabled={disabled}
+      />
+      <Button
+        size="sm"
+        onClick={send}
+        disabled={disabled || !value.trim()}
+        className="gap-1.5"
+      >
+        <Send className="h-3.5 w-3.5" />
+        Send
+      </Button>
+    </div>
+  );
+}
diff --git a/frontend/src/components/agent/MessageBubble.tsx b/frontend/src/components/agent/MessageBubble.tsx
new file mode 100644
index 000000000..9c276865d
--- /dev/null
+++ b/frontend/src/components/agent/MessageBubble.tsx
@@ -0,0 +1,48 @@
+import { Loader2 } from "lucide-react";
+import type { AgentMessage } from "@/contexts/AgentContext";
+import { ToolCallBlock } from "./ToolCallBlock";
+
+/**
+ * Renders one transcript entry. User messages become a right-aligned bubble;
+ * every other role renders its tool calls, then its text, and falls back to
+ * a "Thinking…" spinner while the message is pending with nothing to show.
+ */
+export function MessageBubble({ message }: { message: AgentMessage }) {
+  const { role, text, toolCalls, pending } = message;
+
+  if (role === "user") {
+    return (
+      <div className="flex justify-end">
+        <div className="rounded-lg px-3 py-2 text-sm bg-[#1f6feb] text-white max-w-[85%] whitespace-pre-wrap break-words">
+          {text}
+        </div>
+      </div>
+    );
+  }
+
+  const hasToolCalls = toolCalls.length > 0;
+  const isEmpty = text.length === 0 && !hasToolCalls;
+
+  return (
+    <div className="flex flex-col gap-1.5 max-w-[92%]">
+      {hasToolCalls && (
+        <div className="rounded-md border border-[rgba(255,255,255,0.05)] bg-[#121212]/50 px-1.5 py-1 flex flex-col">
+          {toolCalls.map(toolCall => (
+            <ToolCallBlock key={toolCall.id} call={toolCall} />
+          ))}
+        </div>
+      )}
+      {text && (
+        <div className="rounded-lg px-3 py-2 text-sm bg-[#1a1a1a] text-[#e6e6e6] border border-[rgba(255,255,255,0.06)] whitespace-pre-wrap break-words">
+          {text}
+        </div>
+      )}
+      {pending && isEmpty && (
+        <div className="inline-flex items-center gap-2 text-xs text-[#8c8c8d] px-2 py-1">
+          <Loader2 className="h-3 w-3 animate-spin" />
+          Thinking…
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/agent/ToolCallBlock.tsx b/frontend/src/components/agent/ToolCallBlock.tsx
new file mode 100644
index 000000000..5fd589b99
--- /dev/null
+++ b/frontend/src/components/agent/ToolCallBlock.tsx
@@ -0,0 +1,70 @@
+import { useState } from "react";
+import {
+  ChevronDown,
+  ChevronRight,
+  CheckCircle2,
+  XCircle,
+  Loader2,
+} from "lucide-react";
+import type { AgentToolCall } from "@/contexts/AgentContext";
+
+/**
+ * One row in an assistant message's tool-call list: a status icon, the tool
+ * name and an optional summary. When the call carries a non-empty `input`
+ * the row is a disclosure button (open state exposed via `aria-expanded`)
+ * that toggles a pretty-printed JSON view; otherwise the button is disabled.
+ */
+export function ToolCallBlock({ call }: { call: AgentToolCall }) {
+  const [expanded, setExpanded] = useState(false);
+
+  // Any status other than "running" or "error" gets the success check.
+  const Icon =
+    call.status === "running"
+      ? Loader2
+      : call.status === "error"
+        ? XCircle
+        : CheckCircle2;
+  const iconClass =
+    call.status === "running"
+      ? "h-3 w-3 animate-spin text-[#8c8c8d] shrink-0"
+      : call.status === "error"
+        ? "h-3 w-3 text-red-400 shrink-0"
+        : "h-3 w-3 text-emerald-400/80 shrink-0";
+
+  // Only calls with a non-empty input have anything to expand.
+  const hasDetail = !!(call.input && Object.keys(call.input).length > 0);
+
+  return (
+    <div>
+      <button
+        type="button"
+        onClick={() => hasDetail && setExpanded(v => !v)}
+        disabled={!hasDetail}
+        aria-expanded={hasDetail ? expanded : undefined}
+        className="w-full flex items-center gap-1.5 px-1 py-0.5 text-[11px] text-[#8c8c8d] hover:text-[#cfd3da] disabled:hover:text-[#8c8c8d] disabled:cursor-default text-left"
+      >
+        {hasDetail ? (
+          expanded ? (
+            <ChevronDown className="h-2.5 w-2.5 shrink-0" />
+          ) : (
+            <ChevronRight className="h-2.5 w-2.5 shrink-0" />
+          )
+        ) : (
+          <span className="w-2.5 shrink-0" />
+        )}
+        <Icon className={iconClass} />
+        <span className="font-mono shrink-0">{call.name}</span>
+        {call.summary && (
+          <span className="truncate text-[#6e6e6e]">— {call.summary}</span>
+        )}
+      </button>
+      {expanded && hasDetail && (
+        <div className="pl-5 pr-1 pb-1 pt-0.5 text-[10px] text-[#8c8c8d] font-mono space-y-1">
+          <pre className="whitespace-pre-wrap break-words bg-[#0f0f0f]/60 rounded px-2 py-1 border border-[rgba(255,255,255,0.04)]">
+            {JSON.stringify(call.input, null, 2)}
+          </pre>
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/agent/WorkflowProposalCard.tsx b/frontend/src/components/agent/WorkflowProposalCard.tsx
new file mode 100644
index 000000000..f525cdab3
--- /dev/null
+++ b/frontend/src/components/agent/WorkflowProposalCard.tsx
@@ -0,0 +1,146 @@
+import { useState } from "react";
+import { Sparkles, Check, X } from "lucide-react";
+import type { AgentProposal } from "@/contexts/AgentContext";
+import { Button } from "@/components/ui/button";
+
+interface WorkflowProposalCardProps {
+  proposal: AgentProposal;
+  onDecide: (approved: boolean, reason?: string) => Promise<void>;
+}
+
+type Decision = "approve" | "reject";
+
+/**
+ * Card asking the user to approve or reject a workflow proposed by the
+ * agent. Shows the rationale, node/edge counts, the pipelines it would load
+ * and an optional raw-graph view. Rejecting first opens a textarea for an
+ * optional reason, which is trimmed and passed to `onDecide`.
+ */
+export function WorkflowProposalCard({
+  proposal,
+  onDecide,
+}: WorkflowProposalCardProps) {
+  const [submitting, setSubmitting] = useState<Decision | null>(null);
+  const [showRejectInput, setShowRejectInput] = useState(false);
+  const [rejectReason, setRejectReason] = useState("");
+  const [showGraph, setShowGraph] = useState(false);
+
+  const nodes = proposal.graph?.nodes ?? [];
+  const edges = proposal.graph?.edges ?? [];
+
+  // Marks which button is in flight while the decision request runs, and
+  // always clears the marker afterwards, whether or not the request failed.
+  const runDecision = async (
+    kind: Decision,
+    decide: () => Promise<void>
+  ) => {
+    setSubmitting(kind);
+    try {
+      await decide();
+    } finally {
+      setSubmitting(null);
+    }
+  };
+
+  const handleApprove = () => runDecision("approve", () => onDecide(true));
+
+  const handleReject = () =>
+    runDecision("reject", () =>
+      onDecide(false, rejectReason.trim() || undefined)
+    );
+
+  const isBusy = submitting !== null;
+
+  return (
+    <div className="rounded-lg border border-[rgba(31,111,235,0.4)] bg-[rgba(31,111,235,0.08)] p-3">
+      <div className="flex items-center gap-2 mb-2 text-sm font-medium text-[#fafafa]">
+        <Sparkles className="h-4 w-4 text-[#5597ff]" />
+        Workflow proposal
+      </div>
+
+      {proposal.rationale && (
+        <p className="text-xs text-[#cfd3da] mb-3 leading-relaxed whitespace-pre-wrap">
+          {proposal.rationale}
+        </p>
+      )}
+
+      <div className="text-[11px] text-[#8c8c8d] mb-2">
+        {nodes.length} node{nodes.length === 1 ? "" : "s"} · {edges.length} edge
+        {edges.length === 1 ? "" : "s"}
+        {proposal.pipelinesToLoad.length > 0 && (
+          <>
+            {" · loads "}
+            <span className="font-mono text-[#b0b0b0]">
+              {proposal.pipelinesToLoad.join(", ")}
+            </span>
+          </>
+        )}
+      </div>
+
+      <button
+        type="button"
+        onClick={() => setShowGraph(v => !v)}
+        className="text-[11px] text-[#5597ff] hover:underline mb-2"
+      >
+        {showGraph ? "Hide full graph" : "View full graph"}
+      </button>
+      {showGraph && (
+        <pre className="text-[10px] text-[#b0b0b0] bg-[#0f0f0f] rounded px-2 py-1.5 border border-[rgba(255,255,255,0.04)] overflow-auto max-h-48 mb-2">
+          {JSON.stringify(proposal.graph, null, 2)}
+        </pre>
+      )}
+
+      {showRejectInput ? (
+        <div className="space-y-2">
+          <textarea
+            value={rejectReason}
+            onChange={e => setRejectReason(e.target.value)}
+            placeholder="What should the agent try instead?"
+            className="w-full text-xs bg-[#0f0f0f] border border-[rgba(255,255,255,0.08)] rounded px-2 py-1.5 text-[#e6e6e6]"
+            rows={2}
+          />
+          <div className="flex gap-2">
+            <Button
+              size="sm"
+              variant="outline"
+              onClick={() => setShowRejectInput(false)}
+              disabled={isBusy}
+            >
+              Cancel
+            </Button>
+            <Button
+              size="sm"
+              variant="destructive"
+              onClick={handleReject}
+              disabled={isBusy}
+            >
+              Send rejection
+            </Button>
+          </div>
+        </div>
+      ) : (
+        <div className="flex gap-2">
+          <Button
+            size="sm"
+            onClick={handleApprove}
+            disabled={isBusy}
+            className="gap-1.5"
+          >
+            <Check className="h-3.5 w-3.5" />
+            {submitting === "approve" ? "Approving…" : "Approve"}
+          </Button>
+          <Button
+            size="sm"
+            variant="outline"
+            onClick={() => setShowRejectInput(true)}
+            disabled={isBusy}
+            className="gap-1.5"
+          >
+            <X className="h-3.5 w-3.5" />
+            Reject
+          </Button>
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/graph/GraphEditor.tsx b/frontend/src/components/graph/GraphEditor.tsx
index dda25008d..9af77d388 100644
--- a/frontend/src/components/graph/GraphEditor.tsx
+++ b/frontend/src/components/graph/GraphEditor.tsx
@@ -156,6 +156,10 @@ export interface GraphEditorHandle {
   loadWorkflow: (
     workflow: import("../../lib/workflowApi").ScopeWorkflow
   ) => void;
+  loadGraphConfig: (
+    graph: import("../../lib/api").GraphConfig,
+    label?: string
+  ) => void;
   updateNodeParam: (nodeId: string, key: string, value: unknown) => void;
   applyExternalParams: (
     params: Record<string, unknown>,
@@ -377,6 +381,15 @@ export const GraphEditor = forwardRef<GraphEditorHandle, GraphEditorProps>(
             workflow.metadata?.name ?? "workflow"
           );
         },
+        loadGraphConfig: (
+          graph: import("../../lib/api").GraphConfig,
+          label?: string
+        ) => {
+          loadGraphFromParsed(
+            graph as unknown as Record<string, unknown>,
+            label ?? "agent-proposal"
+          );
+        },
         updateNodeParam: handleNodeParameterChange,
         applyExternalParams: applyExternalNodeParams,
         clearGraph: handleClear,
diff --git a/frontend/src/components/graph/GraphToolbar.tsx b/frontend/src/components/graph/GraphToolbar.tsx
index 32cb1930a..88e325ae8 100644
--- a/frontend/src/components/graph/GraphToolbar.tsx
+++ b/frontend/src/components/graph/GraphToolbar.tsx
@@ -8,8 +8,10 @@ import {
   Trash2,
   Loader2,
   RotateCcw,
+  Sparkles,
 } from "lucide-react";
 import { NODE_TOKENS } from "./ui";
+import { useAgent } from "@/contexts/AgentContext";
 import {
   DropdownMenu,
   DropdownMenuTrigger,
@@ -66,6 +68,8 @@ export function GraphToolbar({
   const streamShortcut = getShortcutById("toggle-stream", shortcuts);
   const exportShortcut = getShortcutById("export", shortcuts);
 
+  const { toggleDrawer: toggleAgent, drawerOpen: agentOpen } = useAgent();
+
   return (
     <TooltipProvider delayDuration={400}>
       <div data-tour="add-node" className={NODE_TOKENS.toolbar}>
@@ -122,6 +126,23 @@ export function GraphToolbar({
           </DropdownMenuContent>
         </DropdownMenu>
 
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <button
+              onClick={toggleAgent}
+              aria-pressed={agentOpen}
+              className={NODE_TOKENS.toolbarMenuButton}
+              data-tour="agent-button"
+            >
+              <Sparkles className="h-3.5 w-3.5" />
+              Agent
+            </button>
+          </TooltipTrigger>
+          <TooltipContent side="bottom">
+            {agentOpen ? "Close agent" : "Open agent"}
+          </TooltipContent>
+        </Tooltip>
+
         <input
           ref={fileInputRef}
           type="file"
diff --git a/frontend/src/components/settings/AgentProviderTab.tsx b/frontend/src/components/settings/AgentProviderTab.tsx
new file mode 100644
index 000000000..27478553d
--- /dev/null
+++ b/frontend/src/components/settings/AgentProviderTab.tsx
@@ -0,0 +1,225 @@
+import { useCallback, useEffect, useState } from "react";
+import { toast } from "sonner";
+import {
+  getAgentConfig,
+  testAgentConnection,
+  updateAgentConfig,
+  type AgentConfigResponse,
+  type AgentProvider,
+} from "@/lib/api";
+import { Button } from "../ui/button";
+import { Input } from "../ui/input";
+
+interface AgentProviderTabProps {
+  isActive: boolean;
+}
+
+const PROVIDER_LABELS: Record<AgentProvider, string> = {
+  anthropic: "Anthropic (Claude)",
+  openai_compatible: "OpenAI-compatible",
+  self_hosted: "Self-hosted (Ollama / vLLM / LM Studio)",
+};
+
+const PROVIDER_DEFAULT_MODEL: Record<AgentProvider, string> = {
+  anthropic: "claude-sonnet-4-6",
+  openai_compatible: "gpt-4o",
+  self_hosted: "llama3.1",
+};
+
+const PROVIDER_DEFAULT_BASE_URL: Record<AgentProvider, string> = {
+  anthropic: "",
+  openai_compatible: "https://api.openai.com/v1",
+  self_hosted: "http://localhost:11434/v1",
+};
+
+const ANTHROPIC_MODEL_OPTIONS = [
+  "claude-sonnet-4-6",
+  "claude-opus-4-7",
+  "claude-haiku-4-5-20251001",
+];
+
+/**
+ * Settings tab for choosing the agent's provider, model and base URL.
+ *
+ * The form is seeded from the saved config each time the tab becomes
+ * active. Saving persists the form, reloads it, and broadcasts
+ * "scope:agent-config-changed" on `window`.
+ */
+export function AgentProviderTab({ isActive }: AgentProviderTabProps) {
+  const [config, setConfig] = useState<AgentConfigResponse | null>(null);
+  const [provider, setProvider] = useState<AgentProvider>("anthropic");
+  const [model, setModel] = useState("");
+  const [baseUrl, setBaseUrl] = useState("");
+  const [saving, setSaving] = useState(false);
+  const [testing, setTesting] = useState(false);
+
+  // Loads the saved config and mirrors it into the editable form fields.
+  const fetchConfig = useCallback(async () => {
+    try {
+      const cfg = await getAgentConfig();
+      setConfig(cfg);
+      setProvider(cfg.provider);
+      setModel(cfg.model);
+      setBaseUrl(cfg.base_url ?? "");
+    } catch (e) {
+      toast.error(
+        e instanceof Error ? e.message : "Failed to load agent config"
+      );
+    }
+  }, []);
+
+  // Reload each time the tab becomes active.
+  useEffect(() => {
+    if (isActive) void fetchConfig();
+  }, [isActive, fetchConfig]);
+
+  const handleProviderChange = (next: AgentProvider) => {
+    setProvider(next);
+    // Switching back to the provider that is already saved restores its
+    // saved model and base URL; resetting to defaults here would silently
+    // discard them on the next Save.
+    if (config && next === config.provider) {
+      setModel(config.model);
+      setBaseUrl(config.base_url ?? "");
+      return;
+    }
+    setModel(PROVIDER_DEFAULT_MODEL[next]);
+    setBaseUrl(PROVIDER_DEFAULT_BASE_URL[next]);
+  };
+
+  const handleSave = async () => {
+    setSaving(true);
+    try {
+      await updateAgentConfig({
+        provider,
+        model: model.trim() || PROVIDER_DEFAULT_MODEL[provider],
+        base_url: baseUrl.trim() || null,
+      });
+      toast.success("Agent config saved");
+      await fetchConfig();
+      // Keep the drawer's cached config in sync with settings changes.
+      window.dispatchEvent(new CustomEvent("scope:agent-config-changed"));
+    } catch (e) {
+      toast.error(e instanceof Error ? e.message : "Failed to save config");
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  // NOTE(review): testAgentConnection() receives no form values, so it
+  // presumably checks the last saved config, not unsaved edits — confirm.
+  const handleTest = async () => {
+    setTesting(true);
+    try {
+      const result = await testAgentConnection();
+      if (result.ok) {
+        toast.success("Connection OK");
+      } else {
+        toast.error(`Connection failed: ${result.error ?? "unknown"}`);
+      }
+    } catch (e) {
+      toast.error(e instanceof Error ? e.message : "Test failed");
+    } finally {
+      setTesting(false);
+    }
+  };
+
+  // Short description of where the given provider's API key comes from.
+  const keySourceLabel = (p: AgentProvider): string => {
+    const source = config?.key_sources[p];
+    if (source === "env_var") return "Set via environment variable";
+    if (source === "stored") return "Stored in ~/.daydream-scope/";
+    if (p === "self_hosted") return "Usually no key required";
+    return "Not configured — add one in API Keys tab";
+  };
+
+  return (
+    <div className="space-y-6 max-w-xl">
+      <div>
+        <h3 className="text-lg font-semibold mb-1">Agent Provider</h3>
+        <p className="text-sm text-muted-foreground">
+          Choose which model powers the in-app agent. API keys live in the API
+          Keys tab.
+        </p>
+      </div>
+
+      <div className="space-y-3">
+        <label className="text-sm font-medium">Provider</label>
+        <div className="space-y-2">
+          {(Object.keys(PROVIDER_LABELS) as AgentProvider[]).map(p => (
+            <label
+              key={p}
+              className="flex items-start gap-3 p-3 border border-border rounded-md cursor-pointer hover:bg-muted/30"
+            >
+              <input
+                type="radio"
+                name="agent-provider"
+                value={p}
+                checked={provider === p}
+                onChange={() => handleProviderChange(p)}
+                className="mt-0.5"
+              />
+              <div className="flex-1">
+                <div className="text-sm font-medium">{PROVIDER_LABELS[p]}</div>
+                <div className="text-xs text-muted-foreground mt-0.5">
+                  {keySourceLabel(p)}
+                </div>
+              </div>
+            </label>
+          ))}
+        </div>
+      </div>
+
+      <div className="space-y-2">
+        <label className="text-sm font-medium">Model</label>
+        {provider === "anthropic" ? (
+          <select
+            value={model}
+            onChange={e => setModel(e.target.value)}
+            className="w-full bg-background border border-border rounded-md px-3 py-2 text-sm"
+          >
+            {ANTHROPIC_MODEL_OPTIONS.map(m => (
+              <option key={m} value={m}>
+                {m}
+              </option>
+            ))}
+            {!ANTHROPIC_MODEL_OPTIONS.includes(model) && model && (
+              <option value={model}>{model}</option>
+            )}
+          </select>
+        ) : (
+          <Input
+            value={model}
+            onChange={e => setModel(e.target.value)}
+            placeholder={PROVIDER_DEFAULT_MODEL[provider]}
+            className="text-sm"
+          />
+        )}
+      </div>
+
+      <div className="space-y-2">
+        <label className="text-sm font-medium">
+          Base URL{" "}
+          <span className="text-xs text-muted-foreground font-normal">
+            (optional override)
+          </span>
+        </label>
+        <Input
+          value={baseUrl}
+          onChange={e => setBaseUrl(e.target.value)}
+          placeholder={PROVIDER_DEFAULT_BASE_URL[provider] || "Default"}
+          className="text-sm"
+        />
+      </div>
+
+      <div className="flex gap-2 pt-2">
+        <Button onClick={handleSave} disabled={saving}>
+          {saving ? "Saving…" : "Save"}
+        </Button>
+        <Button variant="outline" onClick={handleTest} disabled={testing}>
+          {testing ? "Testing…" : "Test connection"}
+        </Button>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/components/settings/ApiKeysTab.tsx b/frontend/src/components/settings/ApiKeysTab.tsx
index f05836f8c..efe118aab 100644
--- a/frontend/src/components/settings/ApiKeysTab.tsx
+++ b/frontend/src/components/settings/ApiKeysTab.tsx
@@ -56,6 +56,9 @@ export function ApiKeysTab({ isActive }: ApiKeysTabProps) {
           return next;
         });
         await fetchKeys();
+        // Let the agent chat drawer re-read its config so the "no key
+        // configured" banner disappears without a reload.
+        window.dispatchEvent(new CustomEvent("scope:agent-config-changed"));
       }
     } catch (error) {
       toast.error(
@@ -82,6 +85,7 @@ export function ApiKeysTab({ isActive }: ApiKeysTabProps) {
           return next;
         });
         await fetchKeys();
+        window.dispatchEvent(new CustomEvent("scope:agent-config-changed"));
       }
     } catch (error) {
       toast.error(
diff --git a/frontend/src/contexts/AgentContext.tsx b/frontend/src/contexts/AgentContext.tsx
new file mode 100644
index 000000000..e514f1bf0
--- /dev/null
+++ b/frontend/src/contexts/AgentContext.tsx
@@ -0,0 +1,421 @@
+import {
+  createContext,
+  useCallback,
+  useContext,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+  type ReactNode,
+} from "react";
+import { toast } from "sonner";
+import {
+  decideAgentProposal,
+  getAgentConfig,
+  type AgentConfigResponse,
+  type GraphConfig,
+} from "@/lib/api";
+import { streamAgentChat, type AgentStreamEvent } from "@/lib/agentClient";
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/** One tool invocation displayed inside an assistant message. */
+export interface AgentToolCall {
+  // Copied from the stream event's `id`; later `tool_call_input` and
+  // `tool_call_result` events are matched to this entry by comparing ids.
+  id: string;
+  // Tool name copied from the `tool_call_start` event.
+  name: string;
+  // Arguments delivered by a `tool_call_input` event; unset until one arrives.
+  input?: Record<string, unknown>;
+  // "running" on `tool_call_start`; switches to "done" or "error" on
+  // `tool_call_result` depending on that event's `ok` flag.
+  status: "running" | "done" | "error";
+  // Text taken from the `summary` field of the `tool_call_result` event.
+  summary?: string;
+  // Boolean form of the `ok` flag of the `tool_call_result` event.
+  ok?: boolean;
+}
+
+/** One entry of the chat transcript kept by the provider. */
+export interface AgentMessage {
+  // Locally generated id (see makeId); used to find the message when
+  // streaming updates arrive.
+  id: string;
+  role: "user" | "assistant" | "system";
+  // For assistant messages this grows as `text_delta` events are appended.
+  text: string;
+  // Tool invocations attached to this message, in arrival order.
+  toolCalls: AgentToolCall[];
+  // NOTE(review): not written anywhere in this file — presumably reserved for
+  // marking synthetic continuation turns; confirm against consumers.
+  isContinuation?: boolean;
+  // True on the assistant placeholder while its turn is still streaming.
+  pending?: boolean;
+  // Date.now() at the time the message was appended.
+  createdAt: number;
+}
+
+/**
+ * A workflow proposal awaiting the user's decision. Built from the payload of
+ * a `workflow_proposal` stream event (snake_case fields mapped to camelCase).
+ */
+export interface AgentProposal {
+  // From `proposal_id`; sent back to the backend with the decision.
+  proposalId: string;
+  // Proposed graph; handed to the registered GraphImporter on approval.
+  graph: GraphConfig;
+  // From `graph_hash`.
+  graphHash: string;
+  // From `rationale`.
+  rationale: string;
+  // From `pipelines_to_load`; empty array when the event omits it.
+  pipelinesToLoad: string[];
+  // From `diff`; empty object when the event omits it.
+  diff: Record<string, unknown>;
+  // Set once the backend has accepted the user's decision.
+  decision?: "approved" | "rejected";
+}
+
+// Callback that writes a graph into the canvas. On approval it is called with
+// the proposal's graph and a label of the form `agent-proposal-<proposalId>`.
+export type GraphImporter = (graph: GraphConfig, label?: string) => void;
+
+/** Shape of the value exposed by `AgentContext` and returned by `useAgent()`. */
+interface AgentContextValue {
+  // Drawer visibility; `toggleDrawer` flips the current value.
+  drawerOpen: boolean;
+  setDrawerOpen: (open: boolean) => void;
+  toggleDrawer: () => void;
+
+  // Chat transcript, in append order.
+  messages: AgentMessage[];
+  // True from the start of a turn until its stream settles or is aborted.
+  isStreaming: boolean;
+  // Backend session id; null until a finished stream has returned one.
+  sessionId: string | null;
+  // Most recent proposal received via a `workflow_proposal` event, if any.
+  pendingProposal: AgentProposal | null;
+  // Result of the last successful agent-config read.
+  config: AgentConfigResponse | null;
+  // Message of the last failed config read; reset to null on success.
+  configError: string | null;
+
+  // Sends a user-authored message (trimmed; blank input is ignored).
+  sendMessage: (text: string) => Promise<void>;
+  // Aborts the in-flight stream, if any.
+  abort: () => void;
+  // Aborts, then clears the transcript, the pending proposal and the session.
+  resetSession: () => void;
+  // Reports the user's decision on the pending proposal and runs the
+  // follow-up continuation turn.
+  decideProposal: (approved: boolean, reason?: string) => Promise<void>;
+  // Re-reads the agent config from the backend.
+  refreshConfig: () => Promise<void>;
+  // Registered once by StreamPage so the agent can write approved proposals
+  // into the React Flow canvas. Returns an unregister fn.
+  registerGraphImporter: (importer: GraphImporter) => () => void;
+}
+
+// Defaults to null so useAgent() can detect use outside of an AgentProvider.
+const AgentContext = createContext<AgentContextValue | null>(null);
+
+/**
+ * Build a locally-unique id of the form `<prefix>_<time>_<random>`, where
+ * both suffix parts are base-36 strings (current time in ms, and up to six
+ * characters derived from Math.random()).
+ */
+function makeId(prefix: string): string {
+  const timePart = Date.now().toString(36);
+  const randomPart = Math.random().toString(36).slice(2, 8);
+  return [prefix, timePart, randomPart].join("_");
+}
+
+// ---------------------------------------------------------------------------
+// Provider
+// ---------------------------------------------------------------------------
+
+/**
+ * Context provider that owns the agent chat state: drawer visibility, the
+ * message transcript, the in-flight stream, the pending workflow proposal and
+ * the agent configuration read from the backend.
+ *
+ * Consumers read this state through `useAgent()`.
+ */
+export function AgentProvider({ children }: { children: ReactNode }) {
+  const [drawerOpen, setDrawerOpen] = useState(false);
+  const [messages, setMessages] = useState<AgentMessage[]>([]);
+  const [isStreaming, setIsStreaming] = useState(false);
+  const [sessionId, setSessionId] = useState<string | null>(null);
+  const [pendingProposal, setPendingProposal] = useState<AgentProposal | null>(
+    null
+  );
+  const [config, setConfig] = useState<AgentConfigResponse | null>(null);
+  const [configError, setConfigError] = useState<string | null>(null);
+
+  // Controller of the stream currently considered "active", if any.
+  const abortRef = useRef<AbortController | null>(null);
+  // Mirror of `sessionId` so stable callbacks can read the latest value
+  // without listing it as a dependency.
+  const sessionIdRef = useRef<string | null>(null);
+  sessionIdRef.current = sessionId;
+  const graphImporterRef = useRef<GraphImporter | null>(null);
+
+  // Stores the importer and returns an unregister function. Unregistering
+  // only clears the slot if it still holds this same importer, so a stale
+  // cleanup cannot remove a newer registration.
+  const registerGraphImporter = useCallback((importer: GraphImporter) => {
+    graphImporterRef.current = importer;
+    return () => {
+      if (graphImporterRef.current === importer) {
+        graphImporterRef.current = null;
+      }
+    };
+  }, []);
+
+  // Reads the agent config; on failure keeps the previous config and records
+  // the error message instead.
+  const refreshConfig = useCallback(async () => {
+    try {
+      const cfg = await getAgentConfig();
+      setConfig(cfg);
+      setConfigError(null);
+    } catch (e) {
+      setConfigError(e instanceof Error ? e.message : String(e));
+    }
+  }, []);
+
+  useEffect(() => {
+    void refreshConfig();
+  }, [refreshConfig]);
+
+  // Pick up API key / provider changes from Settings without requiring a
+  // page reload. Settings tabs dispatch "scope:agent-config-changed" after
+  // a successful save; we re-read /agent/config and the drawer's
+  // "needsKey" banner + composer disabled-state recompute automatically.
+  useEffect(() => {
+    const handler = () => {
+      void refreshConfig();
+    };
+    window.addEventListener("scope:agent-config-changed", handler);
+    return () => {
+      window.removeEventListener("scope:agent-config-changed", handler);
+    };
+  }, [refreshConfig]);
+
+  // Cancels the active stream (if any) and clears the streaming flag right
+  // away; the aborted stream's own cleanup then has nothing left to reset.
+  const abort = useCallback(() => {
+    abortRef.current?.abort();
+    abortRef.current = null;
+    setIsStreaming(false);
+  }, []);
+
+  const resetSession = useCallback(() => {
+    abort();
+    setMessages([]);
+    setPendingProposal(null);
+    setSessionId(null);
+  }, [abort]);
+
+  // Core streaming pipeline. Shared between sendMessage and decideProposal.
+  // Starting a new stream aborts whichever stream was active before it.
+  const runStream = useCallback(
+    async (
+      text: string,
+      { isContinuation = false }: { isContinuation?: boolean } = {}
+    ) => {
+      const controller = new AbortController();
+      abortRef.current?.abort();
+      abortRef.current = controller;
+
+      const userMsgId = makeId("m");
+      const assistantMsgId = makeId("m");
+
+      // Append user message (hide continuations from the visible list — they're
+      // synthetic, not authored by the user).
+      if (!isContinuation) {
+        setMessages(prev => [
+          ...prev,
+          {
+            id: userMsgId,
+            role: "user",
+            text,
+            toolCalls: [],
+            createdAt: Date.now(),
+          },
+        ]);
+      }
+
+      // Append streaming assistant placeholder.
+      setMessages(prev => [
+        ...prev,
+        {
+          id: assistantMsgId,
+          role: "assistant",
+          text: "",
+          toolCalls: [],
+          pending: true,
+          createdAt: Date.now(),
+        },
+      ]);
+      setIsStreaming(true);
+
+      // Applies `mutator` to this turn's assistant message only. A no-op when
+      // the message is no longer in the list (e.g. after resetSession).
+      const updateAssistant = (mutator: (m: AgentMessage) => AgentMessage) => {
+        setMessages(prev =>
+          prev.map(m => (m.id === assistantMsgId ? mutator(m) : m))
+        );
+      };
+
+      // Folds one stream event into React state.
+      const handleEvent = (ev: AgentStreamEvent) => {
+        const data = ev.data as Record<string, unknown>;
+        switch (ev.event) {
+          case "text_delta":
+            updateAssistant(m => ({
+              ...m,
+              text: m.text + String(data.delta ?? ""),
+            }));
+            break;
+          case "tool_call_start":
+            updateAssistant(m => ({
+              ...m,
+              toolCalls: [
+                ...m.toolCalls,
+                {
+                  id: String(data.id ?? ""),
+                  name: String(data.name ?? ""),
+                  status: "running",
+                },
+              ],
+            }));
+            break;
+          case "tool_call_input":
+            updateAssistant(m => ({
+              ...m,
+              toolCalls: m.toolCalls.map(tc =>
+                tc.id === String(data.id)
+                  ? {
+                      ...tc,
+                      input: data.input as Record<string, unknown> | undefined,
+                    }
+                  : tc
+              ),
+            }));
+            break;
+          case "tool_call_result":
+            updateAssistant(m => ({
+              ...m,
+              toolCalls: m.toolCalls.map(tc =>
+                tc.id === String(data.id)
+                  ? {
+                      ...tc,
+                      status: data.ok ? "done" : "error",
+                      ok: Boolean(data.ok),
+                      summary: String(data.summary ?? ""),
+                    }
+                  : tc
+              ),
+            }));
+            break;
+          case "workflow_proposal":
+            setPendingProposal({
+              proposalId: String(data.proposal_id ?? ""),
+              graph: data.graph as GraphConfig,
+              graphHash: String(data.graph_hash ?? ""),
+              rationale: String(data.rationale ?? ""),
+              pipelinesToLoad: (data.pipelines_to_load as string[]) ?? [],
+              diff: (data.diff as Record<string, unknown>) ?? {},
+            });
+            break;
+          case "error":
+            toast.error(String(data.message ?? "Agent error"));
+            updateAssistant(m => ({
+              ...m,
+              text:
+                m.text + `\n\n[Error: ${String(data.message ?? "unknown")}]`,
+            }));
+            break;
+          case "turn_end":
+            updateAssistant(m => ({ ...m, pending: false }));
+            break;
+          default:
+            break;
+        }
+      };
+
+      try {
+        const returnedSessionId = await streamAgentChat(text, {
+          sessionId: sessionIdRef.current,
+          isContinuation,
+          signal: controller.signal,
+          onEvent: handleEvent,
+        });
+        if (returnedSessionId && !sessionIdRef.current) {
+          setSessionId(returnedSessionId);
+        }
+        // The stream may close without ever sending `turn_end` (for example
+        // after an `error` frame). Clear the flag here so the placeholder
+        // never stays in its pending state once the stream is over.
+        updateAssistant(m => (m.pending ? { ...m, pending: false } : m));
+      } catch (e) {
+        if ((e as Error).name === "AbortError") {
+          updateAssistant(m => ({
+            ...m,
+            pending: false,
+            text: m.text + "\n[stopped]",
+          }));
+        } else {
+          toast.error(e instanceof Error ? e.message : String(e));
+          updateAssistant(m => ({
+            ...m,
+            pending: false,
+            text: `${m.text}\n\n[${e instanceof Error ? e.message : "stream failed"}]`,
+          }));
+        }
+      } finally {
+        // Only the stream that is still the active one may clear the flag.
+        // A superseded stream settling late must not mark the newer stream
+        // as finished, and abort() has already cleared the flag itself.
+        if (abortRef.current === controller) {
+          abortRef.current = null;
+          setIsStreaming(false);
+        }
+      }
+    },
+    []
+  );
+
+  const sendMessage = useCallback(
+    async (text: string) => {
+      if (!text.trim()) return;
+      await runStream(text.trim(), { isContinuation: false });
+    },
+    [runStream]
+  );
+
+  const decideProposal = useCallback(
+    async (approved: boolean, reason?: string) => {
+      const proposal = pendingProposal;
+      const sid = sessionIdRef.current;
+      if (!proposal || !sid) return;
+
+      // On approval, write the proposed graph into the React Flow canvas BEFORE
+      // we tell the backend. The backend's apply_workflow tool no longer starts
+      // a session — the user presses Play. If no importer is registered, or
+      // the importer throws, the approval is NOT sent: we surface a toast and
+      // leave the proposal pending so the user can retry.
+      if (approved) {
+        const importer = graphImporterRef.current;
+        if (importer) {
+          try {
+            importer(proposal.graph, `agent-proposal-${proposal.proposalId}`);
+          } catch (e) {
+            toast.error(
+              `Failed to apply proposal to canvas: ${e instanceof Error ? e.message : String(e)}`
+            );
+            return;
+          }
+        } else {
+          toast.warning("Graph canvas not ready; proposal not applied.");
+          return;
+        }
+      }
+
+      try {
+        const response = await decideAgentProposal({
+          session_id: sid,
+          proposal_id: proposal.proposalId,
+          approved,
+          reason,
+        });
+        setPendingProposal(prev =>
+          prev && prev.proposalId === proposal.proposalId
+            ? { ...prev, decision: approved ? "approved" : "rejected" }
+            : prev
+        );
+        if (approved) {
+          toast.success("Proposal applied to graph. Press Play to start.");
+        }
+        await runStream(response.next_message, { isContinuation: true });
+        // Remove the decided card once the continuation turn has finished.
+        // The continuation may itself have produced a NEW proposal (e.g. a
+        // revised workflow after a rejection); only clear the slot if it
+        // still holds the proposal we just decided.
+        setPendingProposal(prev =>
+          prev && prev.proposalId === proposal.proposalId ? null : prev
+        );
+      } catch (e) {
+        toast.error(e instanceof Error ? e.message : String(e));
+      }
+    },
+    [pendingProposal, runStream]
+  );
+
+  const toggleDrawer = useCallback(() => setDrawerOpen(o => !o), []);
+
+  // Memoized so consumers only re-render when a listed piece of state or a
+  // callback identity changes. `setDrawerOpen` is a stable state setter and
+  // therefore not listed as a dependency.
+  const value = useMemo<AgentContextValue>(
+    () => ({
+      drawerOpen,
+      setDrawerOpen,
+      toggleDrawer,
+      messages,
+      isStreaming,
+      sessionId,
+      pendingProposal,
+      config,
+      configError,
+      sendMessage,
+      abort,
+      resetSession,
+      decideProposal,
+      refreshConfig,
+      registerGraphImporter,
+    }),
+    [
+      drawerOpen,
+      toggleDrawer,
+      messages,
+      isStreaming,
+      sessionId,
+      pendingProposal,
+      config,
+      configError,
+      sendMessage,
+      abort,
+      resetSession,
+      decideProposal,
+      refreshConfig,
+      registerGraphImporter,
+    ]
+  );
+
+  return (
+    <AgentContext.Provider value={value}>{children}</AgentContext.Provider>
+  );
+}
+
+/**
+ * Hook returning the agent context value.
+ *
+ * Throws when no AgentProvider is mounted above the calling component.
+ */
+export function useAgent(): AgentContextValue {
+  const value = useContext(AgentContext);
+  if (value) {
+    return value;
+  }
+  throw new Error("useAgent must be used within AgentProvider");
+}
diff --git a/frontend/src/data/nodes/manifest.json b/frontend/src/data/nodes/manifest.json
new file mode 100644
index 000000000..cc3696655
--- /dev/null
+++ b/frontend/src/data/nodes/manifest.json
@@ -0,0 +1,374 @@
+{
+  "$comment": "UI node-type catalog. Served by the agent via list_node_types. When you add a new node component, append an entry here so the agent can compose with it.",
+  "$handle_convention": {
+    "note": "Every React Flow edge handle ID has the shape '<kind>:<name>'. 'param:<name>' is an event/value port; 'stream:<name>' is a frame/audio port. Example valid handle IDs: 'param:value', 'param:noise_scale', 'param:__prompt', 'param:__vace', 'stream:video', 'stream:vace_input_frames'. Do NOT use 'parameter:<name>' — it is invalid.",
+    "aggregate_pipeline_handles": {
+      "param:__prompt": "Pipeline prompt aggregate — connect from any string-valued output (primitive value, control value, prompt_blend prompts, subgraph prompt output). One wire replaces the built-in prompt text field.",
+      "param:__vace": "Pipeline VACE aggregate — connect from a 'vace' node's 'param:__vace' output. Pipeline must have supports_vace=true (check get_pipeline_handles).",
+      "param:__loras": "Pipeline LoRA aggregate — connect from a 'lora' node's 'param:lora' output. Pipeline must have supports_lora=true."
+    },
+    "pipeline_stream_inputs": [
+      "stream:video — required frame input on every compute pipeline",
+      "stream:vace_input_frames — only on VACE-capable pipelines",
+      "stream:vace_input_masks — only on VACE-capable pipelines"
+    ],
+    "subgraph_rule": "External edges reference a subgraph via the NAMES declared in 'data.subgraphInputs[].name' / 'data.subgraphOutputs[].name'. The edge handle is 'param:<that name>'. Internal wiring lives in 'data.subgraphNodes' + 'data.subgraphEdges'.",
+    "authoritative_source": "frontend/src/lib/graphUtils.ts -> buildHandleId(). When in doubt call get_pipeline_handles(pipeline_id) for an exact list of targetable handles on a given pipeline."
+  },
+  "version": 2,
+  "node_types": [
+    {
+      "type": "source",
+      "label": "Source",
+      "category": "io",
+      "description": "External input (camera, video file, Spout, NDI, Syphon).",
+      "config": {
+        "source_mode": {
+          "type": "enum",
+          "values": ["camera", "video_file", "spout", "ndi", "syphon"]
+        },
+        "source_name": {
+          "type": "string",
+          "description": "Device/file/sender name"
+        },
+        "source_flip_vertical": { "type": "boolean", "default": false }
+      },
+      "outputs": [{ "name": "video", "kind": "stream", "type": "frame" }]
+    },
+    {
+      "type": "pipeline",
+      "label": "Pipeline",
+      "category": "compute",
+      "description": "A generative or transform pipeline. Input parameter handles depend on the pipeline schema — always call get_pipeline_handles(pipeline_id) before wiring edges to a pipeline. The answer from that tool is authoritative.",
+      "config": {
+        "pipeline_id": { "type": "string", "required": true },
+        "tempo_sync": { "type": "boolean", "default": false }
+      },
+      "dynamic_handles": true,
+      "handle_source": "get_pipeline_handles(pipeline_id)",
+      "outputs": [{ "name": "video", "kind": "stream", "type": "frame" }]
+    },
+    {
+      "type": "sink",
+      "label": "Sink",
+      "category": "io",
+      "description": "Output — WebRTC preview by default; set sink_mode for Spout/NDI/Syphon output.",
+      "config": {
+        "sink_mode": {
+          "type": "enum",
+          "optional": true,
+          "values": ["spout", "ndi", "syphon"]
+        },
+        "sink_name": { "type": "string", "optional": true }
+      },
+      "inputs": [{ "name": "video", "kind": "stream", "type": "frame" }]
+    },
+    {
+      "type": "record",
+      "label": "Record",
+      "category": "io",
+      "description": "Records the input stream to an MP4 file. Controlled via start_recording / stop_recording.",
+      "inputs": [{ "name": "video", "kind": "stream", "type": "frame" }]
+    },
+    {
+      "type": "primitive",
+      "label": "Primitive",
+      "category": "data",
+      "description": "Static value producer — strings (prompts), numbers, booleans.",
+      "config": {
+        "valueType": {
+          "type": "enum",
+          "values": ["string", "number", "boolean"]
+        },
+        "value": { "type": "any" }
+      },
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "slider",
+      "label": "Slider",
+      "category": "control",
+      "description": "Continuous float control with min/max.",
+      "config": {
+        "min": { "type": "number" },
+        "max": { "type": "number" },
+        "default": { "type": "number" }
+      },
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "number" }]
+    },
+    {
+      "type": "knobs",
+      "label": "Knobs",
+      "category": "control",
+      "description": "Bank of rotary knobs (multi-value).",
+      "outputs": [{ "name": "values", "kind": "parameter", "type": "number[]" }]
+    },
+    {
+      "type": "xypad",
+      "label": "XY Pad",
+      "category": "control",
+      "description": "2D positional control.",
+      "outputs": [
+        { "name": "x", "kind": "parameter", "type": "number" },
+        { "name": "y", "kind": "parameter", "type": "number" }
+      ]
+    },
+    {
+      "type": "bool",
+      "label": "Bool",
+      "category": "control",
+      "description": "Toggle.",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "boolean" }]
+    },
+    {
+      "type": "trigger",
+      "label": "Trigger",
+      "category": "control",
+      "description": "Momentary button — fires a pulse. Wire its 'param:value' into a subgraph input (e.g. 'param:trigger_a') or to a control node's 'param:item_N' switch input.",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "boolean" }]
+    },
+    {
+      "type": "control",
+      "label": "Control",
+      "category": "control",
+      "description": "Generic controller. In 'switch' mode (data.controlMode = 'switch') it selects among variadic 'param:item_N' inputs (numeric triggers) and emits the corresponding 'param:str_N' string slot on its 'param:value' output. Use this to build prompt-switcher subgraphs.",
+      "dynamic_handles": true,
+      "inputs": [
+        {
+          "name": "item_N",
+          "kind": "parameter",
+          "type": "number",
+          "variadic": true,
+          "note": "Selector triggers — param:item_0, param:item_1, ..."
+        },
+        {
+          "name": "str_N",
+          "kind": "parameter",
+          "type": "string",
+          "variadic": true,
+          "note": "In switch mode only — param:str_0, param:str_1, ... paired with item_N"
+        }
+      ],
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "math",
+      "label": "Math",
+      "category": "compute",
+      "description": "Arithmetic ops on numeric inputs.",
+      "inputs": [
+        { "name": "a", "kind": "parameter", "type": "number" },
+        { "name": "b", "kind": "parameter", "type": "number" }
+      ],
+      "outputs": [{ "name": "result", "kind": "parameter", "type": "number" }]
+    },
+    {
+      "type": "tuple",
+      "label": "Tuple",
+      "category": "data",
+      "description": "Group several values into a structured bundle.",
+      "inputs": [
+        { "name": "in", "kind": "parameter", "type": "any", "variadic": true }
+      ],
+      "outputs": [{ "name": "out", "kind": "parameter", "type": "tuple" }]
+    },
+    {
+      "type": "reroute",
+      "label": "Reroute",
+      "category": "layout",
+      "description": "Visual re-routing point — passes value through.",
+      "inputs": [{ "name": "in", "kind": "parameter", "type": "any" }],
+      "outputs": [{ "name": "out", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "note",
+      "label": "Note",
+      "category": "layout",
+      "description": "Freeform annotation — not executable."
+    },
+    {
+      "type": "image",
+      "label": "Image",
+      "category": "data",
+      "description": "Static image reference (local file path). Wire 'param:value' into a 'vace' node's 'param:ref_image' (or 'param:first_frame' / 'param:last_frame').",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "string" }]
+    },
+    {
+      "type": "audio",
+      "label": "Audio",
+      "category": "io",
+      "description": "Audio input/output.",
+      "outputs": [{ "name": "audio", "kind": "stream", "type": "audio" }]
+    },
+    {
+      "type": "vace",
+      "label": "VACE",
+      "category": "conditioning",
+      "description": "VACE reference-image conditioning node. Takes up to three image paths + an optional context_scale modulator, and emits a single aggregate 'param:__vace' output that plugs into a pipeline's 'param:__vace' input (pipeline must have supports_vace=true). Typical wiring: image -> vace.ref_image, then vace.__vace -> pipeline.__vace.",
+      "inputs": [
+        {
+          "name": "ref_image",
+          "kind": "parameter",
+          "type": "string",
+          "note": "File path for reference image. Wire from an 'image' node's 'param:value'."
+        },
+        {
+          "name": "first_frame",
+          "kind": "parameter",
+          "type": "string",
+          "optional": true
+        },
+        {
+          "name": "last_frame",
+          "kind": "parameter",
+          "type": "string",
+          "optional": true
+        },
+        {
+          "name": "context_scale",
+          "kind": "parameter",
+          "type": "number",
+          "optional": true,
+          "note": "Modulate 0..2 via a slider if desired; defaults to the node's internal value."
+        }
+      ],
+      "outputs": [
+        {
+          "name": "__vace",
+          "kind": "parameter",
+          "type": "vace",
+          "note": "Aggregate output — connect ONLY to a pipeline's 'param:__vace' input."
+        }
+      ]
+    },
+    {
+      "type": "lora",
+      "label": "LoRA",
+      "category": "conditioning",
+      "description": "LoRA adapter — attaches to a pipeline, sets scale. Wire 'param:lora' into a pipeline's 'param:__loras' (pipeline must have supports_lora=true).",
+      "outputs": [
+        {
+          "name": "lora",
+          "kind": "parameter",
+          "type": "lora",
+          "note": "Aggregate output — connect ONLY to a pipeline's 'param:__loras' input."
+        }
+      ]
+    },
+    {
+      "type": "midi",
+      "label": "MIDI",
+      "category": "io",
+      "description": "MIDI input (CC / note / clock).",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "number" }]
+    },
+    {
+      "type": "tempo",
+      "label": "Tempo",
+      "category": "io",
+      "description": "Ableton Link / MIDI-clock tempo source.",
+      "outputs": [
+        { "name": "bpm", "kind": "parameter", "type": "number" },
+        { "name": "phase", "kind": "parameter", "type": "number" }
+      ]
+    },
+    {
+      "type": "subgraph",
+      "label": "Subgraph",
+      "category": "layout",
+      "description": "Nested graph container. Internal nodes live in 'data.subgraphNodes' and internal edges in 'data.subgraphEdges'. The subgraph exposes external ports via 'data.subgraphInputs' and 'data.subgraphOutputs' — each entry has {name, portType: 'param'|'stream', paramType, innerNodeId, innerHandleId}. External edges from/to this subgraph use 'param:<subgraphInputs[].name>' or 'param:<subgraphOutputs[].name>' as the edge handle ID.",
+      "dynamic_handles": true,
+      "structure": {
+        "subgraphNodes": "Array of internal UI nodes (same shape as top-level ui_state.nodes).",
+        "subgraphEdges": "Array of internal edges between subgraphNodes (same shape as ui_state.edges).",
+        "subgraphInputs": "Array of exposed inputs: [{name, portType, paramType, innerNodeId, innerHandleId}].",
+        "subgraphOutputs": "Array of exposed outputs: same shape as subgraphInputs."
+      }
+    },
+    {
+      "type": "subgraph_input",
+      "label": "Subgraph Input",
+      "category": "layout",
+      "description": "Input port of the parent subgraph.",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "subgraph_output",
+      "label": "Subgraph Output",
+      "category": "layout",
+      "description": "Output port of the parent subgraph.",
+      "inputs": [{ "name": "value", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "prompt_list",
+      "label": "Prompt List",
+      "category": "prompts",
+      "description": "Ordered list of text prompts, advanced one at a time via a trigger. Ideal for button-driven prompt switching between a fixed set of prompts. Store the prompts in 'data.promptListItems' (string[]); the node renders them in order and emits the current one on 'param:prompt'.",
+      "inputs": [
+        {
+          "name": "trigger",
+          "kind": "parameter",
+          "type": "trigger",
+          "note": "Pulse to advance to the next prompt. Wire a 'trigger' node's 'param:value' (or any value source) here."
+        },
+        {
+          "name": "cycle",
+          "kind": "parameter",
+          "type": "boolean",
+          "note": "When true, advancing past the last item wraps to the first."
+        }
+      ],
+      "outputs": [
+        {
+          "name": "prompt",
+          "kind": "parameter",
+          "type": "string",
+          "note": "Currently-selected prompt. Connect to a pipeline's 'param:__prompt' input."
+        }
+      ]
+    },
+    {
+      "type": "prompt_blend",
+      "label": "Prompt Blend",
+      "category": "prompts",
+      "description": "Blend/interpolate between multiple prompt lists over time.",
+      "dynamic_handles": true,
+      "inputs": [
+        {
+          "name": "prompt_N",
+          "kind": "parameter",
+          "type": "string",
+          "variadic": true
+        },
+        {
+          "name": "weight_N",
+          "kind": "parameter",
+          "type": "number",
+          "variadic": true
+        }
+      ],
+      "outputs": [
+        {
+          "name": "prompts",
+          "kind": "parameter",
+          "type": "string",
+          "note": "Connect to a pipeline's 'param:__prompt' input."
+        }
+      ]
+    },
+    {
+      "type": "scheduler",
+      "label": "Scheduler",
+      "category": "control",
+      "description": "Time-based scheduler — fires values at given beats / times.",
+      "outputs": [{ "name": "value", "kind": "parameter", "type": "any" }]
+    },
+    {
+      "type": "output",
+      "label": "Output",
+      "category": "io",
+      "description": "Alternate sink node used by some UI graphs — prefer 'sink'.",
+      "inputs": [{ "name": "video", "kind": "stream", "type": "frame" }]
+    }
+  ]
+}
diff --git a/frontend/src/lib/agentClient.ts b/frontend/src/lib/agentClient.ts
new file mode 100644
index 000000000..2ca7b2491
--- /dev/null
+++ b/frontend/src/lib/agentClient.ts
@@ -0,0 +1,100 @@
+/**
+ * Streaming client for POST /api/v1/agent/chat (SSE over POST).
+ *
+ * We use fetch + ReadableStream rather than EventSource because EventSource
+ * only supports GET and we want to POST the prompt body. The server emits
+ * standard SSE frames (`event: ...\ndata: ...\n\n`).
+ */
+
+/** One parsed SSE frame delivered to `onEvent`. */
+export interface AgentStreamEvent {
+  // Value of the frame's `event:` field; "message" when the frame has none.
+  event: string;
+  // JSON-decoded `data:` payload; `{ raw: <text> }` when it is not valid JSON.
+  data: Record<string, unknown>;
+}
+
+/** Options accepted by `streamAgentChat`. */
+export interface AgentStreamOptions {
+  // Sent as `session_id` in the request body; null/undefined is sent as null.
+  sessionId?: string | null;
+  // Sent as `is_continuation` in the request body; defaults to false.
+  isContinuation?: boolean;
+  // Passed to fetch so the caller can cancel the request.
+  signal?: AbortSignal;
+  // Called synchronously for every parsed frame, in arrival order.
+  onEvent: (event: AgentStreamEvent) => void;
+}
+
+/**
+ * Open an SSE-over-POST stream to the agent chat endpoint and dispatch events.
+ *
+ * Each complete frame (terminated by a blank line) is parsed and handed to
+ * `opts.onEvent` as it arrives. Both LF and CRLF line endings are accepted.
+ * A trailing frame that is not terminated by a blank line when the stream
+ * closes is discarded.
+ *
+ * @param message Text sent as `message` in the JSON request body.
+ * @param opts Session id, continuation flag, abort signal and event callback.
+ * @returns The value of the `X-Agent-Session-Id` response header (null when
+ *   absent), resolved once the stream closes.
+ * @throws Error when the response status is not ok or the response has no
+ *   body; also rethrows fetch/read failures (including AbortError) and
+ *   anything thrown by `opts.onEvent`.
+ */
+export async function streamAgentChat(
+  message: string,
+  opts: AgentStreamOptions
+): Promise<string | null> {
+  const response = await fetch("/api/v1/agent/chat", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      message,
+      session_id: opts.sessionId ?? null,
+      is_continuation: opts.isContinuation ?? false,
+    }),
+    signal: opts.signal,
+  });
+
+  if (!response.ok) {
+    // Include a bounded slice of the body so the error stays readable.
+    const body = await response.text().catch(() => "");
+    throw new Error(
+      `Agent chat failed (${response.status}): ${body.slice(0, 300)}`
+    );
+  }
+
+  const sessionId = response.headers.get("X-Agent-Session-Id");
+
+  if (!response.body) {
+    throw new Error("Agent chat stream returned no body");
+  }
+
+  const reader = response.body.pipeThrough(new TextDecoderStream()).getReader();
+
+  let buffer = "";
+  try {
+    for (;;) {
+      const { value, done } = await reader.read();
+      if (done) break;
+      // Normalize CRLF to LF on the whole buffer (not just the new chunk) so
+      // a "\r\n" pair split across two chunks is still collapsed.
+      buffer = (buffer + value).replace(/\r\n/g, "\n");
+
+      let delimIdx = buffer.indexOf("\n\n");
+      while (delimIdx !== -1) {
+        const frame = buffer.slice(0, delimIdx);
+        buffer = buffer.slice(delimIdx + 2);
+        const parsed = parseFrame(frame);
+        if (parsed) opts.onEvent(parsed);
+        delimIdx = buffer.indexOf("\n\n");
+      }
+    }
+  } finally {
+    // If we leave early (e.g. onEvent threw) the body would otherwise stay
+    // open; cancel it. On a stream that already closed or errored this is a
+    // no-op or a rejected promise, which we deliberately ignore.
+    void reader.cancel().catch(() => {
+      /* noop */
+    });
+    try {
+      reader.releaseLock();
+    } catch {
+      /* noop */
+    }
+  }
+
+  return sessionId;
+}
+
+/**
+ * Parse one SSE frame (the text between two blank lines) into an event.
+ *
+ * Empty lines and comment lines (starting with ":") are skipped. The last
+ * `event:` line names the event ("message" when absent); all `data:` lines
+ * are joined with "\n" and JSON-decoded.
+ *
+ * @returns null when the frame has no `data:` line. Otherwise the event with
+ *   its decoded payload; when the payload is not valid JSON, or decodes to
+ *   anything other than a plain object (null, a primitive, an array), the
+ *   payload is `{ raw: <joined data text> }` so `data` always honours its
+ *   declared object type and consumers can safely read properties from it.
+ */
+function parseFrame(frame: string): AgentStreamEvent | null {
+  let eventName = "message";
+  const dataLines: string[] = [];
+  for (const line of frame.split("\n")) {
+    if (!line || line.startsWith(":")) continue;
+    if (line.startsWith("event:")) {
+      eventName = line.slice(6).trim();
+    } else if (line.startsWith("data:")) {
+      dataLines.push(line.slice(5).trim());
+    }
+  }
+  if (dataLines.length === 0) return null;
+  const joined = dataLines.join("\n");
+
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(joined);
+  } catch {
+    return { event: eventName, data: { raw: joined } };
+  }
+  // JSON.parse also accepts `null`, numbers, strings and arrays; none of
+  // those is a usable key/value payload, so fall back to the raw text.
+  if (decoded === null || typeof decoded !== "object" || Array.isArray(decoded)) {
+    return { event: eventName, data: { raw: joined } };
+  }
+  return { event: eventName, data: decoded as Record<string, unknown> };
+}
diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts
index f419b67b2..3fe8fae51 100644
--- a/frontend/src/lib/api.ts
+++ b/frontend/src/lib/api.ts
@@ -1316,3 +1316,137 @@ export const getTempoSources = async (): Promise<TempoSourcesResponse> => {
   }
   return response.json();
 };
+
+// ---------------------------------------------------------------------------
+// Agent API
+// ---------------------------------------------------------------------------
+
+/** Identifier of the LLM backend the agent talks to. */
+export type AgentProvider = "anthropic" | "openai_compatible" | "self_hosted";
+
+/**
+ * Agent settings. A partial object of this shape is the request body of
+ * `PUT /api/v1/agent/config` (see `updateAgentConfig`).
+ */
+export interface AgentConfig {
+  provider: AgentProvider;
+  model: string;
+  // NOTE(review): presumably a custom endpoint URL for non-default
+  // providers, null when unset — confirm against the backend.
+  base_url: string | null;
+}
+
+/** Response of `GET /api/v1/agent/config`: the config plus key metadata. */
+export interface AgentConfigResponse extends AgentConfig {
+  // One entry per provider. NOTE(review): presumably where that provider's
+  // API key comes from, with null meaning no key — confirm against the
+  // backend.
+  key_sources: Record<AgentProvider, "env_var" | "stored" | null>;
+}
+
+/** One entry of the `sessions` array returned by `listAgentSessions`. */
+export interface AgentSessionInfo {
+  id: string;
+  // NOTE(review): timestamp units (seconds vs. milliseconds) are not visible
+  // from this file — confirm against the backend before formatting.
+  created_at: number;
+  last_activity: number;
+  // NOTE(review): presumably the number of messages in the session.
+  messages: number;
+  has_pending_proposal: boolean;
+}
+
+/** Response of `POST /api/v1/agent/decision` (see `decideAgentProposal`). */
+export interface AgentDecisionResponse {
+  ok: boolean;
+  session_id: string;
+  proposal_id: string;
+  approved: boolean;
+  // NOTE(review): presumably the synthetic message the client sends to start
+  // the follow-up turn — confirm against the caller.
+  next_message: string;
+}
+
+/**
+ * Fetch the agent configuration from `GET /api/v1/agent/config`.
+ * Throws an Error carrying the HTTP status text on a non-2xx response.
+ */
+export const getAgentConfig = async (): Promise<AgentConfigResponse> => {
+  const res = await fetch("/api/v1/agent/config");
+  if (res.ok) {
+    return res.json();
+  }
+  throw new Error(`Failed to get agent config: ${res.statusText}`);
+};
+
+/**
+ * Send a partial config update via `PUT /api/v1/agent/config`.
+ * Throws an Error carrying the response body on a non-2xx response.
+ */
+export const updateAgentConfig = async (
+  update: Partial<AgentConfig>
+): Promise<{ ok: boolean; config: AgentConfig }> => {
+  const init: RequestInit = {
+    method: "PUT",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(update),
+  };
+  const res = await fetch("/api/v1/agent/config", init);
+  if (res.ok) {
+    return res.json();
+  }
+  const detail = await res.text();
+  throw new Error(`Failed to update agent config: ${detail}`);
+};
+
+/**
+ * Ask the backend to probe the configured provider via
+ * `POST /api/v1/agent/test-connection`. Throws an Error carrying the
+ * response body on a non-2xx response; otherwise resolves with the parsed
+ * JSON body.
+ */
+export const testAgentConnection = async (): Promise<{
+  ok: boolean;
+  error?: string;
+  result?: Record<string, unknown>;
+}> => {
+  const res = await fetch("/api/v1/agent/test-connection", { method: "POST" });
+  if (res.ok) {
+    return res.json();
+  }
+  const detail = await res.text();
+  throw new Error(`Failed to test agent connection: ${detail}`);
+};
+
+/**
+ * List agent sessions via `GET /api/v1/agent/sessions`.
+ * Throws an Error carrying the HTTP status text on a non-2xx response.
+ */
+export const listAgentSessions = async (): Promise<{
+  sessions: AgentSessionInfo[];
+}> => {
+  const res = await fetch("/api/v1/agent/sessions");
+  if (res.ok) {
+    return res.json();
+  }
+  throw new Error(`Failed to list agent sessions: ${res.statusText}`);
+};
+
+/**
+ * Delete one agent session via `DELETE /api/v1/agent/sessions/{id}`.
+ * The id is URL-encoded before being placed in the path. Throws an Error
+ * carrying the HTTP status text on a non-2xx response.
+ */
+export const deleteAgentSession = async (sessionId: string): Promise<void> => {
+  const url = `/api/v1/agent/sessions/${encodeURIComponent(sessionId)}`;
+  const res = await fetch(url, { method: "DELETE" });
+  if (res.ok) {
+    return;
+  }
+  throw new Error(`Failed to delete agent session: ${res.statusText}`);
+};
+
+/**
+ * Record an approve/reject decision for a proposal via
+ * `POST /api/v1/agent/decision`. Throws an Error carrying the response body
+ * on a non-2xx response.
+ */
+export const decideAgentProposal = async (body: {
+  session_id: string;
+  proposal_id: string;
+  approved: boolean;
+  reason?: string;
+}): Promise<AgentDecisionResponse> => {
+  const init: RequestInit = {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+  };
+  const res = await fetch("/api/v1/agent/decision", init);
+  if (res.ok) {
+    return res.json();
+  }
+  const detail = await res.text();
+  throw new Error(`Failed to record agent decision: ${detail}`);
+};
+
+/** Response of `GET /api/v1/agent/node-catalog` (see `getAgentNodeCatalog`). */
+export interface AgentNodeCatalog {
+  version: number;
+  // One entry per node type; only type/label/category are always present.
+  node_types: Array<{
+    type: string;
+    label: string;
+    category: string;
+    description?: string;
+    config?: Record<string, unknown>;
+    // NOTE(review): `kind` presumably distinguishes param vs. stream ports —
+    // confirm against the backend catalog.
+    inputs?: Array<{
+      name: string;
+      kind: string;
+      type: string;
+      variadic?: boolean;
+    }>;
+    outputs?: Array<{ name: string; kind: string; type: string }>;
+  }>;
+}
+
+/**
+ * Fetch the node catalog from `GET /api/v1/agent/node-catalog`.
+ * Throws an Error carrying the HTTP status text on a non-2xx response.
+ */
+export const getAgentNodeCatalog = async (): Promise<AgentNodeCatalog> => {
+  const res = await fetch("/api/v1/agent/node-catalog");
+  if (res.ok) {
+    return res.json();
+  }
+  throw new Error(`Failed to get agent node catalog: ${res.statusText}`);
+};
diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx
index 5486a15c8..960aab1a6 100644
--- a/frontend/src/pages/StreamPage.tsx
+++ b/frontend/src/pages/StreamPage.tsx
@@ -21,6 +21,7 @@ import {
 } from "../lib/workflowSettings";
 import { GraphEditor } from "../components/graph/GraphEditor";
 import type { GraphEditorHandle } from "../components/graph/GraphEditor";
+import { useAgent } from "../contexts/AgentContext";
 import type { TimelinePrompt } from "../components/PromptTimeline";
 import { StatusBar } from "../components/StatusBar";
 import { LogPanel } from "../components/LogPanel";
@@ -345,6 +346,14 @@ export function StreamPage() {
   const [graphMode, setGraphMode] = useState(true);
   const graphEditorRef = useRef<GraphEditorHandle>(null);
 
+  // Let the agent write approved workflow proposals into the canvas.
+  const { registerGraphImporter } = useAgent();
+  useEffect(() => {
+    return registerGraphImporter((graph, label) => {
+      graphEditorRef.current?.loadGraphConfig(graph, label);
+    });
+  }, [registerGraphImporter]);
+
   // When true, pipeline controls are disabled in Perform Mode
   // (set when user edits anything in Graph Mode, cleared when user clicks Clear)
   const [nonLinearGraph, setNonLinearGraph] = useState(false);
diff --git a/pyproject.toml b/pyproject.toml
index dd86c83e9..d32a97338 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ dependencies = [
     "mcp>=1.0.0",
     "livepeer-gateway",
     "protobuf>=5.27.0",
+    "anthropic>=0.40",
 ]
 
 [project.optional-dependencies]
@@ -139,10 +140,19 @@ dev = [
     "twine>=5.0.0",
     "pytest>=8.4.2",
     "freezegun>=1.5.5",
+    # Eval harness
+    "asgi-lifespan>=2.1",
+    "pyyaml>=6.0",
 ]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
+# Exclude eval tests by default — they hit a live LLM API and cost money.
+# Run on demand with: uv run pytest -m eval
+addopts = "-m 'not eval'"
+markers = [
+    "eval: LLM-backed eval (hits live provider API; excluded from default run)",
+]
 
 [tool.ruff]
 line-length = 88
diff --git a/src/scope/server/agent_loop.py b/src/scope/server/agent_loop.py
new file mode 100644
index 000000000..9e4c9d031
--- /dev/null
+++ b/src/scope/server/agent_loop.py
@@ -0,0 +1,588 @@
+"""Agent loop — orchestrates provider turns, tool dispatch, and SSE emission.
+
+Design notes:
+- ``run_turn`` is an async generator that yields fully formatted SSE
+  frames (``"event: ...\\ndata: ...\\n\\n"`` strings) for the route to stream.
+- Tool results are appended to ``session.messages`` in Anthropic shape so the
+  next provider call has full context.
+- Vision: when ``capture_frame`` returns a base64 JPEG, we wrap it as an image
+  content block inside the tool_result so multimodal models see it.
+- Proposals: ``propose_workflow`` returns normally, the loop emits a
+  ``workflow_proposal`` SSE event to the frontend, and the model's turn
+  continues as usual. Best practice (reinforced in the system prompt) is for
+  the model to stop after proposing so the user can approve/reject. On
+  approval, the frontend starts a new turn with an auto-generated user
+  message, which lets the model call ``apply_workflow``.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from collections.abc import AsyncIterator
+from typing import Any
+
+from fastapi import FastAPI
+
+from .agent_providers import (
+    LLMProvider,
+    ProviderError,
+    TextDelta,
+    ToolUseEnd,
+    ToolUseStart,
+    TurnEnd,
+    build_provider,
+)
+from .agent_state import AgentSession
+from .agent_tool_impls import TOOL_METHODS, AgentTools, build_tool_specs
+
+logger = logging.getLogger(__name__)
+
+
+SYSTEM_PROMPT = """\
+You are the Scope Agent, embedded in Daydream Scope — a real-time generative \
+AI video tool. Your job is to translate the user's intent into working graphs \
+and to iteratively tune parameters by observing output. Favor thoughtful \
+defaults over excessive questioning.
+
+CORE PRINCIPLES
+
+1. Introspect, never guess. Call list_pipelines and get_pipeline_schema \
+before proposing any graph or parameter change — pipeline authors add new \
+fields without warning. Never set a parameter you haven't verified in the \
+current schema.
+
+1a. Honor the user's pipeline name. If the user names a pipeline (\"krea\", \
+\"longlive\", \"ltx\", \"passthrough\", ...), match their word against both \
+the 'id' and 'name' fields in list_pipelines and use that one. If multiple \
+pipelines match, prefer the shortest id. Never silently substitute a \
+different pipeline because it's easier to wire — if the chosen pipeline \
+can't do what the user asked, say so and ask.
+
+2. Prefer composition over reinvention. Use list_blueprints to find \
+pre-built fragments (prompt switcher, LFO, timed cycler, etc.) and graft \
+them into proposals. Only hand-roll nodes when no blueprint fits.
+
+3. Propose ONLY for structural changes. propose_workflow is for adding, \
+removing, or rewiring nodes/edges — graph topology. After calling \
+propose_workflow, write a short text summary and stop; the UI will render \
+Approve/Reject. Approval writes the graph to the canvas automatically; \
+apply_workflow only confirms it. Never call start_session, load_pipeline, \
+or any session-starting tool after a proposal. The user presses Play.
+
+4. Runtime tweaks use update_parameters — NEVER re-propose. Prompts, \
+noise_scale, LoRA weights, VACE scale, prompt_list items, and any other \
+parameter already bound on the canvas are runtime-tunable via \
+update_parameters. Changing a prompt, a slider value, or swapping one \
+entry in a prompt list is NOT a structural change — do not re-propose \
+the whole graph to change text or numbers. If update_parameters returns \
+ok, trust it; do not second-guess with get_current_graph and then \
+re-propose. get_current_graph returns null when no session is running \
+(the user hasn't pressed Play) — that's normal and does NOT mean the \
+canvas is empty, and is not a reason to re-propose.
+
+5. See before you tune. When the user reports a visual issue (\"not \
+recognizing depth\", \"too noisy\", \"wrong style\"), call capture_frame \
+first so you have concrete evidence before adjusting.
+
+6. Be terse. Do NOT narrate your plan, reasoning, or field-to-label \
+mappings before you act. Skip phrases like \"Let me\", \"I'll\", \"Hmm\", \
+\"The field is X (labeled Y in the UI)\", \"Since prompt text lives in \
+the UI graph\", \"that route won't reliably...\", \"Let me update it the \
+right way\". Call the tool; report the result in one sentence. Tool \
+calls are already visible to the user in the chat — you do not need to \
+announce them.
+
+GRAPH SHAPE
+
+A proposed graph has two parts. The backend graph (top-level nodes/edges) \
+carries the runtime flow; it ONLY accepts node types source, pipeline, \
+sink, record. Anything else (triggers, sliders, knobs, primitives, \
+subgraphs, math, LFOs, MIDI, trigger buttons, prompt lists, etc.) is a UI \
+node and MUST live inside ui_state. Do NOT put UI nodes in top-level \
+nodes — pydantic validation will reject the proposal.
+
+  {
+    \"nodes\": [                         // backend only
+      {\"id\": \"input\", \"type\": \"source\", \"source_mode\": \"camera|video_file|ndi|syphon|spout\"},
+      {\"id\": \"pipe\", \"type\": \"pipeline\", \"pipeline_id\": \"longlive\"},
+      {\"id\": \"output\", \"type\": \"sink\"}
+    ],
+    \"edges\": [                         // backend stream/parameter edges
+      {\"from\": \"input\", \"from_port\": \"video\", \"to_node\": \"pipe\", \"to_port\": \"video\", \"kind\": \"stream\"},
+      {\"from\": \"pipe\", \"from_port\": \"video\", \"to_node\": \"output\", \"to_port\": \"video\", \"kind\": \"stream\"}
+    ],
+    \"ui_state\": {                      // frontend-only overlay
+      \"nodes\": [ /* trigger, subgraph, primitive, slider, knobs, math, ... */ ],
+      \"edges\": [ /* wires between UI nodes and into pipeline parameters   */ ]
+    }
+  }
+
+When grafting a blueprint from get_blueprint: copy its nodes/edges into \
+ui_state.nodes / ui_state.edges (NOT top-level). Then wire its outputs \
+into your pipeline's modulatable parameters via ui_state.edges. Use \
+get_current_graph on a loaded workflow to see a concrete example of the \
+split before composing.
+
+Record nodes: add {\"id\": \"rec\", \"type\": \"record\"} at top-level and \
+fan out from pipeline output with a stream edge. Multiple sinks are \
+supported.
+
+WIRING (ui_state.edges)
+
+Every ui_state edge handle has the shape '<kind>:<name>' where kind is \
+either 'param' (discrete value) or 'stream' (frames/audio). The literal \
+prefix 'parameter:' is INVALID — always use 'param:'. If a proposed edge \
+uses any other prefix, the validator will reject the proposal.
+
+Before emitting any edge whose target is a pipeline node, call \
+get_pipeline_handles(pipeline_id). It returns the authoritative list of \
+valid stream_inputs / stream_outputs / param_inputs for that pipeline, \
+including aggregate handles (param:__prompt, param:__vace, param:__loras) \
+that only exist for VACE/LoRA-capable pipelines.
+
+Never fabricate a parameter name. If an expected handle isn't in \
+get_pipeline_handles, it does not exist. In particular: when the user asks \
+for reference-image / image-to-video conditioning, the answer is always \
+the VACE chain (image → vace.param:ref_image + vace.param:__vace → \
+pipeline.param:__vace) — NOT an invented handle like param:i2v_image, \
+param:ref, or param:reference. If the pipeline isn't VACE-capable, say so \
+and propose a VACE-capable alternative.
+
+Canonical patterns (copy verbatim, rename ids as needed):
+
+- Slider → pipeline parameter (e.g. noise_scale):
+    {\"id\":\"e_slider\",\"source\":\"slider_noise\",\"sourceHandle\":\"param:value\",\"target\":\"pipe\",\"targetHandle\":\"param:noise_scale\"}
+
+- Primitive string → pipeline prompt:
+    {\"id\":\"e_prompt\",\"source\":\"prompt_text\",\"sourceHandle\":\"param:value\",\"target\":\"pipe\",\"targetHandle\":\"param:__prompt\"}
+
+- Prompt-list → pipeline prompt (preferred for button-driven switching \
+between a fixed set of prompts — ALWAYS use prompt_list for \"switch \
+between N prompts with a button press\" requests):
+    {\"id\":\"e_plist\",\"source\":\"plist\",\"sourceHandle\":\"param:prompt\",\"target\":\"pipe\",\"targetHandle\":\"param:__prompt\"}
+    {\"id\":\"e_plist_trig\",\"source\":\"next_btn\",\"sourceHandle\":\"param:value\",\"target\":\"plist\",\"targetHandle\":\"param:trigger\"}
+  Set the prompts in the node's data.promptListItems: [\"prompt one\", \
+\"prompt two\", ...]. Use N distinct trigger nodes if the user wants each \
+prompt on its own button, or one trigger to advance through the list.
+
+- Prompt-switcher subgraph → pipeline prompt (fallback — only when a \
+prompt_list + trigger doesn't fit, e.g. time-based / conditional \
+switching. The subgraph must expose an output whose name is referenced \
+here):
+    {\"id\":\"e_switch\",\"source\":\"switcher_sg\",\"sourceHandle\":\"param:prompt\",\"target\":\"pipe\",\"targetHandle\":\"param:__prompt\"}
+
+- Image → VACE → pipeline (two edges — both required):
+    {\"id\":\"e_img_vace\",\"source\":\"ref_img\",\"sourceHandle\":\"param:value\",\"target\":\"vace_1\",\"targetHandle\":\"param:ref_image\"}
+    {\"id\":\"e_vace_pipe\",\"source\":\"vace_1\",\"sourceHandle\":\"param:__vace\",\"target\":\"pipe\",\"targetHandle\":\"param:__vace\"}
+
+Subgraph mechanics: internal nodes live in data.subgraphNodes; internal \
+edges in data.subgraphEdges (same shape as top-level). External ports are \
+declared in data.subgraphInputs / data.subgraphOutputs as \
+[{name, portType: 'param'|'stream', paramType, innerNodeId, \
+innerHandleId}]. External ui_state.edges reference those ports as \
+'param:<name>'. When extending a blueprint's subgraph (e.g. the 3-prompt \
+manual switcher to 5 prompts) you MUST modify BOTH data.subgraphNodes \
+(add the new primitive + extend the inner control's str_N / item_N \
+inputs) AND data.subgraphInputs (so the new trigger port is exposed \
+externally), AND the top-level ui_state.nodes + ui_state.edges so the \
+new trigger buttons actually wire into the new subgraph inputs.
+
+LAYOUT (ui_state.nodes positions)
+
+Top-level nodes (source/pipeline/sink/record) are auto-laid out by the \
+frontend in four columns: source≈x50, pipeline≈x350, sink≈x650, \
+record≈x950, each ~240×200, rows stacked at y=50, 210, 370, .... You \
+do NOT set positions for top-level nodes; leave them absent.
+
+For UI nodes (sliders, triggers, primitives, prompt_list, image, vace, \
+lora, subgraph, ...) you SET position.x / position.y. Follow these rules:
+
+- Use a single input column LEFT of the source column at x=-320. \
+Stack vertically at y=50, 220, 390, 560, .... If you run out of rows, \
+add a second column further left at x=-620.
+- Never place a UI node at x in [0, 1100] — that strip is owned by the \
+top-level columns and will visually collide with them.
+- Never place two UI nodes at the same (x, y), or within 160px \
+vertically in the same column. Every UI node should be treated as at \
+least 240 wide × 140 tall for spacing purposes. Image, vace, and \
+subgraph nodes are taller (≥ 280 tall) — give them 320px vertical \
+gaps.
+- If the workflow has >6 UI nodes, partition by role: primitives + \
+sliders in the x=-320 column, triggers + prompt_list in x=-620, \
+images + vace + lora in x=-920.
+- The canvas fits nodes automatically on import; don't worry about \
+negative x values.
+
+Completeness check (before calling propose_workflow, walk through the \
+user's intent):
+- Did the user name a specific pipeline? Is THAT pipeline (not a \
+substitute) the one in the graph?
+- Are all pipelines the user asked for present? Each wired as \
+source→pipeline→sink?
+- Did the user ask for prompts? Is there a node whose output lands on \
+param:__prompt of the pipeline?
+- Did the user ask to switch between a fixed list of prompts with a \
+button? A 'prompt_list' node (with data.promptListItems set) and a \
+trigger node wired into its param:trigger, with its param:prompt going \
+to the pipeline's param:__prompt?
+- VACE references? An 'image' node per reference, a 'vace' node, edges \
+into param:ref_image / param:first_frame / param:last_frame, and \
+param:__vace → pipeline.param:__vace?
+- LoRAs? 'lora' nodes, their param:lora → pipeline.param:__loras?
+- Sliders/knobs for modulatable parameters called out by name? Wired \
+into param:<that param> on the pipeline?
+- Did the user ask to record, save, or capture output? Add a \
+top-level {\"id\":\"rec\",\"type\":\"record\"} node and a top-level \
+stream edge from the pipeline into it.
+
+If propose_workflow returns issues, read each one, fix the listed edges \
+or nodes, and call propose_workflow again — do NOT apologize to the user \
+and ask them to wire anything by hand.
+
+STYLE
+- One sentence when confirming a tool outcome.
+- No meta-narration. Don't announce what you're about to do, don't \
+explain field-to-label mappings, don't describe your reasoning, don't \
+comment on tool outputs except to report the final result.
+- Tool calls render as their own UI in the chat; don't prefix them \
+with \"Let me call X\" or follow them with \"Calling Y now\".
+- Avoid restating the user's request.
+- Don't apologize unless something actually failed.
+"""
+
+
+def _format_sse(event: str, data: dict) -> str:
+    """Render one SSE frame: an ``event:`` line, a JSON ``data:`` line, and
+    the terminating blank line."""
+    payload = json.dumps(data)
+    return "event: {}\ndata: {}\n\n".format(event, payload)
+
+
+async def run_turn(
+    app: FastAPI,
+    session: AgentSession,
+    user_message: str,
+    *,
+    is_system_continuation: bool = False,
+) -> AsyncIterator[str]:
+    """Run one agent turn (may contain multiple provider calls due to tools).
+
+    Yields raw SSE-formatted strings ready to write to the response stream.
+
+    History invariant: every ``tool_use`` block persisted to
+    ``session.messages`` is followed by a user message carrying a matching
+    ``tool_result``. Anthropic rejects a conversation that violates this, so
+    the invariant is kept even when the provider reports an error or the
+    generator is cancelled/closed while tools are being dispatched.
+
+    Args:
+        app: the FastAPI app (for in-process httpx ASGITransport).
+        session: the agent session to mutate.
+        user_message: the human-authored user message OR a system
+            continuation message (e.g. "[user approved proposal X]").
+        is_system_continuation: True when the "user" message is actually a
+            synthetic continuation (approval/rejection). Does not change LLM
+            behavior but is tagged so the UI can hide it if it wants.
+    """
+    # 1. Provider build
+    try:
+        provider: LLMProvider = build_provider(session.config_snapshot)
+    except ProviderError as e:
+        yield _format_sse("error", {"message": str(e)})
+        yield _format_sse("turn_end", {"stop_reason": "error"})
+        return
+
+    # 2. Append user message to session history
+    session.messages.append({"role": "user", "content": user_message})
+    session.touch()
+    yield _format_sse(
+        "user_message_appended",
+        {"session_id": session.id, "is_continuation": is_system_continuation},
+    )
+
+    # 3. Tool loop
+    tool_specs = build_tool_specs()
+
+    async with AgentTools(app=app, session=session) as tools:
+        # Inner loop: keep calling provider while it asks for tools.
+        # Cap iterations to avoid runaway loops. Tuned generously:
+        # a single "build a workflow" turn can legitimately chain
+        # inspect state → list pipelines → read schema → check
+        # blueprints → propose → (apply → verify) and each of those
+        # may itself be a few tool calls.
+        MAX_TOOL_ROUNDS = 40
+        for _round in range(MAX_TOOL_ROUNDS):
+            # Accumulate assistant blocks (text + tool_use) so we can append
+            # them to session.messages as one assistant message.
+            assistant_blocks: list[dict] = []
+            tool_uses_this_round: list[dict] = []
+            text_buffer: list[str] = []
+
+            stop_reason = "end_turn"
+            error_message: str | None = None
+
+            try:
+                async for event in provider.stream_turn(
+                    SYSTEM_PROMPT, session.messages, tool_specs
+                ):
+                    if isinstance(event, TextDelta):
+                        text_buffer.append(event.text)
+                        yield _format_sse("text_delta", {"delta": event.text})
+
+                    elif isinstance(event, ToolUseStart):
+                        # Flush any pending text to assistant_blocks.
+                        if text_buffer:
+                            assistant_blocks.append(
+                                {"type": "text", "text": "".join(text_buffer)}
+                            )
+                            text_buffer = []
+                        yield _format_sse(
+                            "tool_call_start",
+                            {"id": event.id, "name": event.name},
+                        )
+
+                    elif isinstance(event, ToolUseEnd):
+                        tool_uses_this_round.append(
+                            {"id": event.id, "name": event.name, "input": event.input}
+                        )
+                        assistant_blocks.append(
+                            {
+                                "type": "tool_use",
+                                "id": event.id,
+                                "name": event.name,
+                                "input": event.input,
+                            }
+                        )
+                        yield _format_sse(
+                            "tool_call_input",
+                            {
+                                "id": event.id,
+                                "name": event.name,
+                                "input": event.input,
+                            },
+                        )
+
+                    elif isinstance(event, TurnEnd):
+                        stop_reason = event.stop_reason
+                        error_message = event.error_message
+                        # flush tail text
+                        if text_buffer:
+                            assistant_blocks.append(
+                                {"type": "text", "text": "".join(text_buffer)}
+                            )
+                            text_buffer = []
+                        break
+            except Exception as e:
+                logger.exception("Provider loop error")
+                yield _format_sse("error", {"message": str(e)})
+                yield _format_sse("turn_end", {"stop_reason": "error"})
+                return
+
+            # The stream may finish without a TurnEnd event; keep whatever
+            # text was already shown to the user in the history as well.
+            if text_buffer:
+                assistant_blocks.append({"type": "text", "text": "".join(text_buffer)})
+                text_buffer = []
+
+            # If the provider errored, surface and stop. The tools requested
+            # in this round will never run, so persist only the text blocks:
+            # a tool_use without a tool_result would make every later
+            # provider call on this session fail.
+            if stop_reason == "error":
+                text_only = [b for b in assistant_blocks if b["type"] != "tool_use"]
+                if text_only:
+                    session.messages.append({"role": "assistant", "content": text_only})
+                yield _format_sse(
+                    "error", {"message": error_message or "provider error"}
+                )
+                yield _format_sse("turn_end", {"stop_reason": "error"})
+                return
+
+            # Append the assistant turn to history (if non-empty).
+            if assistant_blocks:
+                session.messages.append(
+                    {"role": "assistant", "content": assistant_blocks}
+                )
+
+            # If no tool calls requested, we're done.
+            if not tool_uses_this_round:
+                yield _format_sse("turn_end", {"stop_reason": stop_reason})
+                return
+
+            # Dispatch tools, collect tool_result blocks.
+            tool_result_blocks: list[dict] = []
+            try:
+                for call in tool_uses_this_round:
+                    result_block = await _dispatch_tool(tools, call)
+                    # Record the result before yielding so it is not lost if
+                    # the consumer goes away at one of the yields below.
+                    tool_result_blocks.append(result_block["_anthropic_block"])
+                    # Emit a user-visible summary for the UI.
+                    yield _format_sse(
+                        "tool_call_result",
+                        {
+                            "id": call["id"],
+                            "name": call["name"],
+                            "ok": result_block.get("_ok", True),
+                            "summary": result_block.get("_summary", ""),
+                        },
+                    )
+                    # Emit workflow_proposal if propose_workflow succeeded.
+                    if (
+                        call["name"] == "propose_workflow"
+                        and session.pending_proposal is not None
+                        and session.pending_proposal.id
+                        == result_block.get("_proposal_id")
+                    ):
+                        pp = session.pending_proposal
+                        yield _format_sse(
+                            "workflow_proposal",
+                            {
+                                "proposal_id": pp.id,
+                                "graph": pp.graph,
+                                "graph_hash": pp.graph_hash_at_propose,
+                                "rationale": pp.rationale,
+                                "pipelines_to_load": sorted(
+                                    {
+                                        n.get("pipeline_id")
+                                        for n in pp.graph.get("nodes", [])
+                                        if n.get("type") == "pipeline"
+                                        and n.get("pipeline_id")
+                                    }
+                                ),
+                                "diff": pp.diff,
+                            },
+                        )
+            finally:
+                # Runs on normal completion AND on cancellation/close (e.g.
+                # client disconnect). Any tool_use that did not get a result
+                # is answered with an error block so the history stays valid.
+                answered = {block["tool_use_id"] for block in tool_result_blocks}
+                for call in tool_uses_this_round:
+                    if call["id"] not in answered:
+                        tool_result_blocks.append(
+                            _error_result(
+                                call["id"], "tool call interrupted before completion"
+                            )["_anthropic_block"]
+                        )
+                # Append the tool_result message; the loop then continues.
+                session.messages.append(
+                    {"role": "user", "content": tool_result_blocks}
+                )
+
+        # Hit iteration cap — force stop.
+        yield _format_sse(
+            "error",
+            {"message": f"exceeded {MAX_TOOL_ROUNDS} tool rounds; stopping"},
+        )
+        yield _format_sse("turn_end", {"stop_reason": "max_tokens"})
+
+
+async def _dispatch_tool(tools: AgentTools, call: dict[str, Any]) -> dict:
+    """Call the named tool, return a dict with:
+    - _anthropic_block: the tool_result content block to feed back to the LLM
+    - _summary: short human summary for the UI
+    - _ok: bool for UI status
+    - _proposal_id: only set for propose_workflow (used to emit SSE)
+
+    Never raises for tool failures: unknown tools, arguments that do not
+    match the tool's signature, and exceptions raised by the tool are all
+    returned as error results so the model can react to them.
+    """
+    import inspect
+
+    name = call["name"]
+    method_name = TOOL_METHODS.get(name)
+    tool_use_id = call["id"]
+
+    if method_name is None:
+        return _error_result(tool_use_id, f"unknown tool: {name}")
+
+    method = getattr(tools, method_name, None)
+    if method is None:
+        return _error_result(tool_use_id, f"tool not implemented: {name}")
+
+    kwargs = call.get("input") or {}
+
+    # Check the arguments against the signature BEFORE calling. Catching
+    # TypeError around the call itself would also swallow TypeErrors raised
+    # inside the tool and mislabel them as "bad arguments" without logging.
+    try:
+        signature = inspect.signature(method)
+    except (TypeError, ValueError):
+        signature = None  # not introspectable; let the call itself decide
+    if signature is not None:
+        try:
+            signature.bind(**kwargs)
+        except TypeError as e:
+            return _error_result(tool_use_id, f"bad arguments: {e}")
+
+    try:
+        result = await method(**kwargs)
+    except Exception as e:
+        logger.exception("tool %s failed", name)
+        return _error_result(tool_use_id, f"{type(e).__name__}: {e}")
+
+    # capture_frame → multimodal tool_result.
+    if name == "capture_frame" and isinstance(result, dict) and "base64" in result:
+        text_summary = f"Captured frame ({result.get('size_bytes', 0)} bytes)" + (
+            f" from sink '{result['sink_node_id']}'"
+            if result.get("sink_node_id")
+            else ""
+        )
+        return {
+            "_anthropic_block": {
+                "type": "tool_result",
+                "tool_use_id": tool_use_id,
+                "content": [
+                    {"type": "text", "text": text_summary},
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": result.get("media_type", "image/jpeg"),
+                            "data": result["base64"],
+                        },
+                    },
+                ],
+            },
+            "_summary": text_summary,
+            "_ok": True,
+        }
+
+    # propose_workflow bookkeeping.
+    proposal_id = None
+    if name == "propose_workflow" and isinstance(result, dict) and result.get("ok"):
+        proposal_id = result.get("proposal_id")
+
+    # A dict result with a truthy "error" key is the tools' way of reporting
+    # failure without raising.
+    is_error = isinstance(result, dict) and bool(result.get("error"))
+
+    summary = _summarize_result(name, result)
+    return {
+        "_anthropic_block": {
+            "type": "tool_result",
+            "tool_use_id": tool_use_id,
+            "content": [{"type": "text", "text": json.dumps(result, default=str)}],
+            **({"is_error": True} if is_error else {}),
+        },
+        "_summary": summary,
+        "_ok": not is_error,
+        "_proposal_id": proposal_id,
+    }
+
+
+def _summarize_result(name: str, result: Any) -> str:
+    """Build the short, human-readable summary shown in the UI for a tool call.
+
+    Args:
+        name: tool name as requested by the model.
+        result: whatever the tool returned; usually a dict.
+
+    Returns:
+        A one-line summary. Non-dict results and tools without a dedicated
+        format are truncated to 200 characters.
+
+    The caller invokes this outside any ``try`` block, so it is written to
+    tolerate ``None`` or non-string values in the result instead of raising.
+    """
+    if not isinstance(result, dict):
+        return str(result)[:200]
+    if result.get("error"):
+        return f"error: {result['error']}"
+    if name == "list_pipelines":
+        return f"Found {result.get('count', 0)} pipelines"
+    if name == "list_blueprints":
+        return f"Found {result.get('count', 0)} blueprints"
+    if name == "get_pipeline_schema":
+        return f"Schema for pipeline (fields: {len(result.get('config_schema', {}) or {})})"
+    if name == "update_parameters":
+        # list() accepts any iterable (e.g. a dict of name -> value) and
+        # str() keeps join() from failing on non-string entries.
+        applied = list(result.get("applied") or [])
+        shown = ", ".join(str(item) for item in applied[:5])
+        return f"Applied {len(applied)} parameter(s): {shown}"
+    if name == "propose_workflow":
+        # "rationale" may be present but None; slicing None would raise.
+        rationale = str(result.get("rationale") or "")
+        return (
+            f"Proposed workflow {result.get('proposal_id', '?')}: "
+            f"{rationale[:100]}"
+        )
+    if name == "apply_workflow":
+        return "Workflow applied" if result.get("ok") else "Apply failed"
+    if name == "capture_frame":
+        return f"Captured {result.get('size_bytes', 0)} bytes"
+    return json.dumps(result, default=str)[:200]
+
+
+def _error_result(tool_use_id: str, message: str) -> dict:
+    """Build a failed dispatch result: an ``is_error`` tool_result block for
+    the LLM plus the same message as the UI summary."""
+    block = {
+        "type": "tool_result",
+        "tool_use_id": tool_use_id,
+        "content": [{"type": "text", "text": message}],
+        "is_error": True,
+    }
+    return {"_anthropic_block": block, "_summary": message, "_ok": False}
+
+
+# ----------------------------------------------------------------------
+# Decision handling (approve/reject proposals)
+# ----------------------------------------------------------------------
+
+
+def build_decision_continuation_message(
+    approved: bool,
+    proposal_id: str,
+    graph_hash: str,
+    reason: str | None = None,
+) -> str:
+    """Compose the synthetic "user" message that starts the turn following a
+    proposal decision.
+
+    On approval the text instructs the model to call apply_workflow with the
+    given proposal id and graph hash; on rejection it relays the reason (or
+    a placeholder when none was given) and asks for a revision or question.
+    """
+    if not approved:
+        stated_reason = reason or "no reason given"
+        return (
+            f"[System] The user rejected proposal {proposal_id}. "
+            f"Reason: {stated_reason}. "
+            f"Ask a clarifying question or propose a revised workflow."
+        )
+
+    return (
+        f"[System] The user approved proposal {proposal_id}. The graph "
+        f"has already been written to the canvas. Call apply_workflow "
+        f'with proposal_id="{proposal_id}" and '
+        f'expected_graph_hash="{graph_hash}" to clear the pending '
+        f"proposal, then briefly confirm (one sentence) that the graph "
+        f"is now on the canvas and the user can press Play. Do NOT "
+        f"call load_pipeline, session start, or any other tool."
+    )
diff --git a/src/scope/server/agent_providers.py b/src/scope/server/agent_providers.py
new file mode 100644
index 000000000..0019be0cb
--- /dev/null
+++ b/src/scope/server/agent_providers.py
@@ -0,0 +1,532 @@
+"""LLM provider abstraction.
+
+Three concrete providers:
+- AnthropicProvider        — official SDK, default.
+- OpenAICompatibleProvider — works with OpenAI, OpenRouter, Groq, together.ai,
+                              Fireworks, vLLM, LM Studio, Ollama (OpenAI-shape).
+- SelfHostedProvider       — thin subclass of OpenAICompatibleProvider tuned
+                              for local endpoints (Ollama default).
+
+All providers yield a uniform ProviderEvent stream so agent_loop doesn't care
+which backend is running. Messages follow Anthropic's shape internally; the
+OpenAI provider translates to/from OpenAI's chat format at the boundary.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from collections.abc import AsyncIterator
+from dataclasses import dataclass
+from typing import Any, Literal, Protocol
+
+import httpx
+
+from .agent_state import AgentConfig, get_provider_key
+
+logger = logging.getLogger(__name__)
+
+
+# ----------------------------------------------------------------------
+# Event types (uniform across providers)
+# ----------------------------------------------------------------------
+
+
+@dataclass
+class TextDelta:
+    """A chunk of assistant text streamed by a provider."""
+
+    # The text fragment exactly as received from the backend.
+    text: str
+
+
+@dataclass
+class ToolUseStart:
+    """Signals that the model has begun a tool call.
+
+    Emitted before the call's arguments have finished streaming; the
+    arguments arrive later in the ToolUseEnd event.
+    """
+
+    # Tool-call id (provider-issued, or synthesized by the OpenAI parser).
+    id: str
+    # Name of the tool being invoked.
+    name: str
+
+
+@dataclass
+class ToolUseEnd:
+    """Signals that a tool call's arguments are complete and decoded."""
+
+    # Tool-call id.
+    id: str
+    # Name of the tool being invoked.
+    name: str
+    # Decoded JSON arguments; the providers in this file fall back to {} when
+    # no arguments were streamed or the buffered JSON could not be decoded.
+    input: dict
+
+
+@dataclass
+class TurnEnd:
+    """Final event of a provider turn."""
+
+    # Why the turn stopped. "error" is produced by the providers themselves
+    # when the request or stream fails.
+    stop_reason: Literal["end_turn", "tool_use", "max_tokens", "stop_sequence", "error"]
+    # Failure description; the providers in this file only set it together
+    # with stop_reason == "error".
+    error_message: str | None = None
+
+
+ProviderEvent = TextDelta | ToolUseStart | ToolUseEnd | TurnEnd
+
+
+class ProviderError(RuntimeError):
+    """Raised by build_provider when a provider cannot be constructed.
+
+    In this file that means a required API key is missing or the configured
+    provider kind is unknown.
+    """
+
+    pass
+
+
+# ----------------------------------------------------------------------
+# Protocol
+# ----------------------------------------------------------------------
+
+
+class LLMProvider(Protocol):
+    """Structural interface implemented by every concrete provider.
+
+    ``build_provider`` returns an object of this shape.
+    """
+
+    # Run one model turn and stream it back as ProviderEvents.
+    #   system:     system prompt text.
+    #   messages:   conversation history in Anthropic message shape.
+    #   tools:      tool definitions in Anthropic shape.
+    #   max_tokens: generation cap forwarded to the backend.
+    # The concrete providers in this file finish every turn with a TurnEnd
+    # and report failures as TurnEnd(stop_reason="error") rather than raising.
+    async def stream_turn(
+        self,
+        system: str,
+        messages: list[dict],
+        tools: list[dict],
+        *,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[ProviderEvent]: ...
+
+    async def ping(self) -> dict:
+        """Trivial round-trip for the Settings "Test connection" button."""
+        ...
+
+
+# ----------------------------------------------------------------------
+# Factory
+# ----------------------------------------------------------------------
+
+
+def build_provider(config: AgentConfig) -> LLMProvider:
+    """Instantiate the provider selected by ``config.provider``.
+
+    The API key is looked up through ``get_provider_key``. The hosted
+    providers ("anthropic", "openai_compatible") require one; "self_hosted"
+    does not.
+
+    Args:
+        config: Provider kind, model id and optional base URL override.
+
+    Returns:
+        A provider instance for the configured backend.
+
+    Raises:
+        ProviderError: If a required API key is missing or the provider
+            kind is not recognised.
+    """
+    kind = config.provider
+    api_key = get_provider_key(kind)
+
+    if kind == "self_hosted":
+        # Self-hosted endpoints (Ollama, vLLM, LM Studio) usually don't
+        # require a key, but one is still forwarded in case the endpoint
+        # sits behind an auth proxy.
+        endpoint = config.base_url or "http://localhost:11434/v1"
+        return SelfHostedProvider(
+            api_key=api_key or "",
+            model=config.model,
+            base_url=endpoint,
+        )
+
+    if kind == "anthropic":
+        if api_key:
+            return AnthropicProvider(
+                api_key=api_key,
+                model=config.model,
+                base_url=config.base_url,
+            )
+        raise ProviderError(
+            "ANTHROPIC_API_KEY not set. Configure it in Settings → Agent."
+        )
+
+    if kind == "openai_compatible":
+        if api_key:
+            endpoint = config.base_url or "https://api.openai.com/v1"
+            return OpenAICompatibleProvider(
+                api_key=api_key,
+                model=config.model,
+                base_url=endpoint,
+            )
+        raise ProviderError(
+            "OpenAI-compatible API key not set. Configure it in Settings → Agent."
+        )
+
+    raise ProviderError(f"unknown provider: {config.provider}")
+
+
+# ----------------------------------------------------------------------
+# Anthropic
+# ----------------------------------------------------------------------
+
+
+class AnthropicProvider:
+    """Provider backed by the official Anthropic async SDK."""
+
+    def __init__(self, api_key: str, model: str, base_url: str | None = None) -> None:
+        """Create the SDK client.
+
+        Args:
+            api_key: Anthropic API key.
+            model: Model id used for every request.
+            base_url: Optional endpoint override; omitted when falsy so the
+                SDK default applies.
+        """
+        # Import lazily so the server still starts if anthropic isn't installed
+        # for people who only use a self-hosted model.
+        from anthropic import AsyncAnthropic
+
+        kwargs: dict[str, Any] = {"api_key": api_key}
+        if base_url:
+            kwargs["base_url"] = base_url
+        self._client = AsyncAnthropic(**kwargs)
+        self._model = model
+
+    @staticmethod
+    def _parse_tool_input(tool_name: str, fragments: list[str]) -> dict:
+        """Join streamed ``input_json_delta`` fragments and decode them.
+
+        Decoding stays best-effort: malformed JSON, or JSON that is not an
+        object, yields ``{}`` so the turn can continue. The fallback is
+        logged so a tool being called with empty arguments can be traced
+        back to the bad payload.
+        """
+        raw = "".join(fragments).strip() or "{}"
+        try:
+            parsed = json.loads(raw)
+        except Exception as e:
+            logger.warning(
+                f"Tool {tool_name}: malformed JSON input ({len(raw)} chars), "
+                f"using empty input: {e}"
+            )
+            return {}
+        if not isinstance(parsed, dict):
+            logger.warning(
+                f"Tool {tool_name}: input is {type(parsed).__name__}, "
+                f"not an object; using empty input"
+            )
+            return {}
+        return parsed
+
+    async def stream_turn(
+        self,
+        system: str,
+        messages: list[dict],
+        tools: list[dict],
+        *,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[ProviderEvent]:
+        """Stream one model turn as uniform ProviderEvents.
+
+        Yields TextDelta for text, ToolUseStart/ToolUseEnd around each tool
+        call, and a final TurnEnd. Any exception raised while streaming is
+        logged and reported as ``TurnEnd(stop_reason="error")`` instead of
+        propagating.
+        """
+        # Anthropic's tool-use SDK returns streaming events we forward directly.
+        # We rely on the SDK's event iterator instead of re-parsing raw SSE.
+        try:
+            async with self._client.messages.stream(
+                model=self._model,
+                system=system,
+                messages=messages,
+                tools=tools,
+                max_tokens=max_tokens,
+            ) as stream:
+                # Tool call currently being streamed (None between calls)
+                # and the JSON fragments received for it so far.
+                current_tool: dict | None = None
+                tool_input_buffer: list[str] = []
+
+                async for event in stream:
+                    etype = getattr(event, "type", None)
+
+                    if etype == "content_block_start":
+                        block = getattr(event, "content_block", None)
+                        if (
+                            block is not None
+                            and getattr(block, "type", None) == "tool_use"
+                        ):
+                            current_tool = {
+                                "id": block.id,
+                                "name": block.name,
+                            }
+                            tool_input_buffer = []
+                            yield ToolUseStart(id=block.id, name=block.name)
+
+                    elif etype == "content_block_delta":
+                        delta = getattr(event, "delta", None)
+                        if delta is None:
+                            continue
+                        dtype = getattr(delta, "type", None)
+                        if dtype == "text_delta":
+                            yield TextDelta(text=delta.text)
+                        elif dtype == "input_json_delta":
+                            tool_input_buffer.append(delta.partial_json)
+
+                    elif etype == "content_block_stop":
+                        # Only tool_use blocks need closing; other block
+                        # types were already forwarded as deltas.
+                        if current_tool is not None:
+                            yield ToolUseEnd(
+                                id=current_tool["id"],
+                                name=current_tool["name"],
+                                input=self._parse_tool_input(
+                                    current_tool["name"], tool_input_buffer
+                                ),
+                            )
+                            current_tool = None
+                            tool_input_buffer = []
+
+                final = await stream.get_final_message()
+                yield TurnEnd(stop_reason=_safe_stop_reason(final.stop_reason))
+        except Exception as e:
+            logger.exception("Anthropic stream error")
+            yield TurnEnd(stop_reason="error", error_message=str(e))
+
+    async def ping(self) -> dict:
+        """Send a minimal request to verify credentials and model.
+
+        Returns:
+            ``{"ok": True, "provider", "model", "sample"}`` where ``sample``
+            is the first text block of the reply.
+
+        Exceptions raised by the SDK call are not caught here.
+        """
+        msg = await self._client.messages.create(
+            model=self._model,
+            max_tokens=16,
+            messages=[{"role": "user", "content": "Reply with OK."}],
+        )
+        return {
+            "ok": True,
+            "provider": "anthropic",
+            "model": self._model,
+            "sample": _first_text_block(msg),
+        }
+
+
+def _safe_stop_reason(
+    raw: Any,
+) -> Literal["end_turn", "tool_use", "max_tokens", "stop_sequence", "error"]:
+    """Clamp an SDK stop reason to the values TurnEnd accepts.
+
+    The four recognised reasons pass through unchanged; anything else
+    (including ``None``) is reported as "end_turn".
+    """
+    recognised = ("end_turn", "tool_use", "max_tokens", "stop_sequence")
+    return raw if raw in recognised else "end_turn"
+
+
+def _first_text_block(msg: Any) -> str:
+    """Return the text of the first ``type == "text"`` block in ``msg.content``.
+
+    Yields "" when ``msg`` has no (or empty) content, when no text block is
+    present, or when the matching block carries no ``text`` attribute.
+    """
+    blocks = getattr(msg, "content", []) or []
+    text_blocks = (b for b in blocks if getattr(b, "type", None) == "text")
+    first = next(text_blocks, None)
+    if first is None:
+        return ""
+    return getattr(first, "text", "")
+
+
+# ----------------------------------------------------------------------
+# OpenAI-compatible
+# ----------------------------------------------------------------------
+
+
+class OpenAICompatibleProvider:
+    """Provider for any server that speaks OpenAI's ``/chat/completions`` API."""
+
+    def __init__(self, api_key: str, model: str, base_url: str) -> None:
+        """Store connection settings.
+
+        Args:
+            api_key: Bearer token; an empty string disables the
+                Authorization header.
+            model: Model id sent with every request.
+            base_url: API root (e.g. ``https://api.openai.com/v1``).
+        """
+        self._api_key = api_key
+        self._model = model
+        # Drop trailing slashes so endpoint URLs can be built with a plain "/".
+        self._base_url = base_url.rstrip("/")
+
+    def _headers(self) -> dict[str, str]:
+        """Request headers shared by stream_turn and ping."""
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if self._api_key:
+            headers["Authorization"] = f"Bearer {self._api_key}"
+        return headers
+
+    async def stream_turn(
+        self,
+        system: str,
+        messages: list[dict],
+        tools: list[dict],
+        *,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[ProviderEvent]:
+        """Stream one model turn as uniform ProviderEvents.
+
+        ``messages`` and ``tools`` arrive in Anthropic shape and are
+        translated to OpenAI's chat format before sending. A non-200
+        response or any exception is reported as
+        ``TurnEnd(stop_reason="error")`` instead of being raised.
+        """
+        oai_messages = [{"role": "system", "content": system}]
+        oai_messages.extend(_anthropic_messages_to_openai(messages))
+
+        body: dict[str, Any] = {
+            "model": self._model,
+            "messages": oai_messages,
+            "stream": True,
+            "max_tokens": max_tokens,
+        }
+        # The OpenAI API rejects an empty "tools" array, so only send the
+        # key when there is at least one tool to offer.
+        if tools:
+            body["tools"] = [_anthropic_tool_to_openai(t) for t in tools]
+
+        try:
+            async with httpx.AsyncClient(timeout=300.0) as client:
+                async with client.stream(
+                    "POST",
+                    f"{self._base_url}/chat/completions",
+                    json=body,
+                    headers=self._headers(),
+                ) as resp:
+                    if resp.status_code != 200:
+                        # Surface a bounded excerpt of the error body.
+                        text = await resp.aread()
+                        yield TurnEnd(
+                            stop_reason="error",
+                            error_message=(
+                                f"{resp.status_code}: "
+                                f"{text.decode('utf-8', errors='replace')[:500]}"
+                            ),
+                        )
+                        return
+                    async for ev in _parse_openai_stream(resp):
+                        yield ev
+        except Exception as e:
+            logger.exception("OpenAI-compatible stream error")
+            yield TurnEnd(stop_reason="error", error_message=str(e))
+
+    async def ping(self) -> dict:
+        """Send a minimal non-streaming request to verify the connection.
+
+        Returns:
+            ``{"ok": True, "provider", "model", "base_url", "sample"}`` where
+            ``sample`` is the reply text, or "" when the response carries no
+            choices or no message content.
+
+        Raises:
+            httpx.HTTPStatusError: On a non-2xx response
+                (via ``raise_for_status``).
+        """
+        body = {
+            "model": self._model,
+            "messages": [{"role": "user", "content": "Reply with OK."}],
+            "max_tokens": 16,
+        }
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.post(
+                f"{self._base_url}/chat/completions",
+                json=body,
+                headers=self._headers(),
+            )
+            resp.raise_for_status()
+            data = resp.json()
+            # Tolerate an empty "choices" list and null message/content
+            # instead of failing a successful round-trip.
+            choices = data.get("choices") or [{}]
+            message = choices[0].get("message") or {}
+            sample = message.get("content") or ""
+            return {
+                "ok": True,
+                "provider": "openai_compatible",
+                "model": self._model,
+                "base_url": self._base_url,
+                "sample": sample,
+            }
+
+
+class SelfHostedProvider(OpenAICompatibleProvider):
+    """Same wire protocol, different defaults.
+
+    The class adds no behaviour of its own. The differing defaults are
+    applied by ``build_provider``, which falls back to
+    ``http://localhost:11434/v1`` as the base URL and does not require an
+    API key for this provider kind.
+    """
+
+    pass
+
+
+# ----------------------------------------------------------------------
+# OpenAI stream parsing
+# ----------------------------------------------------------------------
+
+
+async def _parse_openai_stream(resp: httpx.Response) -> AsyncIterator[ProviderEvent]:
+    """Parse OpenAI-style SSE and yield uniform ProviderEvents.
+
+    OpenAI tool calls are streamed in pieces: the assistant sends a
+    ``tool_calls`` array where each entry has a stable ``index`` and its
+    arguments arrive as partial JSON deltas. We buffer per-index until the
+    finish_reason lands, then emit ToolUseEnd(s).
+
+    Guarantees:
+    - Each tool call produces exactly one ToolUseStart, and the matching
+      ToolUseEnd carries the same id.
+    - If any tool call was emitted and the server reported a plain stop,
+      the turn ends with "tool_use", since the assistant message contains
+      tool calls that must be answered.
+    - Arguments that are not a valid JSON object fall back to ``{}`` and
+      are logged.
+
+    Args:
+        resp: An open streaming response whose body is an SSE stream.
+
+    Yields:
+        TextDelta / ToolUseStart / ToolUseEnd events, then one TurnEnd.
+    """
+    # index -> {"id", "name", "args": [fragments], "started": bool}
+    tool_calls_by_index: dict[int, dict] = {}
+    stop_reason: Literal[
+        "end_turn", "tool_use", "max_tokens", "stop_sequence", "error"
+    ] = "end_turn"
+
+    async for raw_line in resp.aiter_lines():
+        if not raw_line:
+            continue
+        line = raw_line.strip()
+        if not line.startswith("data:"):
+            continue
+        payload = line[5:].strip()
+        if payload == "[DONE]":
+            break
+        try:
+            event = json.loads(payload)
+        except Exception:
+            # Best-effort: skip data lines that are not valid JSON.
+            continue
+        if not isinstance(event, dict):
+            continue
+
+        choices = event.get("choices") or []
+        if not choices:
+            continue
+        choice = choices[0]
+        delta = choice.get("delta") or {}
+
+        content = delta.get("content")
+        if content:
+            yield TextDelta(text=content)
+
+        for tc in delta.get("tool_calls") or []:
+            idx = tc.get("index", 0)
+            bucket = tool_calls_by_index.setdefault(
+                idx, {"id": None, "name": None, "args": [], "started": False}
+            )
+            # Once ToolUseStart has been announced the id and name are
+            # frozen, so ToolUseEnd always matches it.
+            if tc.get("id") and not bucket["started"]:
+                bucket["id"] = tc["id"]
+            fn = tc.get("function") or {}
+            if fn.get("name") and not bucket["started"]:
+                bucket["name"] = fn["name"]
+                if bucket["id"] is None:
+                    # Some servers drop the id; synthesize a stable one.
+                    bucket["id"] = f"oai_tool_{idx}"
+                bucket["started"] = True
+                yield ToolUseStart(id=bucket["id"], name=bucket["name"])
+            if "arguments" in fn:
+                bucket["args"].append(fn["arguments"] or "")
+
+        finish = choice.get("finish_reason")
+        if finish is not None:
+            if finish == "tool_calls":
+                stop_reason = "tool_use"
+            elif finish == "length":
+                stop_reason = "max_tokens"
+            elif finish == "stop":
+                stop_reason = "end_turn"
+            # Any other finish reason keeps the "end_turn" default.
+            break
+
+    # Emit any buffered tool calls, in index order.
+    emitted_tool_call = False
+    for idx, bucket in sorted(tool_calls_by_index.items()):
+        name = bucket["name"] or ""
+        if not name:
+            # A call that never received a name cannot be dispatched.
+            continue
+        raw = "".join(bucket["args"]).strip() or "{}"
+        try:
+            parsed = json.loads(raw)
+        except Exception as e:
+            logger.warning(
+                f"Tool {name}: malformed JSON arguments ({len(raw)} chars), "
+                f"using empty input: {e}"
+            )
+            parsed = {}
+        if not isinstance(parsed, dict):
+            logger.warning(
+                f"Tool {name}: arguments are {type(parsed).__name__}, "
+                f"not an object; using empty input"
+            )
+            parsed = {}
+        tid = bucket["id"] or f"oai_tool_{idx}"
+        yield ToolUseEnd(id=tid, name=name, input=parsed)
+        emitted_tool_call = True
+
+    # Some OpenAI-compatible servers finish with "stop" (or no finish_reason
+    # at all) even though they streamed tool calls. Report "tool_use" so the
+    # stop reason agrees with the ToolUseEnd events just emitted. A
+    # "max_tokens" stop is kept as-is because the arguments may be truncated.
+    if emitted_tool_call and stop_reason == "end_turn":
+        stop_reason = "tool_use"
+    yield TurnEnd(stop_reason=stop_reason)
+
+
+# ----------------------------------------------------------------------
+# Anthropic <-> OpenAI message/tool translation
+# ----------------------------------------------------------------------
+
+
+def _anthropic_tool_to_openai(tool: dict) -> dict:
+    """Translate an Anthropic tool definition into an OpenAI function tool.
+
+    ``name`` is required (KeyError if absent). A missing description becomes
+    "" and a missing or empty ``input_schema`` becomes an object schema with
+    no properties.
+    """
+    schema = tool.get("input_schema")
+    if not schema:
+        schema = {"type": "object", "properties": {}}
+    function_spec = {
+        "name": tool["name"],
+        "description": tool.get("description", ""),
+        "parameters": schema,
+    }
+    return {"type": "function", "function": function_spec}
+
+
+def _anthropic_messages_to_openai(messages: list[dict]) -> list[dict]:
+    """Convert Anthropic-shape messages into OpenAI chat messages.
+
+    Anthropic messages are ``{role, content}`` where content is either a
+    plain string or a list of blocks. The mapping is:
+
+    - String content is passed through unchanged for user/assistant roles.
+    - User ``text`` blocks and base64 ``image`` blocks become content parts
+      (``text`` / ``image_url`` with a data URI) of a single user message,
+      emitted only when at least one part exists.
+    - Each user ``tool_result`` block becomes its own ``tool`` role message
+      (via ``_tool_result_to_oai_msg``), placed before that user message.
+    - Assistant ``text`` blocks are concatenated into ``content`` and
+      ``tool_use`` blocks become ``tool_calls`` with JSON-encoded arguments.
+    - Messages with any other role, and blocks of any other type, are dropped.
+    """
+    converted: list[dict] = []
+
+    for message in messages:
+        role = message.get("role")
+        body = message.get("content")
+
+        if role == "assistant":
+            if isinstance(body, str):
+                converted.append({"role": "assistant", "content": body})
+                continue
+
+            blocks = list(body or [])
+            text = "".join(
+                b.get("text", "") for b in blocks if b.get("type") == "text"
+            )
+            calls = [
+                {
+                    "id": b.get("id", ""),
+                    "type": "function",
+                    "function": {
+                        "name": b.get("name", ""),
+                        "arguments": json.dumps(b.get("input") or {}),
+                    },
+                }
+                for b in blocks
+                if b.get("type") == "tool_use"
+            ]
+            reply: dict = {"role": "assistant", "content": text}
+            if calls:
+                reply["tool_calls"] = calls
+            converted.append(reply)
+            continue
+
+        if role != "user":
+            continue
+
+        # User messages can carry either text or tool_result blocks.
+        if isinstance(body, str):
+            converted.append({"role": "user", "content": body})
+            continue
+
+        parts: list[Any] = []
+        for block in body or []:
+            kind = block.get("type")
+            if kind == "tool_result":
+                # Tool results get a dedicated 'tool' role message each.
+                converted.append(_tool_result_to_oai_msg(block))
+            elif kind == "text":
+                parts.append({"type": "text", "text": block.get("text", "")})
+            elif kind == "image":
+                source = block.get("source") or {}
+                if source.get("type") != "base64":
+                    continue
+                media_type = source.get("media_type", "image/jpeg")
+                encoded = source.get("data", "")
+                parts.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{media_type};base64,{encoded}"},
+                    }
+                )
+        if parts:
+            converted.append({"role": "user", "content": parts})
+
+    return converted
+
+
+def _tool_result_to_oai_msg(block: dict) -> dict:
+    """Flatten an Anthropic tool_result block into an OpenAI 'tool' message.
+
+    The tool message content is produced as plain text:
+
+    - String content is used verbatim.
+    - For a list, ``text`` items contribute their text, ``image`` items are
+      replaced by a fixed placeholder line, other dict items are skipped,
+      and non-dict items are stringified. Lines are joined with newlines;
+      an empty result becomes "(empty)".
+    """
+    call_id = block.get("tool_use_id", "")
+    payload = block.get("content")
+
+    if isinstance(payload, str):
+        text = payload
+    else:
+        lines: list[str] = []
+        for entry in payload or []:
+            if not isinstance(entry, dict):
+                lines.append(str(entry))
+                continue
+            kind = entry.get("type")
+            if kind == "text":
+                lines.append(entry.get("text", ""))
+            elif kind == "image":
+                lines.append("[image attached in prior context]")
+        text = "\n".join(lines) or "(empty)"
+
+    return {"role": "tool", "tool_call_id": call_id, "content": text}
diff --git a/src/scope/server/agent_state.py b/src/scope/server/agent_state.py
new file mode 100644
index 000000000..dc63b82b9
--- /dev/null
+++ b/src/scope/server/agent_state.py
@@ -0,0 +1,294 @@
+"""Agent session + provider config state (in-memory, single-process).
+
+MVP scope:
+- AgentSession: full conversation history per session_id, in memory only.
+- AgentConfig: which provider + model to use (persisted to disk so users don't
+  have to re-enter settings every server restart).
+- WorkflowProposal: pending proposal awaiting user decision.
+
+No database, no cross-process sharing, no persistence of chat history. Sessions
+are evicted after 1h idle. This matches the "MVP" posture in the plan.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import time
+import uuid
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Literal
+
+logger = logging.getLogger(__name__)
+
+# Config file for provider selection (persisted across restarts).
+AGENT_CONFIG_FILE = "~/.daydream-scope/agent_config.json"
+
+# Provider token files (api keys), same 0o600 pattern as CivitAI.
+ANTHROPIC_TOKEN_FILE = "~/.daydream-scope/anthropic_token"
+OPENAI_TOKEN_FILE = "~/.daydream-scope/openai_token"
+LLM_CUSTOM_TOKEN_FILE = "~/.daydream-scope/llm_custom_token"
+
+# Env var names checked before stored files.
+ANTHROPIC_ENV = "ANTHROPIC_API_KEY"
+OPENAI_ENV = "OPENAI_API_KEY"
+LLM_CUSTOM_ENV = "LLM_API_KEY"
+
+ProviderKind = Literal["anthropic", "openai_compatible", "self_hosted"]
+
+
+# ----------------------------------------------------------------------
+# Provider config
+# ----------------------------------------------------------------------
+
+
+@dataclass
+class AgentConfig:
+    """Which LLM provider and model the agent talks to.
+
+    Persisted as JSON at ~/.daydream-scope/agent_config.json. The API key is
+    deliberately not part of this object; it is resolved at call time from
+    an environment variable or a token file on disk.
+    """
+
+    provider: ProviderKind = "anthropic"
+    model: str = "claude-sonnet-4-6"
+    # Optional endpoint override; applies to any provider.
+    base_url: str | None = None
+
+    def to_json(self) -> dict[str, Any]:
+        """Return the config as a plain JSON-serializable dict."""
+        payload: dict[str, Any] = {}
+        payload["provider"] = self.provider
+        payload["model"] = self.model
+        payload["base_url"] = self.base_url
+        return payload
+
+    @classmethod
+    def from_json(cls, data: dict[str, Any]) -> AgentConfig:
+        """Build a config from a decoded JSON dict, defaulting missing keys."""
+        provider = data.get("provider", "anthropic")
+        model = data.get("model", "claude-sonnet-4-6")
+        return cls(provider=provider, model=model, base_url=data.get("base_url"))
+
+
+def _config_path() -> Path:
+    """Return the resolved path of the agent config file (``~`` expanded)."""
+    expanded = Path(AGENT_CONFIG_FILE).expanduser()
+    return expanded.resolve()
+
+
+def load_agent_config() -> AgentConfig:
+    """Load the persisted agent config, falling back to defaults.
+
+    Returns a default ``AgentConfig`` when the file does not exist, and also
+    (after logging a warning) when it cannot be read or parsed.
+    """
+    config_file = _config_path()
+    if config_file.exists():
+        try:
+            raw = json.loads(config_file.read_text())
+            return AgentConfig.from_json(raw)
+        except Exception as e:
+            logger.warning(f"Failed to load agent config, using defaults: {e}")
+    return AgentConfig()
+
+
+def save_agent_config(cfg: AgentConfig) -> None:
+    """Write ``cfg`` to the agent config file as indented JSON.
+
+    Missing parent directories are created first.
+    """
+    target = _config_path()
+    target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(cfg.to_json(), indent=2)
+    target.write_text(serialized)
+
+
+# ----------------------------------------------------------------------
+# Provider API key resolution
+# ----------------------------------------------------------------------
+
+
+def _resolve_key(env_var: str, token_file: str) -> str | None:
+    """Resolve an API key from the environment, then from a token file.
+
+    Args:
+        env_var: Name of the environment variable checked first.
+        token_file: Path (``~`` allowed) of the fallback token file.
+
+    Returns:
+        The stripped key, or ``None`` when neither source holds a non-blank
+        value or the token file cannot be read (a warning is logged).
+    """
+    # Strip before testing so a blank or whitespace-only variable is treated
+    # as unset and falls through to the token file instead of hiding it.
+    env_value = (os.environ.get(env_var) or "").strip()
+    if env_value:
+        return env_value
+    path = Path(token_file).expanduser().resolve()
+    if not path.exists():
+        return None
+    try:
+        return path.read_text().strip() or None
+    except Exception as e:
+        logger.warning(f"Failed to read token file {path}: {e}")
+        return None
+
+
+def _save_key(token_file: str, value: str) -> None:
+    """Persist an API key to ``token_file`` with owner-only permissions.
+
+    Args:
+        token_file: Destination path (``~`` allowed); parent directories
+            are created as needed.
+        value: The secret to store, written as-is.
+    """
+    path = Path(token_file).expanduser().resolve()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    # Lock the file down *before* the secret is written. touch() creates a
+    # missing file with mode 0o600, and chmod() tightens a file that already
+    # existed with looser permissions. This closes the window in which the
+    # key would otherwise sit on disk with default permissions.
+    path.touch(mode=0o600, exist_ok=True)
+    path.chmod(0o600)
+    path.write_text(value)
+
+
+def _delete_key(token_file: str) -> None:
+    """Remove a stored token file; a missing file is not an error."""
+    path = Path(token_file).expanduser().resolve()
+    # missing_ok avoids the check-then-unlink race: if the file disappears
+    # between an exists() test and unlink(), unlink() would raise
+    # FileNotFoundError.
+    path.unlink(missing_ok=True)
+
+
+def get_provider_key(provider: ProviderKind) -> str | None:
+    """Return the API key for ``provider``, or ``None`` if none is found.
+
+    Each provider kind maps to one environment variable and one token file,
+    resolved through ``_resolve_key``. Unknown kinds yield ``None``.
+    """
+    sources = (
+        ("anthropic", ANTHROPIC_ENV, ANTHROPIC_TOKEN_FILE),
+        ("openai_compatible", OPENAI_ENV, OPENAI_TOKEN_FILE),
+        ("self_hosted", LLM_CUSTOM_ENV, LLM_CUSTOM_TOKEN_FILE),
+    )
+    for kind, env_var, token_file in sources:
+        if provider == kind:
+            return _resolve_key(env_var, token_file)
+    return None
+
+
+def set_provider_key(provider: ProviderKind, value: str) -> None:
+    """Store ``value`` in the token file belonging to ``provider``.
+
+    Unknown provider kinds are ignored.
+    """
+    token_files = (
+        ("anthropic", ANTHROPIC_TOKEN_FILE),
+        ("openai_compatible", OPENAI_TOKEN_FILE),
+        ("self_hosted", LLM_CUSTOM_TOKEN_FILE),
+    )
+    for kind, token_file in token_files:
+        if provider == kind:
+            _save_key(token_file, value)
+            return
+
+
+def delete_provider_key(provider: ProviderKind) -> None:
+    """Delete the token file belonging to ``provider``.
+
+    Unknown provider kinds are ignored.
+    """
+    token_files = (
+        ("anthropic", ANTHROPIC_TOKEN_FILE),
+        ("openai_compatible", OPENAI_TOKEN_FILE),
+        ("self_hosted", LLM_CUSTOM_TOKEN_FILE),
+    )
+    for kind, token_file in token_files:
+        if provider == kind:
+            _delete_key(token_file)
+            return
+
+
+def get_provider_key_source(provider: ProviderKind) -> str | None:
+    """Return "env_var" | "stored" | None.
+
+    Reports where ``get_provider_key`` would take the key from, and ``None``
+    when it would find no key at all.
+
+    Raises:
+        KeyError: If ``provider`` is not a known provider kind.
+    """
+    env_var, token_file = {
+        "anthropic": (ANTHROPIC_ENV, ANTHROPIC_TOKEN_FILE),
+        "openai_compatible": (OPENAI_ENV, OPENAI_TOKEN_FILE),
+        "self_hosted": (LLM_CUSTOM_ENV, LLM_CUSTOM_TOKEN_FILE),
+    }[provider]
+    # Defer to _resolve_key so a source is never reported for a key that
+    # cannot actually be resolved (blank env var, empty or unreadable
+    # token file).
+    if _resolve_key(env_var, token_file) is None:
+        return None
+    # A key exists. It came from the environment only if the variable holds
+    # a non-blank value; otherwise it was read from the token file.
+    if (os.environ.get(env_var) or "").strip():
+        return "env_var"
+    return "stored"
+
+
+# ----------------------------------------------------------------------
+# Workflow proposal
+# ----------------------------------------------------------------------
+
+
+@dataclass
+class WorkflowProposal:
+    """A workflow proposal awaiting the user's approve/reject decision.
+
+    Stored on ``AgentSession.pending_proposal`` while the decision is open.
+    """
+
+    # Proposal identifier.
+    id: str
+    # Proposed graph payload.
+    graph: dict
+    # NOTE(review): presumably the hash of the canvas graph at the time the
+    # proposal was made, used to detect later edits — confirm against the
+    # code that creates proposals.
+    graph_hash_at_propose: str
+    # NOTE(review): presumably the agent's explanation shown with the
+    # proposal — confirm.
+    rationale: str
+    # NOTE(review): presumably parameters for loading the pipeline once the
+    # proposal is applied — confirm.
+    pipeline_load_params: dict = field(default_factory=dict)
+    # Defaults to "video"; other accepted values are not visible here.
+    input_mode: str = "video"
+    # NOTE(review): presumably the difference between the current and the
+    # proposed graph — confirm.
+    diff: dict = field(default_factory=dict)
+    # Starts out False.
+    approved: bool = False
+    decision_feedback: str | None = None  # e.g. rejection reason
+
+
+# ----------------------------------------------------------------------
+# Agent session
+# ----------------------------------------------------------------------
+
+
+# Messages follow Anthropic's shape: {role: "user"|"assistant", content: [blocks]}.
+# Blocks: {"type": "text", "text": "..."}, {"type": "tool_use", "id", "name", "input"},
+# {"type": "tool_result", "tool_use_id", "content": [...]}.
+Message = dict[str, Any]
+
+
+@dataclass
+class AgentSession:
+    """In-memory state of one agent conversation."""
+
+    # Session identifier (AgentSessionStore.create uses "agent_" + 12 hex chars).
+    id: str
+    # time.time() timestamps; last_activity drives idle eviction in the store.
+    created_at: float
+    last_activity: float
+    # The provider config passed in when the session was created.
+    config_snapshot: AgentConfig
+    # Conversation history in Anthropic message shape.
+    messages: list[Message] = field(default_factory=list)
+    # Proposal awaiting a user decision, if any.
+    pending_proposal: WorkflowProposal | None = None
+
+    def touch(self) -> None:
+        """Mark the session as active now (resets the idle timer)."""
+        self.last_activity = time.time()
+
+
+class AgentSessionStore:
+    """In-memory session registry with idle-based eviction.
+
+    All access to the session table is serialized by an ``asyncio.Lock``.
+    A background "janitor" task, started explicitly, wakes once a minute
+    and drops sessions idle for longer than the configured TTL (1 hour by
+    default).
+    """
+
+    def __init__(self, idle_ttl_seconds: float = 3600.0) -> None:
+        """Create an empty store; the janitor is not started here."""
+        self._idle_ttl = idle_ttl_seconds
+        self._sessions: dict[str, AgentSession] = {}
+        self._janitor_task: asyncio.Task | None = None
+        self._lock = asyncio.Lock()
+
+    def start_janitor(self) -> None:
+        """Start the eviction task unless one is already running."""
+        task = self._janitor_task
+        if task is not None and not task.done():
+            return
+        self._janitor_task = asyncio.create_task(self._janitor_loop())
+
+    async def stop_janitor(self) -> None:
+        """Cancel the eviction task and wait for it to finish."""
+        task = self._janitor_task
+        if task is None:
+            return
+        task.cancel()
+        try:
+            await task
+        except (asyncio.CancelledError, Exception):
+            # Shutdown is best-effort; whatever the task ended with is ignored.
+            pass
+        self._janitor_task = None
+
+    async def _janitor_loop(self) -> None:
+        """Evict idle sessions every 60 seconds until cancelled."""
+        while True:
+            try:
+                await asyncio.sleep(60.0)
+                now = time.time()
+                async with self._lock:
+                    expired = []
+                    for sid, session in self._sessions.items():
+                        if now - session.last_activity > self._idle_ttl:
+                            expired.append(sid)
+                    for sid in expired:
+                        logger.info(f"Evicting idle agent session {sid}")
+                        del self._sessions[sid]
+            except asyncio.CancelledError:
+                return
+            except Exception as e:
+                # Keep the janitor alive across unexpected failures.
+                logger.warning(f"Agent session janitor error: {e}")
+
+    async def create(self, config: AgentConfig) -> AgentSession:
+        """Register and return a new session with a fresh random id."""
+        new_id = f"agent_{uuid.uuid4().hex[:12]}"
+        session = AgentSession(
+            id=new_id,
+            created_at=time.time(),
+            last_activity=time.time(),
+            config_snapshot=config,
+        )
+        async with self._lock:
+            self._sessions[new_id] = session
+        return session
+
+    async def get(self, session_id: str) -> AgentSession | None:
+        """Look up a session, refreshing its idle timer when found."""
+        async with self._lock:
+            found = self._sessions.get(session_id)
+            if found is None:
+                return None
+            found.touch()
+            return found
+
+    async def get_or_create(
+        self, session_id: str | None, config: AgentConfig
+    ) -> AgentSession:
+        """Return the session for ``session_id``, or a new one.
+
+        A new session (with a new id) is created when ``session_id`` is
+        empty or does not match a stored session.
+        """
+        existing = await self.get(session_id) if session_id else None
+        if existing is None:
+            return await self.create(config)
+        return existing
+
+    async def delete(self, session_id: str) -> bool:
+        """Remove a session; return whether it existed."""
+        async with self._lock:
+            removed = self._sessions.pop(session_id, None)
+            return removed is not None
+
+    async def list(self) -> list[dict]:
+        """Return a summary dict for every stored session."""
+        async with self._lock:
+            summaries = []
+            for session in self._sessions.values():
+                summaries.append(
+                    {
+                        "id": session.id,
+                        "created_at": session.created_at,
+                        "last_activity": session.last_activity,
+                        "messages": len(session.messages),
+                        "has_pending_proposal": session.pending_proposal is not None,
+                    }
+                )
+            return summaries
diff --git a/src/scope/server/agent_tool_impls.py b/src/scope/server/agent_tool_impls.py
new file mode 100644
index 000000000..ece1ae1d5
--- /dev/null
+++ b/src/scope/server/agent_tool_impls.py
@@ -0,0 +1,1623 @@
+"""Shared tool implementations for the Scope agent.
+
+This module exposes Scope's capabilities as agent tools. It uses an in-process
+httpx client (via ``httpx.ASGITransport``) to call the FastAPI app's existing
+HTTP endpoints. This matches the pattern used by ``mcp_server.py`` (which hits
+the same endpoints over loopback HTTP), keeping behavior consistent between
+external MCP clients and the in-app agent.
+
+Each tool method:
+- Returns a JSON-serializable dict (plus base64 image for capture_frame).
+- Keeps payloads small so tool results stay well under provider context limits.
+
+The loop (``agent_loop.py``) owns provider/session/SSE concerns. This module
+is intentionally side-effect-light and does not know about Anthropic or SSE.
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import json
+import logging
+import re
+import uuid
+from pathlib import Path
+from typing import Any
+
+import httpx
+from fastapi import FastAPI
+
+from .agent_state import AgentSession, WorkflowProposal
+from .graph_schema import GraphConfig
+
+# Valid ui_state edge handle IDs look like 'param:noise_scale', 'stream:video',
+# 'param:__vace', 'param:trigger_a'. The literal prefix 'parameter:' is invalid.
+_HANDLE_RE = re.compile(r"^(param|stream):[A-Za-z0-9_.\-]+$")
+
+logger = logging.getLogger(__name__)
+
+
+# Maximum size (bytes) for a tool-result JSON payload sent to the model.
+# Truncate more aggressively on anything that lists files.
+MAX_PAYLOAD_BYTES = 16 * 1024
+
+
+def _truncate_list(items: list, cap: int = 40) -> tuple[list, dict | None]:
+    """Return (items[:cap], truncation info), or (items, None) if nothing was cut."""
+    if len(items) > cap:
+        return items[:cap], {"truncated": True, "total": len(items), "shown": cap}
+    return items, None
+
+
+def _canonical_graph_hash(graph: dict) -> str:
+    """First 16 hex chars of the sha256 of the graph's key-sorted compact JSON."""
+    canonical = json.dumps(graph, separators=(",", ":"), sort_keys=True).encode()
+    return hashlib.sha256(canonical).hexdigest()[:16]
+
+
+# Frontend layout constants (mirror frontend/src/lib/graphUtils.ts).
+# Top-level nodes (source/pipeline/sink/record) get auto-laid-out by the
+# frontend at START_X + COLUMN_GAP * col_index, rows at
+# START_Y + i * (NODE_HEIGHT + ROW_GAP). UI-state nodes keep whatever
+# position the agent set — that's where overlaps happen when the agent
+# picks positive x coordinates that collide with the top-level columns.
+_FE_START_X = 50
+_FE_START_Y = 50
+_FE_COLUMN_GAP = 300
+_FE_ROW_GAP = 100
+_FE_NODE_WIDTH = 200
+_FE_NODE_HEIGHT = 60
+
+# Default bounding box for UI-state nodes on the canvas. React Flow sizes
+# them dynamically, but these are the conservative sizes we use for
+# overlap detection. Tall-content nodes (image preview, vace with
+# thumbnail, subgraph with header) are taller.
+_UI_NODE_W = 240
+_UI_NODE_H_DEFAULT = 140
+_UI_NODE_H_TALL = 280
+_UI_TALL_TYPES = frozenset({"image", "vace", "subgraph"})
+
+# Where reflow drops UI nodes: three columns to the LEFT of the
+# top-level auto-layout strip so they can't collide with sources (x=50),
+# pipelines (x=350), sinks (x=650), or records (x=950).
+_REFLOW_COL_X = [-320, -620, -920]
+_REFLOW_START_Y = 50
+_REFLOW_ROW_GAP = 160
+_REFLOW_ROW_GAP_TALL = 320
+# How the three columns are populated by node type.
+_REFLOW_COL_BY_TYPE = {
+    # Column 0 (closest to the top-level strip): small value-injecting
+    # controls a user pokes during a performance.
+    "slider": 0,
+    "primitive": 0,
+    "math": 0,
+    "trigger": 0,
+    # Column 1: text / list controls.
+    "prompt_list": 1,
+    "prompt_input": 1,
+    # Column 2: bulky media nodes.
+    "image": 2,
+    "vace": 2,
+    "lora": 2,
+    "subgraph": 2,
+}
+
+
+def _ui_node_height(node_type: str | None) -> int:  # box height used by the overlap checks
+    return _UI_NODE_H_TALL if node_type in _UI_TALL_TYPES else _UI_NODE_H_DEFAULT
+
+
+def _rects_overlap(
+    a: tuple[float, float, float, float],
+    b: tuple[float, float, float, float],
+) -> bool:
+    """AABB test on (x, y, w, h) boxes; boxes that merely touch do not overlap."""
+    (ax, ay, aw, ah), (bx, by, bw, bh) = a, b
+    apart_x = ax + aw <= bx or bx + bw <= ax  # no shared horizontal extent
+    return not (apart_x or ay + ah <= by or by + bh <= ay)
+
+
+def _top_level_rects(
+    backend_nodes: list[dict],
+) -> list[tuple[float, float, float, float]]:
+    """Approximate where the frontend will auto-place top-level nodes.
+
+    ``graphConfigToFlow`` lays nodes out in four columns by type: source
+    (x=50), pipeline (x=350), sink (x=650) and record (x=950). The row each
+    node lands in is not known here, so every column that holds at least
+    one node is modelled as a single strip that starts at y=50 and reserves
+    one 160-unit slot (node height + row gap) per node of that type. Nodes
+    of any other type are ignored. Returns ``(x, y, width, height)`` tuples.
+    """
+    column_of = {"source": 0, "pipeline": 1, "sink": 2, "record": 3}
+    # Count nodes per column; a type missing from the table has no column.
+    per_column: dict[int, int] = {}
+    for node in backend_nodes:
+        column = column_of.get(node.get("type"))
+        if column is not None:
+            per_column[column] = per_column.get(column, 0) + 1
+    # One strip per occupied column, in order of first appearance.
+    return [
+        (
+            float(_FE_START_X + column * _FE_COLUMN_GAP),
+            float(_FE_START_Y),
+            float(_FE_NODE_WIDTH),
+            float(max(1, n_nodes) * (_FE_NODE_HEIGHT + _FE_ROW_GAP)),
+        )
+        for column, n_nodes in per_column.items()
+    ]
+
+
+def _has_layout_conflict(
+    ui_nodes: list[dict],
+    top_rects: list[tuple[float, float, float, float]],
+) -> bool:
+    """True if any UI node overlaps another UI node OR a top-level rect.
+
+    A node without a position sits at (0, 0). A position that is not a
+    mapping, or has a non-numeric coordinate, counts as a conflict.
+    """
+    ui_rects: list[tuple[float, float, float, float]] = []
+    for n in ui_nodes:
+        pos = n.get("position") or {}
+        try:
+            x = float(pos.get("x", 0))
+            y = float(pos.get("y", 0))
+        except (AttributeError, TypeError, ValueError):
+            # Unusable position (AttributeError: not a mapping) — force a reflow.
+            return True
+        h = float(_ui_node_height(n.get("type")))
+        ui_rects.append((x, y, float(_UI_NODE_W), h))
+
+    for i, ra in enumerate(ui_rects):
+        # Compare against every reserved strip and every later UI node.
+        if any(_rects_overlap(ra, rb) for rb in [*top_rects, *ui_rects[i + 1 :]]):
+            return True
+    return False
+
+
+def _reflow_ui_nodes(graph: dict) -> dict:
+    """Re-lay out ``ui_state`` nodes when the agent's positions collide.
+
+    If every UI node is clear of the other UI nodes and of the strips the
+    frontend reserves for top-level nodes, the graph is returned untouched.
+    Otherwise ALL UI nodes are repositioned into the ``_REFLOW_COL_X``
+    columns, which sit to the left of the top-level strip: the column is
+    chosen by node type (unknown types go to column 0) and nodes stack
+    downwards in declaration order, tall types taking a larger gap.
+
+    The graph is modified in place and also returned.
+    """
+    ui_state = graph.get("ui_state")
+    if not isinstance(ui_state, dict):
+        return graph
+    ui_nodes = ui_state.get("nodes") or []
+    if not ui_nodes:
+        return graph
+
+    # Strips the frontend reserves for source/pipeline/sink/record nodes.
+    reserved = _top_level_rects(graph.get("nodes") or [])
+    if not _has_layout_conflict(ui_nodes, reserved):
+        # The agent's own layout is clean, so keep it as is.
+        return graph
+
+    # Next free y per reflow column; every column starts at the same height.
+    last_col = len(_REFLOW_COL_X) - 1
+    next_y = [float(_REFLOW_START_Y)] * len(_REFLOW_COL_X)
+    # Walk the nodes in declaration order so that nodes sharing a column keep
+    # their relative order.
+    for node in ui_nodes:
+        kind = node.get("type") or ""
+        # Unknown types fall back to column 0; the clamp guards against a
+        # table entry that points past the configured columns.
+        col = max(0, min(_REFLOW_COL_BY_TYPE.get(kind, 0), last_col))
+        node["position"] = {"x": float(_REFLOW_COL_X[col]), "y": next_y[col]}
+        # Tall node types reserve a larger vertical slot.
+        if kind in _UI_TALL_TYPES:
+            next_y[col] += _REFLOW_ROW_GAP_TALL
+        else:
+            next_y[col] += _REFLOW_ROW_GAP
+    return graph
+
+
+class AgentTools:
+    """Callable bundle of tools the agent can invoke.
+
+    One instance per AgentLoop turn. Holds a short-lived httpx client wired to
+    the FastAPI app via ASGITransport so calls go in-process (no socket hop).
+    """
+
+    def __init__(self, app: FastAPI, session: AgentSession) -> None:
+        self._app = app
+        self._session = session
+        self._client: httpx.AsyncClient | None = None  # opened in __aenter__
+
+    async def __aenter__(self) -> AgentTools:
+        transport = httpx.ASGITransport(app=self._app)  # requests are handed to the app directly
+        self._client = httpx.AsyncClient(
+            transport=transport,
+            base_url="http://scope-agent.local",  # only a base for the relative /api/... paths
+            timeout=300.0,
+        )
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb) -> None:
+        if self._client is not None:
+            await self._client.aclose()
+            self._client = None  # so _c() asserts if a tool is used after exit
+
+    def _c(self) -> httpx.AsyncClient:  # client accessor; valid only inside "async with"
+        assert self._client is not None, "AgentTools must be used as async ctx mgr"
+        return self._client
+
+    # ------------------------------------------------------------------
+    # Discovery
+    # ------------------------------------------------------------------
+
+    async def list_pipelines(self) -> dict:
+        """Summarise every pipeline known to the schema endpoint.
+
+        Fetches ``/api/v1/pipelines/schemas`` and keeps only id, name,
+        description, the ``supports_*`` capability flags and the supported
+        modes of each entry. Plugin-provided pipelines (e.g. ltx2, helios)
+        register against the same registry, so they show up here as well.
+        Raises ``httpx.HTTPStatusError`` on an error response.
+        """
+        resp = await self._c().get("/api/v1/pipelines/schemas")
+        resp.raise_for_status()
+        by_id = resp.json().get("pipelines", {}) or {}
+        summaries = [
+            {
+                "id": pid,
+                "name": spec.get("name"),
+                "description": spec.get("description"),
+                "supports_prompts": spec.get("supports_prompts", False),
+                "supports_lora": spec.get("supports_lora", False),
+                "supports_vace": spec.get("supports_vace", False),
+                "supported_modes": spec.get("supported_modes", []),
+            }
+            for pid, spec in by_id.items()
+        ]
+        return {"pipelines": summaries, "count": len(summaries)}
+
+    async def get_pipeline_schema(self, pipeline_id: str) -> dict:
+        """Return the full Pydantic schema for a pipeline, including UI hints.
+
+        This is the authoritative source of truth for what fields the agent
+        can safely set via update_parameters or include in a workflow.
+        """
+        resp = await self._c().get("/api/v1/pipelines/schemas")
+        resp.raise_for_status()
+        schemas = resp.json().get("pipelines", {}) or {}  # all schemas, keyed by pipeline id
+        schema = schemas.get(pipeline_id)
+        if schema is None:  # unknown id: say so and list the ids that do exist
+            return {
+                "error": f"pipeline '{pipeline_id}' not found",
+                "available": sorted(schemas.keys()),
+            }
+        return schema
+
+    async def get_pipeline_handles(self, pipeline_id: str) -> dict:
+        """Return the exact React Flow handle IDs available on a pipeline node.
+
+        The agent MUST call this before writing any ui_state edge whose target
+        is a pipeline: the answer tells it which ``param:<name>`` and
+        ``stream:<name>`` handles actually exist on the node, including
+        aggregate handles (``param:__prompt`` / ``param:__vace`` /
+        ``param:__loras``) that only appear when the pipeline declares the
+        matching capability.
+
+        Derived from the pipeline's config_schema + supports_* flags. Mirrors
+        the frontend's ``extractParameterPorts`` in ``graphUtils.ts``.
+        """
+        resp = await self._c().get("/api/v1/pipelines/schemas")
+        resp.raise_for_status()
+        schemas = resp.json().get("pipelines", {}) or {}
+        schema = schemas.get(pipeline_id)
+        if schema is None:  # unknown id: same error shape as get_pipeline_schema
+            return {
+                "error": f"pipeline '{pipeline_id}' not found",
+                "available": sorted(schemas.keys()),
+            }
+        return _derive_pipeline_handles(pipeline_id, schema)  # helper not shown in this part of the file
+
+    async def list_loras(self) -> dict:
+        """List LoRA files as name/path/size entries, at most 50 of them.
+
+        ``pagination`` is added only when the list had to be cut.
+        """
+        resp = await self._c().get("/api/v1/loras")
+        resp.raise_for_status()
+        found = resp.json().get("files", []) or []
+        shown, clipped = _truncate_list(found, cap=50)
+        # Keep only these fields of each entry.
+        keep = ("name", "path", "size")
+        out = {"loras": [{k: item.get(k) for k in keep} for item in shown]}
+        if clipped:
+            out["pagination"] = clipped
+        return out
+
+    async def list_assets(self) -> dict:
+        """List assets as name/path/type/size entries, at most 50 of them.
+
+        Entries come from the response's ``assets`` key, or from ``files``
+        when ``assets`` is absent. ``pagination`` is added only when the
+        list had to be cut. Raises ``httpx.HTTPStatusError`` on an error
+        response.
+        """
+        resp = await self._c().get("/api/v1/assets")
+        resp.raise_for_status()
+        payload = resp.json()
+        found = payload.get("assets", payload.get("files", [])) or []
+        shown, clipped = _truncate_list(found, cap=50)
+        # Keep only these fields of each entry.
+        keep = ("name", "path", "type", "size")
+        entries = [{k: item.get(k) for k in keep} for item in shown]
+        out = {"assets": entries}
+        if clipped:
+            out["pagination"] = clipped
+        return out
+
+    async def list_plugins(self) -> dict:
+        """List the plugins reported by ``/api/v1/plugins``."""
+        resp = await self._c().get("/api/v1/plugins")
+        resp.raise_for_status()
+        entries = []
+        # Only name, version and pipelines are kept from each plugin record.
+        for plugin in resp.json().get("plugins", []) or []:
+            entries.append(
+                {
+                    "name": plugin.get("name"),
+                    "version": plugin.get("version"),
+                    "pipelines": plugin.get("pipelines", []),
+                }
+            )
+        return {"plugins": entries}
+
+    async def list_blueprints(self) -> dict:
+        """List frontend composable blueprints (workflow fragments).
+
+        Reads ``frontend/src/data/blueprints/*.json`` in dev installs; returns
+        an empty list with a note in packaged/plugin-only installs. A file
+        that is unreadable, malformed, or not a JSON object is skipped.
+        """
+        candidates = _find_blueprints_dir()
+        if candidates is None:
+            return {
+                "blueprints": [],
+                "note": "Blueprints directory not available in this install.",
+            }
+        results = []
+        for path in sorted(candidates.glob("*.json")):
+            try:
+                data = json.loads(path.read_bytes())  # bytes: decoded as UTF, not locale
+                summary = {
+                    "id": path.stem,
+                    "name": data.get("name", path.stem),
+                    "description": data.get("description", ""),
+                    "category": data.get("category", "misc"),
+                }
+            except Exception:
+                continue  # also covers a top-level list/scalar (no .get)
+            results.append(summary)
+        return {"blueprints": results, "count": len(results)}
+
+    async def get_blueprint(self, blueprint_id: str) -> dict:
+        """Return the full JSON for a blueprint so the agent can graft it."""
+        candidates = _find_blueprints_dir()
+        if candidates is None:
+            return {"error": "blueprints not available"}
+        path = candidates / f"{blueprint_id}.json"  # NB: the id is model-supplied
+        if path.parent != candidates or not path.exists():  # blocks '../' and subdirs
+            return {"error": f"blueprint '{blueprint_id}' not found"}
+        try:
+            return {"id": blueprint_id, "blueprint": json.loads(path.read_bytes())}
+        except Exception as e:
+            return {"error": f"failed to load blueprint: {e}"}
+
+    async def list_node_types(self) -> dict:
+        """Return the UI-node catalog so the agent can compose graphs.
+
+        Sourced from ``frontend/src/data/nodes/manifest.json``. New node types
+        are registered there without touching agent code.
+        """
+        manifest = _find_node_manifest()
+        if manifest is None:
+            return {"node_types": [], "note": "node manifest not available"}
+        try:
+            data = json.loads(manifest.read_bytes())  # bytes: decoded as UTF, not locale
+        except Exception as e:
+            return {"node_types": [], "error": f"failed to read manifest: {e}"}
+        return data
+
+    # ------------------------------------------------------------------
+    # State inspection
+    # ------------------------------------------------------------------
+
+    async def get_pipeline_status(self) -> dict:
+        resp = await self._c().get("/api/v1/pipeline/status")
+        resp.raise_for_status()  # an HTTP error propagates to the caller
+        return resp.json()
+
+    async def get_current_graph(self) -> dict:
+        """Return a best-effort snapshot of the currently running graph.
+
+        Reads the ``graph`` field of /api/v1/session/metrics. On a non-200
+        response or an empty graph, ``graph`` and ``graph_hash`` are None and
+        ``running`` is False — the method itself always returns a dict.
+        """
+        resp = await self._c().get("/api/v1/session/metrics")
+        if resp.status_code != 200:
+            return {"graph": None, "graph_hash": None, "running": False}
+        data = resp.json()
+        graph = data.get("graph")
+        if not graph:
+            return {"graph": None, "graph_hash": None, "running": False}
+        return {
+            "graph": graph,
+            "graph_hash": _canonical_graph_hash(graph),
+            "running": True,
+        }
+
+    async def get_session_metrics(self) -> dict:
+        resp = await self._c().get("/api/v1/session/metrics")
+        if resp.status_code != 200:  # any non-200 is reported as "no session", not raised
+            return {"error": "no session", "status_code": resp.status_code}
+        return resp.json()
+
+    async def get_hardware_info(self) -> dict:
+        resp = await self._c().get("/api/v1/hardware/info")
+        resp.raise_for_status()  # an HTTP error propagates to the caller
+        return resp.json()  # passed through unchanged
+
+    async def get_logs(self, lines: int = 100, level: str | None = None) -> dict:
+        """Tail up to ``lines`` (clamped to 1..500) log lines, optionally by level."""
+        wanted = max(1, min(lines, 500))
+        resp = await self._c().get("/api/v1/logs/tail", params={"lines": wanted})
+        resp.raise_for_status()
+        log_lines = resp.json().get("lines", [])
+        if level:
+            level_u = level.upper()  # substring match against each line
+            log_lines = [ln for ln in log_lines if level_u in ln]
+        return {"lines": log_lines[-wanted:]}  # raw `lines` <= 0 would mis-slice
+
+    # ------------------------------------------------------------------
+    # Vision
+    # ------------------------------------------------------------------
+
+    async def capture_frame(
+        self, sink_node_id: str | None = None, quality: int = 80
+    ) -> dict:
+        """Capture current frame as base64 JPEG so it can be fed back to a
+        multimodal model as tool_result image content."""
+        params: dict[str, Any] = {"quality": max(1, min(quality, 95))}  # clamped to 1..95
+        if sink_node_id:  # only sent when the caller names a sink
+            params["sink_node_id"] = sink_node_id
+        resp = await self._c().get("/api/v1/session/frame", params=params)
+        if resp.status_code != 200:
+            return {
+                "error": "frame capture failed",
+                "status_code": resp.status_code,
+                "detail": resp.text[:200] if resp.text else None,
+            }
+        data = resp.content
+        b64 = base64.b64encode(data).decode("ascii")
+        return {
+            "media_type": "image/jpeg",
+            "size_bytes": len(data),  # size of the response body, before base64
+            "base64": b64,
+            "sink_node_id": sink_node_id,
+        }
+
+    # ------------------------------------------------------------------
+    # Runtime control (auto-applied)
+    # ------------------------------------------------------------------
+
+    async def update_parameters(self, parameters: dict) -> dict:
+        """Apply runtime parameter updates (prompts, noise, LoRA scales, etc)."""
+        resp = await self._c().post("/api/v1/session/parameters", json=parameters or {})
+        if resp.status_code != 200:  # reported in the result instead of raising
+            return {
+                "ok": False,
+                "status_code": resp.status_code,
+                "detail": resp.text[:400] if resp.text else None,
+            }
+        return {"ok": True, "applied": list((parameters or {}).keys())}  # echoes the keys sent
+
+    async def load_pipeline(
+        self, pipeline_ids: list[str], load_params: dict | None = None
+    ) -> dict:
+        body: dict[str, Any] = {"pipeline_ids": list(pipeline_ids)}
+        if load_params:  # left out of the body when None or empty
+            body["load_params"] = load_params
+        resp = await self._c().post("/api/v1/pipeline/load", json=body)
+        if resp.status_code != 200:  # reported in the result instead of raising
+            return {
+                "ok": False,
+                "status_code": resp.status_code,
+                "detail": resp.text[:400] if resp.text else None,
+            }
+        return {"ok": True, **(resp.json() if resp.text else {})}  # response keys are merged in
+
+    async def start_recording(self, node_id: str | None = None) -> dict:
+        params = {"node_id": node_id} if node_id else None  # no query string without an id
+        resp = await self._c().post("/api/v1/recordings/headless/start", params=params)
+        if resp.status_code != 200:
+            return {"ok": False, "detail": resp.text[:400]}
+        return {"ok": True, **(resp.json() if resp.text else {})}  # response keys are merged in
+
+    async def stop_recording(self, node_id: str | None = None) -> dict:
+        params = {"node_id": node_id} if node_id else None  # no query string without an id
+        resp = await self._c().post("/api/v1/recordings/headless/stop", params=params)
+        if resp.status_code != 200:
+            return {"ok": False, "detail": resp.text[:400]}
+        return {"ok": True, **(resp.json() if resp.text else {})}  # response keys are merged in
+
+    async def list_recordings(self) -> dict:
+        resp = await self._c().get("/api/v1/recordings/headless")
+        if resp.status_code != 200:  # note: a different result shape than on success
+            return {"recordings": [], "note": "no active recording"}
+        return {
+            "download_url": "/api/v1/recordings/headless",  # same path that was just fetched
+            "size_bytes": int(resp.headers.get("content-length", 0)),  # 0 if header absent
+        }
+
+    # ------------------------------------------------------------------
+    # Workflow proposal handshake
+    # ------------------------------------------------------------------
+
+    async def propose_workflow(
+        self,
+        graph: dict,
+        rationale: str,
+        pipeline_load_params: dict | None = None,
+        input_mode: str = "video",
+    ) -> dict:
+        """Validate a graph and stage it as a pending proposal.
+
+        The proposal is emitted via SSE to the frontend, which renders a card
+        with Approve/Reject. On Approve, the loop will call apply_workflow.
+        On Reject, the agent gets a next-turn user message with the reason.
+        """
+        try:
+            cfg = GraphConfig(**graph)
+        except Exception as e:
+            return {
+                "ok": False,
+                "error": f"graph failed pydantic validation: {e}",
+                "hint": (
+                    "Top-level nodes only accept type source|pipeline|sink|"
+                    "record. UI nodes (slider, subgraph, primitive, ...) go "
+                    "under ui_state.nodes."
+                ),
+            }
+        errors = cfg.validate_structure()
+        if errors:
+            return {"ok": False, "error": "invalid graph", "issues": errors}
+
+        # Pre-flight validate ui_state + pipeline handles. This is where the
+        # agent gets actionable feedback about bad edge handles, missing
+        # targets, subgraph inconsistencies, and likely-missing wires.
+        pipeline_handles: dict[str, dict] = {}
+        for node in graph.get("nodes", []) or []:
+            if node.get("type") == "pipeline" and node.get("pipeline_id"):
+                pid = node["pipeline_id"]
+                if pid in pipeline_handles:  # one lookup per distinct pipeline id
+                    continue
+                try:
+                    pipeline_handles[pid] = await self.get_pipeline_handles(pid)
+                except Exception as e:
+                    logger.warning(f"handle lookup failed for {pid}: {e}")
+                    pipeline_handles[pid] = {"error": str(e)}
+        issues = _validate_proposal(graph, pipeline_handles)
+        errors_only = [i for i in issues if i.get("severity") == "error"]
+        warnings_only = [i for i in issues if i.get("severity") == "warning"]
+        if errors_only:
+            return {
+                "ok": False,
+                "error": "graph failed structural validation",
+                "issues": errors_only,
+                "warnings": warnings_only,
+                "hint": (
+                    "Fix each error (handle format is 'param:<name>' or "
+                    "'stream:<name>'; call get_pipeline_handles to see valid "
+                    "pipeline inputs) and call propose_workflow again."
+                ),
+            }
+
+        # Safety net: models occasionally produce UI-node positions that
+        # collide with the frontend's top-level auto-layout strip
+        # (x=50/350/650/950) or with each other. Detect that and reassign
+        # all UI-node positions into three columns to the LEFT of the
+        # strip. No-op when the agent's layout is already clean.
+        _reflow_ui_nodes(graph)  # edits the caller's dict in place
+
+        # Hash the graph-at-propose-time so later apply can detect user edits.
+        graph_hash = _canonical_graph_hash(graph)  # taken after any reflow above
+        proposal_id = f"prop_{uuid.uuid4().hex[:10]}"
+
+        # Best-effort diff against the currently running graph.
+        diff = _diff_graphs(await self._running_graph_or_none(), graph)
+
+        proposal = WorkflowProposal(
+            id=proposal_id,
+            graph=graph,
+            graph_hash_at_propose=graph_hash,
+            rationale=rationale,
+            pipeline_load_params=pipeline_load_params or {},
+            input_mode=input_mode,
+            diff=diff,
+        )
+        self._session.pending_proposal = proposal  # replaces any earlier pending proposal
+
+        # Distinct pipelines that need to be loaded for this graph.
+        pipelines = sorted(
+            {
+                n.get("pipeline_id")
+                for n in graph.get("nodes", [])
+                if n.get("type") == "pipeline" and n.get("pipeline_id")
+            }
+        )
+
+        return {
+            "ok": True,
+            "proposal_id": proposal_id,
+            "graph_hash": graph_hash,
+            "rationale": rationale,
+            "pipelines_to_load": pipelines,
+            "diff": diff,
+            "warnings": warnings_only,
+            "note": (
+                "Proposal registered. The frontend will show Approve/Reject "
+                "to the user. End your turn after proposing; on approval a "
+                "new turn will be started with user feedback."
+                + (
+                    " NOTE: the validator flagged warnings you may want to "
+                    "revisit before approval."
+                    if warnings_only
+                    else ""
+                )
+            ),
+        }
+
+    async def apply_workflow(
+        self,
+        proposal_id: str,
+        expected_graph_hash: str,
+    ) -> dict:
+        """Confirm that a previously-proposed workflow was applied.
+
+        The frontend writes the proposed graph into the React Flow canvas at
+        approval time (before this tool runs). This tool just validates the
+        hash, clears the pending proposal, and returns so the agent can end
+        its turn with a short confirmation message.
+
+        It intentionally does NOT start a session or load pipelines — the
+        user presses Play to start, which runs the regular flow (including
+        cloud routing when cloud mode is active). That keeps the agent out
+        of environment-specific concerns and matches the "confirm workflows,
+        user controls Play" product intent.
+        """
+        proposal = self._session.pending_proposal
+        if proposal is None or proposal.id != proposal_id:  # nothing staged, or another one is
+            return {"ok": False, "error": "no matching pending proposal"}
+        if not proposal.approved:
+            return {"ok": False, "error": "proposal has not been approved by user"}
+
+        # Detect user edits during review: if the frontend recomputed the hash
+        # at approve-time and it changed, bail and invite re-proposal.
+        if expected_graph_hash != proposal.graph_hash_at_propose:
+            return {
+                "ok": False,
+                "error": "graph changed since proposal; re-propose",
+                "expected": expected_graph_hash,  # the hash passed to this call
+                "actual": proposal.graph_hash_at_propose,  # the hash stored at propose time
+            }
+
+        # Distinct pipelines that will be loaded when the user presses Play —
+        # surface them so the agent can mention what's about to warm up.
+        pipelines = sorted(
+            {
+                n.get("pipeline_id")
+                for n in proposal.graph.get("nodes", [])
+                if n.get("type") == "pipeline" and n.get("pipeline_id")
+            }
+        )
+
+        # Clear proposal bookkeeping.
+        self._session.pending_proposal = None  # the failure returns above leave it staged
+        return {
+            "ok": True,
+            "applied_to_canvas": True,
+            "pipelines_in_graph": pipelines,
+            "note": (
+                "Graph has been written to the canvas. The user will press "
+                "Play to start the session; do not try to start it yourself."
+            ),
+        }
+
+    async def stop_session(self) -> dict:
+        resp = await self._c().post("/api/v1/session/stop")
+        return {"ok": resp.status_code == 200, "status_code": resp.status_code}  # never raises on HTTP status
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    async def _running_graph_or_none(self) -> dict | None:
+        try:
+            resp = await self._c().get("/api/v1/session/metrics")
+            if resp.status_code != 200:
+                return None
+            return resp.json().get("graph")  # may itself be None when no graph is reported
+        except Exception:  # best-effort: any failure is treated as "no running graph"
+            return None
+
+
+# ----------------------------------------------------------------------
+# Lookup helpers for filesystem-backed resources
+# ----------------------------------------------------------------------
+
+
+def _find_blueprints_dir() -> Path | None:
+    """Return the nearest ``frontend/src/data/blueprints`` above this file, if any."""
+    tail = Path("frontend", "src", "data", "blueprints")
+    # ``parents`` runs from this file's directory up to the filesystem root.
+    for ancestor in Path(__file__).resolve().parents:
+        if (ancestor / tail).is_dir():
+            return ancestor / tail
+    return None
+
+
+def _find_node_manifest() -> Path | None:
+    """Return the nearest ``frontend/src/data/nodes/manifest.json`` above this file, if any."""
+    tail = Path("frontend", "src", "data", "nodes", "manifest.json")
+    # ``parents`` runs from this file's directory up to the filesystem root.
+    for ancestor in Path(__file__).resolve().parents:
+        if (ancestor / tail).is_file():
+            return ancestor / tail
+    return None
+
+
+def _validate_proposal(
+    graph: dict,
+    pipeline_handles: dict[str, dict],
+) -> list[dict]:
+    """Validate a proposed workflow graph and return a list of issues.
+
+    Pure function — takes the graph dict plus a lookup of pipeline handles
+    already fetched by the caller. Makes it easy to unit test without a
+    running FastAPI app.
+
+    Checks, in the order their issues are emitted:
+
+    1. every ``ui_state.nodes`` entry has an ``id``;
+    2. each subgraph's ``subgraphEdges`` and exposed ports only reference
+       nodes listed in its own ``subgraphNodes``, and its edge handles
+       match ``_HANDLE_RE``;
+    3. every ``ui_state.edges`` entry has well-formed handles, connects
+       known nodes, targets a handle the pipeline really has (only when
+       handle info without an ``"error"`` key is available), and matches
+       the declared ports of any subgraph it touches;
+    4. warnings for VACE nodes and prompt-capable pipelines that have
+       nothing wired into them.
+
+    Args:
+        graph: Proposed graph. Top-level ``nodes`` holds backend nodes;
+            the optional ``ui_state`` mapping holds ``nodes`` and ``edges``.
+        pipeline_handles: Maps a pipeline_id to its handle description
+            (``stream_inputs``, ``param_inputs``, ``supports_prompts``).
+
+    Returns:
+        A list of issue dicts. Issue shape: ``{"severity":
+        "error"|"warning", "message": str, "edge_id": str | None,
+        "node_id": str | None}``.
+    """
+    issues: list[dict] = []
+
+    def _report(
+        severity: str,
+        message: str,
+        *,
+        node_id: str | None = None,
+        edge_id: str | None = None,
+    ) -> None:
+        """Append one issue in the documented shape."""
+        issues.append(
+            {
+                "severity": severity,
+                "message": message,
+                "node_id": node_id,
+                "edge_id": edge_id,
+            }
+        )
+
+    def _port_name(handle: object) -> str:
+        """Return the text after the first ':' of a handle (all of it if none)."""
+        # str() first: handles come from untrusted JSON and may not be strings.
+        return str(handle).split(":", 1)[-1]
+
+    def _subgraph_port_names(node: dict, kind: str) -> set[str]:
+        """kind in {"inputs", "outputs"}"""
+        data = node.get("data") or {}
+        key = "subgraphInputs" if kind == "inputs" else "subgraphOutputs"
+        return {p.get("name") for p in (data.get(key) or []) if p.get("name")}
+
+    backend_nodes = graph.get("nodes", []) or []
+    backend_node_ids: set[str] = {n.get("id") for n in backend_nodes if n.get("id")}
+    backend_pipeline_by_id: dict[str, dict] = {
+        n["id"]: n for n in backend_nodes if n.get("type") == "pipeline" and n.get("id")
+    }
+
+    ui_state = graph.get("ui_state") or {}
+    if not isinstance(ui_state, dict):
+        ui_state = {}
+    ui_nodes = ui_state.get("nodes", []) or []
+    ui_edges = ui_state.get("edges", []) or []
+
+    ui_node_by_id: dict[str, dict] = {n["id"]: n for n in ui_nodes if n.get("id")}
+    top_level_node_ids = backend_node_ids | set(ui_node_by_id.keys())
+
+    # Track VACE wiring so we can emit a warning if none reaches a pipeline.
+    vace_nodes: list[str] = []
+    vace_to_pipeline_edges: int = 0
+
+    for n in ui_nodes:
+        nid = n.get("id")
+        if not nid:
+            _report("error", "ui_state.nodes entry is missing 'id'")
+            continue
+        if n.get("type") == "vace":
+            vace_nodes.append(nid)
+
+    # Validate subgraph internal consistency.
+    for n in ui_nodes:
+        if n.get("type") != "subgraph":
+            continue
+        nid = n.get("id")
+        if not nid:
+            # The missing id was already reported above; without an id the
+            # subgraph cannot be referenced, so skip instead of raising.
+            continue
+        data = n.get("data") or {}
+        sg_nodes = data.get("subgraphNodes") or []
+        sg_edges = data.get("subgraphEdges") or []
+        sg_inputs = data.get("subgraphInputs") or []
+        sg_outputs = data.get("subgraphOutputs") or []
+        inner_ids = {sn.get("id") for sn in sg_nodes if sn.get("id")}
+
+        for e in sg_edges:
+            eid = e.get("id")
+            for end in ("source", "target"):
+                ref = e.get(end)
+                if ref not in inner_ids:
+                    _report(
+                        "error",
+                        f"subgraph '{nid}' subgraphEdge references missing "
+                        f"{end} '{ref}' (not in subgraphNodes)",
+                        node_id=nid,
+                        edge_id=eid,
+                    )
+            for side in ("sourceHandle", "targetHandle"):
+                h = e.get(side)
+                if h is not None and not _HANDLE_RE.match(str(h)):
+                    _report(
+                        "error",
+                        f"subgraph '{nid}' subgraphEdge {side}='{h}' "
+                        "is not a valid handle; use "
+                        "'param:<name>' or 'stream:<name>'",
+                        node_id=nid,
+                        edge_id=eid,
+                    )
+
+        for port in sg_inputs + sg_outputs:
+            inner = port.get("innerNodeId")
+            if inner and inner not in inner_ids:
+                _report(
+                    "error",
+                    f"subgraph '{nid}' exposes port "
+                    f"'{port.get('name')}' whose innerNodeId "
+                    f"'{inner}' is not in subgraphNodes",
+                    node_id=nid,
+                )
+
+    # Validate ui_state.edges.
+    for e in ui_edges:
+        eid = e.get("id")
+        src = e.get("source")
+        tgt = e.get("target")
+        src_h = e.get("sourceHandle")
+        tgt_h = e.get("targetHandle")
+
+        for side, handle in (("sourceHandle", src_h), ("targetHandle", tgt_h)):
+            if handle is not None and not _HANDLE_RE.match(str(handle)):
+                _report(
+                    "error",
+                    f"ui_state.edge {side}='{handle}' is not a valid "
+                    "handle; use 'param:<name>' or 'stream:<name>' "
+                    "(the prefix 'parameter:' is invalid)",
+                    edge_id=eid,
+                )
+
+        if src not in top_level_node_ids:
+            _report(
+                "error",
+                f"ui_state.edge source '{src}' does not exist at "
+                "top level nor in ui_state.nodes (edges between "
+                "inner subgraph nodes live in that subgraph's "
+                "data.subgraphEdges, not ui_state.edges)",
+                node_id=src,
+                edge_id=eid,
+            )
+        if tgt not in top_level_node_ids:
+            _report(
+                "error",
+                f"ui_state.edge target '{tgt}' does not exist at "
+                "top level nor in ui_state.nodes",
+                node_id=tgt,
+                edge_id=eid,
+            )
+
+        # Pipeline-target checks: verify handle exists on pipeline.
+        if tgt in backend_pipeline_by_id:
+            pid = backend_pipeline_by_id[tgt].get("pipeline_id")
+            ph = pipeline_handles.get(pid) if pid else None
+            if ph and "error" not in ph:
+                # Entries without a handle are dropped so the sorted() in
+                # the message below never has to compare None with str.
+                valid = set(ph.get("stream_inputs") or []) | {
+                    p.get("handle")
+                    for p in (ph.get("param_inputs") or [])
+                    if p.get("handle")
+                }
+                if tgt_h and tgt_h not in valid:
+                    _report(
+                        "error",
+                        f"pipeline '{pid}' has no input handle "
+                        f"'{tgt_h}'. Valid handles: "
+                        f"{sorted(valid)}",
+                        node_id=tgt,
+                        edge_id=eid,
+                    )
+
+        src_node = ui_node_by_id.get(src)
+        tgt_node = ui_node_by_id.get(tgt)
+
+        # Track VACE → pipeline wires.
+        if (
+            src_node
+            and src_node.get("type") == "vace"
+            and tgt in backend_pipeline_by_id
+            and tgt_h == "param:__vace"
+        ):
+            vace_to_pipeline_edges += 1
+
+        # Validate subgraph external ports (external edge handles must
+        # match the subgraph's declared inputs/outputs).
+        if src_node and src_node.get("type") == "subgraph" and src_h:
+            port_name = _port_name(src_h)
+            outs = _subgraph_port_names(src_node, "outputs")
+            if port_name not in outs:
+                _report(
+                    "error",
+                    f"subgraph '{src}' has no declared output "
+                    f"'{port_name}'. Expected one of: {sorted(outs)}",
+                    node_id=src,
+                    edge_id=eid,
+                )
+        if tgt_node and tgt_node.get("type") == "subgraph" and tgt_h:
+            port_name = _port_name(tgt_h)
+            ins = _subgraph_port_names(tgt_node, "inputs")
+            if port_name not in ins:
+                _report(
+                    "error",
+                    f"subgraph '{tgt}' has no declared input "
+                    f"'{port_name}'. Expected one of: {sorted(ins)}",
+                    node_id=tgt,
+                    edge_id=eid,
+                )
+
+    # Soft warnings — likely-missing wires.
+    if vace_nodes and vace_to_pipeline_edges == 0:
+        _report(
+            "warning",
+            f"vace node(s) {vace_nodes} present but none is wired to a "
+            "pipeline's 'param:__vace'. Add an edge "
+            "{source: <vace_id>, sourceHandle: 'param:__vace', "
+            "target: <pipeline_id>, targetHandle: 'param:__vace'}.",
+            node_id=vace_nodes[0],
+        )
+
+    vace_image_handles = ("param:ref_image", "param:first_frame", "param:last_frame")
+    for vid in vace_nodes:
+        has_ref = any(
+            e.get("target") == vid
+            and str(e.get("targetHandle") or "") in vace_image_handles
+            for e in ui_edges
+        )
+        if not has_ref:
+            _report(
+                "warning",
+                f"vace node '{vid}' has no image input wired. Connect "
+                "an 'image' node's 'param:value' into 'param:ref_image' "
+                "(or param:first_frame / param:last_frame).",
+                node_id=vid,
+            )
+
+    # Warn if any pipeline supports prompts but nothing reaches its __prompt.
+    for pid_node, pipe_node in backend_pipeline_by_id.items():
+        pid = pipe_node.get("pipeline_id")
+        ph = pipeline_handles.get(pid) if pid else None
+        if not ph or not ph.get("supports_prompts"):
+            continue
+        reaches_prompt = any(
+            e.get("target") == pid_node and e.get("targetHandle") == "param:__prompt"
+            for e in ui_edges
+        )
+        if not reaches_prompt:
+            _report(
+                "warning",
+                f"pipeline '{pid}' supports prompts but nothing is "
+                "wired to its 'param:__prompt'. Add a primitive/"
+                "subgraph/prompt_blend whose output feeds "
+                "'param:__prompt'.",
+                node_id=pid_node,
+            )
+
+    return issues
+
+
+def _derive_pipeline_handles(pipeline_id: str, schema: dict) -> dict:
+    """Produce the list of handle IDs targetable on a pipeline node.
+
+    Mirrors frontend/src/lib/graphUtils.ts::extractParameterPorts + the
+    aggregate handles rendered by PipelineNode.tsx. Kept in Python so the
+    agent doesn't need to read frontend code.
+
+    Args:
+        pipeline_id: Identifier echoed back in the result.
+        schema: Pipeline schema dict. Reads the ``supports_prompts``,
+            ``supports_vace``, ``supports_lora``, ``produces_video``
+            (defaults to ``True``) and ``produces_audio`` flags, plus
+            ``config_schema.properties``. A missing or malformed
+            ``config_schema`` / ``properties`` yields no per-field handles.
+
+    Returns:
+        A dict with ``pipeline_id``, the three ``supports_*`` flags,
+        ``stream_inputs``, ``stream_outputs``, ``param_inputs`` and a
+        ``note`` reminding the reader of the handle format.
+    """
+    supports_prompts = bool(schema.get("supports_prompts"))
+    supports_vace = bool(schema.get("supports_vace"))
+    supports_lora = bool(schema.get("supports_lora"))
+    produces_video = schema.get("produces_video", True)
+    produces_audio = bool(schema.get("produces_audio"))
+
+    # Guard the containers the same way individual properties are guarded
+    # below, so a malformed schema degrades to "no param handles".
+    config_schema = schema.get("config_schema")
+    props = config_schema.get("properties") if isinstance(config_schema, dict) else None
+    if not isinstance(props, dict):
+        props = {}
+
+    param_inputs: list[dict] = []
+
+    for name, prop in props.items():
+        if not isinstance(prop, dict):
+            continue
+        ui = prop.get("ui")
+        if not isinstance(ui, dict):
+            # Fields without ui metadata don't get rendered as param handles.
+            continue
+        component = ui.get("component")
+        if component in ("cache", "vace", "lora"):
+            # These collapse into aggregate handles or are reset buttons; skip.
+            continue
+
+        type_hint = _infer_param_type(prop)
+        if type_hint is None:
+            continue
+
+        entry = {
+            "handle": f"param:{name}",
+            "field": name,
+            "type": type_hint,
+            "modulatable": bool(ui.get("modulatable")),
+            "is_load_param": bool(ui.get("is_load_param")),
+        }
+        if "modulatable_min" in ui:
+            entry["modulatable_min"] = ui["modulatable_min"]
+        if "modulatable_max" in ui:
+            entry["modulatable_max"] = ui["modulatable_max"]
+        if ui.get("modes"):
+            entry["modes"] = list(ui["modes"])
+        param_inputs.append(entry)
+
+    # Aggregate handles — only present on pipelines with the matching flag.
+    if supports_prompts:
+        param_inputs.append(
+            {
+                "handle": "param:__prompt",
+                "aggregate": True,
+                "type": "string",
+                "note": (
+                    "Aggregate prompt input. Connect any string-valued output "
+                    "here (primitive, subgraph output, prompt_blend.prompts) "
+                    "to replace the built-in prompt text."
+                ),
+            }
+        )
+    if supports_vace:
+        param_inputs.append(
+            {
+                "handle": "param:__vace",
+                "aggregate": True,
+                "type": "vace",
+                "note": (
+                    "Aggregate VACE input. Connect a 'vace' node's "
+                    "'param:__vace' output here."
+                ),
+            }
+        )
+    if supports_lora:
+        param_inputs.append(
+            {
+                "handle": "param:__loras",
+                "aggregate": True,
+                "type": "lora",
+                "note": (
+                    "Aggregate LoRA input. Connect a 'lora' node's "
+                    "'param:lora' output here."
+                ),
+            }
+        )
+
+    # Stream inputs.
+    stream_inputs = ["stream:video"]
+    if supports_vace:
+        stream_inputs.extend(["stream:vace_input_frames", "stream:vace_input_masks"])
+
+    # Stream outputs.
+    stream_outputs: list[str] = []
+    if produces_video:
+        stream_outputs.append("stream:video")
+    if produces_audio:
+        stream_outputs.append("stream:audio")
+
+    return {
+        "pipeline_id": pipeline_id,
+        "supports_prompts": supports_prompts,
+        "supports_vace": supports_vace,
+        "supports_lora": supports_lora,
+        "stream_inputs": stream_inputs,
+        "stream_outputs": stream_outputs,
+        "param_inputs": param_inputs,
+        "note": (
+            "Use exactly these handle IDs in ui_state.edges. Format is "
+            "'param:<name>' or 'stream:<name>' — never 'parameter:<name>'."
+        ),
+    }
+
+
+def _infer_param_type(prop: dict) -> str | None:
+    """Map a JSON Schema property onto a coarse handle type.
+
+    Rules are tried in this order, first match wins:
+
+    1. ``type == "array"`` -> ``"list_number"`` for integer/number items,
+       otherwise ``"array"``.
+    2. an ``anyOf`` array variant with integer/number items -> ``"list_number"``.
+    3. ``enum`` / ``$ref`` on the property, or ``$ref`` on any ``anyOf``
+       variant -> ``"string"``.
+    4. integer/number, then boolean, then string — each checked on the
+       property's own ``type`` and on its ``anyOf`` variants.
+
+    Returns ``None`` when nothing matches.
+    """
+    numeric = ("integer", "number")
+    declared = prop.get("type")
+    raw_variants = prop.get("anyOf")
+    # Non-dict anyOf members are ignored everywhere below.
+    variants = (
+        [v for v in raw_variants if isinstance(v, dict)]
+        if isinstance(raw_variants, list)
+        else []
+    )
+
+    def _has_numeric_items(array_schema: dict) -> bool:
+        items = array_schema.get("items") or {}
+        return isinstance(items, dict) and items.get("type") in numeric
+
+    if declared == "array":
+        return "list_number" if _has_numeric_items(prop) else "array"
+
+    # Only numeric arrays are recognised inside anyOf (e.g. list[int] | None).
+    if any(v.get("type") == "array" and _has_numeric_items(v) for v in variants):
+        return "list_number"
+
+    # Enum or $ref (treated as string).
+    if prop.get("enum") or prop.get("$ref") or any(v.get("$ref") for v in variants):
+        return "string"
+
+    scalar_rules = (
+        ("number", numeric),
+        ("boolean", ("boolean",)),
+        ("string", ("string",)),
+    )
+    for label, accepted in scalar_rules:
+        if declared in accepted or any(v.get("type") in accepted for v in variants):
+            return label
+
+    return None
+
+
+def _diff_graphs(current: dict | None, proposed: dict) -> dict:
+    """Return a human-readable summary of how proposed differs from current.
+
+    Only top-level node ids are compared. Nodes without an ``id`` are
+    ignored, and a missing or null ``nodes`` list counts as empty, so a
+    malformed proposal produces a diff instead of an exception.
+
+    Args:
+        current: The running graph, or ``None`` when no graph is active.
+        proposed: The graph being proposed.
+
+    Returns:
+        A dict with ``summary`` (str), ``added_nodes`` and
+        ``removed_nodes`` (lists of node ids). When ``current`` is ``None``
+        every proposed id is listed as added, in proposal order; otherwise
+        both lists are sorted.
+    """
+
+    def _node_ids(graph: dict) -> list:
+        """Ids of the graph's top-level nodes, in order, skipping id-less nodes."""
+        return [n.get("id") for n in (graph.get("nodes") or []) if n.get("id")]
+
+    if current is None:
+        return {
+            "summary": "no active graph; proposal creates a new session",
+            "added_nodes": _node_ids(proposed),
+            "removed_nodes": [],
+        }
+
+    cur_ids = set(_node_ids(current))
+    new_ids = set(_node_ids(proposed))
+    # key=str keeps the usual ordering for string ids while never raising
+    # on ids of mixed types.
+    added = sorted(new_ids - cur_ids, key=str)
+    removed = sorted(cur_ids - new_ids, key=str)
+    return {
+        "summary": (
+            f"+{len(added)} nodes, -{len(removed)} nodes "
+            f"(was {len(cur_ids)}, now {len(new_ids)})"
+        ),
+        "added_nodes": added,
+        "removed_nodes": removed,
+    }
+
+
+# ----------------------------------------------------------------------
+# Tool specs (Anthropic-shaped; the OpenAI provider translates as needed)
+# ----------------------------------------------------------------------
+
+
+def build_tool_specs() -> list[dict]:
+    """Build the tool catalog the provider layer advertises to the LLM.
+
+    Each entry has the Anthropic tool-use shape ``{"name", "description",
+    "input_schema"}``, where ``input_schema`` is an object schema with
+    ``properties`` and ``required``. The OpenAI-compatible provider
+    translates each entry into the OpenAI function-calling format.
+
+    Returns:
+        A freshly built list of tool spec dicts; nothing is shared between
+        calls or between entries.
+    """
+
+    def _tool(
+        name: str,
+        description: str,
+        properties: dict | None = None,
+        required: tuple | list = (),
+    ) -> dict:
+        """Assemble one spec; omitted properties/required mean 'no arguments'."""
+        return {
+            "name": name,
+            "description": description,
+            "input_schema": {
+                "type": "object",
+                "properties": {} if properties is None else properties,
+                "required": list(required),
+            },
+        }
+
+    return [
+        _tool(
+            "list_pipelines",
+            "List all registered pipelines. Always call this early in a "
+            "turn so you know what's available; new pipelines (e.g. LTX2) "
+            "show up here without any code changes.",
+        ),
+        _tool(
+            "get_pipeline_schema",
+            "Return the full Pydantic config schema for a pipeline, "
+            "including field types, ranges, UI hints, and supports_* "
+            "capability flags. Call this before proposing a workflow or "
+            "calling update_parameters so you use the real parameter names.",
+            {"pipeline_id": {"type": "string"}},
+            ["pipeline_id"],
+        ),
+        _tool(
+            "get_pipeline_handles",
+            "Return the exact React Flow handle IDs available on a "
+            "pipeline node (stream_inputs, stream_outputs, param_inputs). "
+            "Call this BEFORE writing any ui_state edge whose target is a "
+            "pipeline, so you wire to handles that actually exist. "
+            "Includes aggregate handles 'param:__prompt' (only if "
+            "supports_prompts), 'param:__vace' (only if supports_vace), "
+            "'param:__loras' (only if supports_lora), and VACE stream "
+            "inputs ('stream:vace_input_frames', 'stream:vace_input_masks') "
+            "for VACE-capable pipelines.",
+            {"pipeline_id": {"type": "string"}},
+            ["pipeline_id"],
+        ),
+        _tool("list_loras", "List installed LoRA adapter files with paths."),
+        _tool("list_assets", "List images/videos in the assets directory."),
+        _tool("list_plugins", "List installed Scope plugins and their pipelines."),
+        _tool(
+            "list_blueprints",
+            "List pre-composed UI graph fragments (prompt switcher, LFO, "
+            "timed cycler, etc.). Prefer grafting a blueprint over "
+            "rebuilding composite behavior from raw nodes.",
+        ),
+        _tool(
+            "get_blueprint",
+            "Return the full JSON of a blueprint. Its nodes/edges are "
+            "UI-node types (trigger, subgraph, primitive, slider, etc.) "
+            "— when grafting into propose_workflow, place them under "
+            "ui_state.nodes / ui_state.edges, NOT top-level nodes/edges "
+            "(top-level accepts only source|pipeline|sink|record).",
+            {"blueprint_id": {"type": "string"}},
+            ["blueprint_id"],
+        ),
+        _tool(
+            "list_node_types",
+            "Return the UI node-type catalog (slider, knobs, prompt_list, "
+            "trigger, etc.) with their port signatures. Use when you need "
+            "to compose a UI graph beyond the source/pipeline/sink/record "
+            "primitives.",
+        ),
+        _tool("get_pipeline_status", "Which pipelines are loaded / loading."),
+        _tool(
+            "get_current_graph",
+            "Return the currently running graph and a stable hash. Use this "
+            "before propose_workflow so you can diff against what's live.",
+        ),
+        _tool("get_session_metrics", "fps/VRAM/frames_in/out for the active session."),
+        _tool(
+            "get_hardware_info",
+            "GPU VRAM and output-sink availability (Spout/NDI/Syphon).",
+        ),
+        _tool(
+            "get_logs",
+            "Recent server log lines, optionally filtered by level.",
+            {
+                "lines": {"type": "integer", "default": 100},
+                "level": {
+                    "type": "string",
+                    "enum": ["DEBUG", "INFO", "WARNING", "ERROR"],
+                },
+            },
+        ),
+        _tool(
+            "capture_frame",
+            "Capture the current pipeline output as a JPEG. The response "
+            "will be delivered back to you as an image so you can reason "
+            "visually — use this when the user asks about what they're "
+            "seeing or when tuning based on output quality.",
+            {
+                "sink_node_id": {"type": "string"},
+                "quality": {"type": "integer", "default": 80},
+            },
+        ),
+        _tool(
+            "update_parameters",
+            "Apply runtime parameters (prompts, transition, noise_scale, "
+            "denoising_step_list, kv_cache_attention_bias, lora_scales, "
+            "vace_ref_images, vace_context_scale, input_source, "
+            "output_sinks, paused, recording, pipeline-specific fields). "
+            "Auto-applied immediately; use only for runtime changes, not "
+            "graph structure.",
+            {"parameters": {"type": "object"}},
+            ["parameters"],
+        ),
+        _tool(
+            "load_pipeline",
+            "Load one or more pipelines. Usually invoked indirectly via "
+            "apply_workflow; call directly only when the user explicitly "
+            "asks to preload.",
+            {
+                "pipeline_ids": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                },
+                "load_params": {"type": "object"},
+            },
+            ["pipeline_ids"],
+        ),
+        _tool(
+            "start_recording",
+            "Start recording the active session (optionally per record node).",
+            {"node_id": {"type": "string"}},
+        ),
+        _tool(
+            "stop_recording",
+            "Stop recording.",
+            {"node_id": {"type": "string"}},
+        ),
+        _tool(
+            "list_recordings",
+            "Return the download URL for the latest recording if any.",
+        ),
+        _tool(
+            "propose_workflow",
+            "Propose a new or replacement graph for the user to approve. "
+            "Always use this for structural changes — never apply a graph "
+            "without explicit user approval. End your turn with a short "
+            "text summary after calling this tool; the frontend will "
+            "render an Approve/Reject card. The tool validates the "
+            "proposal (handle format, node existence, pipeline handle "
+            "presence, subgraph consistency) — if it returns errors, fix "
+            "them and re-call propose_workflow. Warnings are informational "
+            "and may indicate likely-missing wires (e.g. a VACE node with "
+            "no edge to the pipeline).",
+            {
+                "graph": {
+                    "type": "object",
+                    "description": (
+                        "GraphConfig with two parts:\n"
+                        "1) Top-level 'nodes'/'edges' (backend runtime "
+                        "flow) — ONLY node types source|pipeline|sink|"
+                        "record. Backend edges use {from, from_port, "
+                        "to_node, to_port, kind: 'stream'|'parameter'}.\n"
+                        "2) 'ui_state': {nodes, edges} — everything else. "
+                        "UI node types include trigger, subgraph, "
+                        "primitive, slider, knobs, math, midi, "
+                        "prompt_list, prompt_blend, vace, lora, image, "
+                        "etc. UI edges use React Flow shape: "
+                        "{id, source, sourceHandle, target, targetHandle} "
+                        "where handle IDs have the shape 'param:<name>' "
+                        "(value port) or 'stream:<name>' (frame/audio "
+                        "port). The literal prefix 'parameter:' is "
+                        "INVALID — always use 'param:'. Before writing "
+                        "any ui_state edge that targets a pipeline node, "
+                        "call get_pipeline_handles(pipeline_id) to get "
+                        "the exact handle IDs. UI nodes placed in "
+                        "top-level 'nodes' will fail pydantic validation. "
+                        "Blueprint nodes always go under ui_state. Call "
+                        "get_current_graph on a loaded workflow to see "
+                        "the split in practice."
+                    ),
+                },
+                "rationale": {
+                    "type": "string",
+                    "description": "Short explanation shown to the user.",
+                },
+                "pipeline_load_params": {
+                    "type": "object",
+                    "description": "Optional load_params passed to pipeline/load.",
+                },
+                "input_mode": {
+                    "type": "string",
+                    "enum": ["text", "video"],
+                    "default": "video",
+                },
+            },
+            ["graph", "rationale"],
+        ),
+        _tool(
+            "apply_workflow",
+            "Confirm an approved proposal. The frontend already wrote the "
+            "graph to the canvas at approval time; this tool just "
+            "validates the hash and clears the pending proposal. It does "
+            "NOT start a session or load pipelines — the user presses "
+            "Play to start. Only callable after user approval (the loop "
+            "will tell you with a [System] message).",
+            {
+                "proposal_id": {"type": "string"},
+                "expected_graph_hash": {"type": "string"},
+            },
+            ["proposal_id", "expected_graph_hash"],
+        ),
+        _tool("stop_session", "Stop the active headless session."),
+    ]
+
+
+# Dispatch table for agent_loop to resolve tool_name -> coroutine.
+# Each value is the same string as its key, so the table is generated from
+# one ordered tuple of tool names.
+TOOL_METHODS = {
+    tool_name: tool_name
+    for tool_name in (
+        "list_pipelines",
+        "get_pipeline_schema",
+        "get_pipeline_handles",
+        "list_loras",
+        "list_assets",
+        "list_plugins",
+        "list_blueprints",
+        "get_blueprint",
+        "list_node_types",
+        "get_pipeline_status",
+        "get_current_graph",
+        "get_session_metrics",
+        "get_hardware_info",
+        "get_logs",
+        "capture_frame",
+        "update_parameters",
+        "load_pipeline",
+        "start_recording",
+        "stop_recording",
+        "list_recordings",
+        "propose_workflow",
+        "apply_workflow",
+        "stop_session",
+    )
+}
diff --git a/src/scope/server/app.py b/src/scope/server/app.py
index d77939909..ec0238c62 100644
--- a/src/scope/server/app.py
+++ b/src/scope/server/app.py
@@ -324,6 +324,8 @@ def configure_static_files():
 osc_server = None
 # Global DMX server instance
 dmx_server = None
+# Global agent session store (in-memory, 1h idle TTL)
+agent_sessions = None
 
 
 async def prewarm_pipeline(pipeline_id: str):
@@ -371,7 +373,8 @@ async def lifespan(app: FastAPI):
         livepeer, \
         tempo_sync, \
         osc_server, \
-        dmx_server
+        dmx_server, \
+        agent_sessions
 
     # Check CUDA availability and warn if not available
     if not torch.cuda.is_available():
@@ -458,6 +461,13 @@ async def lifespan(app: FastAPI):
     if dmx_server.enabled:
         await dmx_server.start()
 
+    # Initialize agent session store (in-memory, 1h idle TTL).
+    from .agent_state import AgentSessionStore
+
+    agent_sessions = AgentSessionStore()
+    agent_sessions.start_janitor()
+    logger.info("Agent session store initialized")
+
     # Syphon server discovery (macOS only): create the ObjC singleton and do
     # an initial NSRunLoop pump so servers are available when the UI first loads.
     # Subsequent refreshes pump on demand in the list_input_sources endpoint.
@@ -518,6 +528,11 @@ async def lifespan(app: FastAPI):
         set_kafka_publisher(None)
         logger.info("Kafka publisher shutdown complete")
 
+    if agent_sessions is not None:
+        logger.info("Shutting down agent session store...")
+        await agent_sessions.stop_janitor()
+        logger.info("Agent session store shutdown complete")
+
 
 def get_webrtc_manager() -> "WebRTCManager":
     """Dependency to get WebRTC manager instance."""
@@ -2603,6 +2618,7 @@ async def list_api_keys():
 
     from huggingface_hub import get_token
 
+    from .agent_state import get_provider_key, get_provider_key_source
     from .models_config import get_civitai_token, get_civitai_token_source
 
     # HuggingFace
@@ -2636,7 +2652,38 @@ async def list_api_keys():
         key_url="https://civitai.com/user/account",
     )
 
-    return ApiKeysListResponse(keys=[hf_key, civitai_key])
+    # Agent provider keys
+    anthropic_key = ApiKeyInfo(
+        id="anthropic",
+        name="Anthropic",
+        description="Agent provider — Claude (Sonnet, Opus)",
+        is_set=get_provider_key("anthropic") is not None,
+        source=get_provider_key_source("anthropic"),
+        env_var="ANTHROPIC_API_KEY",
+        key_url="https://console.anthropic.com/settings/keys",
+    )
+    openai_key = ApiKeyInfo(
+        id="openai",
+        name="OpenAI-compatible",
+        description="Agent provider — OpenAI, OpenRouter, Groq, together.ai, Fireworks",
+        is_set=get_provider_key("openai_compatible") is not None,
+        source=get_provider_key_source("openai_compatible"),
+        env_var="OPENAI_API_KEY",
+        key_url="https://platform.openai.com/api-keys",
+    )
+    llm_custom_key = ApiKeyInfo(
+        id="llm_custom",
+        name="Self-hosted LLM",
+        description="Agent provider — Ollama, vLLM, LM Studio (usually no key required)",
+        is_set=get_provider_key("self_hosted") is not None,
+        source=get_provider_key_source("self_hosted"),
+        env_var="LLM_API_KEY",
+        key_url=None,
+    )
+
+    return ApiKeysListResponse(
+        keys=[hf_key, civitai_key, anthropic_key, openai_key, llm_custom_key]
+    )
 
 
 @app.put("/api/v1/keys/{service_id}", response_model=ApiKeySetResponse)
@@ -2644,6 +2691,12 @@ async def set_api_key(service_id: str, request: ApiKeySetRequest):
     """Set/save an API key for a service."""
     import os
 
+    from .agent_state import (
+        ANTHROPIC_ENV,
+        LLM_CUSTOM_ENV,
+        OPENAI_ENV,
+        set_provider_key,
+    )
     from .models_config import CIVITAI_TOKEN_ENV_VAR, set_civitai_token
 
     if service_id == "huggingface":
@@ -2668,6 +2721,21 @@ async def set_api_key(service_id: str, request: ApiKeySetRequest):
         set_civitai_token(request.value)
         return ApiKeySetResponse(success=True, message="CivitAI token saved")
 
+    elif service_id in ("anthropic", "openai", "llm_custom"):
+        provider_map = {
+            "anthropic": ("anthropic", ANTHROPIC_ENV),
+            "openai": ("openai_compatible", OPENAI_ENV),
+            "llm_custom": ("self_hosted", LLM_CUSTOM_ENV),
+        }
+        provider, env_var = provider_map[service_id]
+        if os.environ.get(env_var):
+            raise HTTPException(
+                status_code=409,
+                detail=f"{env_var} environment variable is already set. Remove it to manage this key from the UI.",
+            )
+        set_provider_key(provider, request.value)
+        return ApiKeySetResponse(success=True, message=f"{service_id} key saved")
+
     else:
         raise HTTPException(status_code=404, detail=f"Unknown service: {service_id}")
 
@@ -2677,6 +2745,13 @@ async def delete_api_key(service_id: str):
     """Remove a stored API key for a service."""
     import os
 
+    from .agent_state import (
+        ANTHROPIC_ENV,
+        LLM_CUSTOM_ENV,
+        OPENAI_ENV,
+        delete_provider_key,
+        get_provider_key_source,
+    )
     from .models_config import (
         clear_civitai_token,
         get_civitai_token_source,
@@ -2708,10 +2783,236 @@ async def delete_api_key(service_id: str):
         clear_civitai_token()
         return ApiKeyDeleteResponse(success=True, message="CivitAI token removed")
 
+    elif service_id in ("anthropic", "openai", "llm_custom"):
+        provider_map = {
+            "anthropic": ("anthropic", ANTHROPIC_ENV),
+            "openai": ("openai_compatible", OPENAI_ENV),
+            "llm_custom": ("self_hosted", LLM_CUSTOM_ENV),
+        }
+        provider, env_var = provider_map[service_id]
+        source = get_provider_key_source(provider)
+        if source == "env_var":
+            raise HTTPException(
+                status_code=409,
+                detail=f"Cannot remove token set via {env_var} environment variable. Unset the environment variable instead.",
+            )
+        if source != "stored":
+            raise HTTPException(
+                status_code=404, detail=f"No {service_id} token to remove"
+            )
+        delete_provider_key(provider)
+        return ApiKeyDeleteResponse(success=True, message=f"{service_id} key removed")
+
     else:
         raise HTTPException(status_code=404, detail=f"Unknown service: {service_id}")
 
 
+# ---------------------------------------------------------------------------
+# Agent endpoints
+# ---------------------------------------------------------------------------
+
+
+class AgentChatRequest(BaseModel):
+    """Body for POST /api/v1/agent/chat."""
+
+    message: str = Field(..., min_length=1)  # required, must be non-empty
+    session_id: str | None = None  # handed to the store's get_or_create()
+    is_continuation: bool = False  # forwarded as is_system_continuation
+
+
+class AgentDecisionRequest(BaseModel):
+    """Body for POST /api/v1/agent/decision."""
+
+    session_id: str  # looked up in the session store; 404 if unknown
+    proposal_id: str  # must equal the session's pending proposal id
+    approved: bool  # True = approve, False = reject
+    reason: str | None = None  # stored as the proposal's decision_feedback
+
+
+class AgentConfigUpdate(BaseModel):
+    """Body for PUT /api/v1/agent/config. Fields left as None are not changed."""
+
+    provider: str | None = None  # "anthropic" | "openai_compatible" | "self_hosted"
+    model: str | None = None  # None keeps the stored model
+    base_url: str | None = None  # None keeps it; "" clears it (saved as None)
+
+
+def get_agent_sessions():
+    """Return the module-level agent session store, or fail with 503 if unset."""
+    if agent_sessions is not None:
+        return agent_sessions
+    raise HTTPException(status_code=503, detail="Agent session store not ready")
+
+
+@app.post("/api/v1/agent/chat")
+async def agent_chat(request: AgentChatRequest):
+    """Start or continue an agent turn; the response body is an SSE stream."""
+    from .agent_loop import run_turn
+    from .agent_state import load_agent_config
+
+    store = get_agent_sessions()
+    config = load_agent_config()
+    session = await store.get_or_create(request.session_id, config)
+
+    async def event_generator():  # relays run_turn() chunks to the client
+        try:
+            async for chunk in run_turn(
+                app,
+                session,
+                request.message,
+                is_system_continuation=request.is_continuation,  # client-set flag
+            ):
+                yield chunk
+        except asyncio.CancelledError:
+            logger.info(f"Agent chat stream cancelled for session {session.id}")
+            raise  # log only; the cancellation must keep propagating
+        except Exception as e:  # report the failure in-stream, then end the turn
+            logger.exception("Agent chat stream error")
+            payload = json.dumps({"message": str(e)})
+            yield f"event: error\ndata: {payload}\n\n"
+            yield 'event: turn_end\ndata: {"stop_reason": "error"}\n\n'
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",  # nginx: do not buffer the stream
+            "X-Agent-Session-Id": session.id,  # id of the session in use
+        },
+    )
+
+
+@app.post("/api/v1/agent/decision")
+async def agent_decision(request: AgentDecisionRequest):
+    """Record the user's approve/reject decision on the pending proposal.
+
+    Responds 404 when the session or the proposal id is unknown. The caller is
+    expected to open a fresh /agent/chat stream with ``next_message`` — on
+    approval that drives the next turn where the model calls apply_workflow.
+    """
+    from .agent_loop import build_decision_continuation_message
+
+    store = get_agent_sessions()
+    session = await store.get(request.session_id)
+    if session is None:
+        raise HTTPException(status_code=404, detail="Unknown session")
+    prop = session.pending_proposal
+    if prop is None or prop.id != request.proposal_id:
+        raise HTTPException(status_code=404, detail="Unknown proposal")
+    prop.approved = request.approved
+    prop.decision_feedback = request.reason
+    next_message = build_decision_continuation_message(
+        approved=request.approved,
+        proposal_id=prop.id,
+        graph_hash=prop.graph_hash_at_propose,
+        reason=request.reason,
+    )
+    return {
+        "ok": True,
+        "session_id": session.id,
+        "proposal_id": prop.id,
+        "approved": request.approved,
+        "next_message": next_message,
+    }
+
+
+@app.get("/api/v1/agent/sessions")
+async def agent_list_sessions():
+    """Return what the session store lists for active sessions (metadata only)."""
+    listing = await get_agent_sessions().list()
+    return {"sessions": listing}
+
+
+@app.delete("/api/v1/agent/sessions/{session_id}")
+async def agent_delete_session(session_id: str):
+    """Drop one agent session from the store; 404 if it is not present."""
+    sessions = get_agent_sessions()
+    if await sessions.delete(session_id):
+        return {"ok": True, "session_id": session_id}
+    # delete() reported that nothing was removed, so the id is unknown.
+    raise HTTPException(status_code=404, detail="Unknown session")
+
+
+@app.get("/api/v1/agent/config")
+async def agent_get_config():
+    """Report the saved provider settings and where each provider key comes from."""
+    from .agent_state import get_provider_key_source, load_agent_config
+
+    providers = ("anthropic", "openai_compatible", "self_hosted")
+    current = load_agent_config()
+    # Only the key *source* is reported per provider, not the key value.
+    key_sources = {name: get_provider_key_source(name) for name in providers}
+
+    return {
+        "provider": current.provider,
+        "model": current.model,
+        "base_url": current.base_url,
+        "key_sources": key_sources,
+    }
+
+
+@app.put("/api/v1/agent/config")
+async def agent_put_config(update: AgentConfigUpdate):
+    """Apply a partial update to the saved agent config; None fields are skipped."""
+    from .agent_state import load_agent_config, save_agent_config
+
+    known_providers = ("anthropic", "openai_compatible", "self_hosted")
+    cfg = load_agent_config()
+    if update.provider is not None and update.provider not in known_providers:
+        detail = f"Unknown provider: {update.provider}"
+        raise HTTPException(status_code=400, detail=detail)
+    if update.provider is not None:
+        cfg.provider = update.provider  # type: ignore[assignment]
+    if update.model is not None:
+        cfg.model = update.model
+    if update.base_url is not None:
+        cfg.base_url = update.base_url or None  # "" clears the override
+    save_agent_config(cfg)
+    return {"ok": True, "config": cfg.to_json()}
+
+
+@app.post("/api/v1/agent/test-connection")
+async def agent_test_connection():
+    """Build the configured provider and ping it; errors come back as ok=False."""
+    from .agent_providers import ProviderError, build_provider
+    from .agent_state import load_agent_config
+
+    cfg = load_agent_config()
+    try:
+        provider = build_provider(cfg)
+    except ProviderError as build_err:
+        return {"ok": False, "error": str(build_err)}
+    try:
+        ping_result = await provider.ping()
+    except Exception as ping_err:
+        logger.exception("Agent provider ping failed")
+        return {"ok": False, "error": str(ping_err)}
+    return {"ok": True, "result": ping_result}
+
+
+@app.get("/api/v1/agent/node-catalog")
+async def agent_node_catalog():
+    """Serve the UI node-type manifest (also exposed via list_node_types tool)."""
+    # The manifest sits in the frontend tree, three levels above this directory.
+    server_dir = Path(__file__).resolve().parent
+    repo_root = server_dir.parent.parent.parent
+    manifest_path = repo_root.joinpath(
+        "frontend", "src", "data", "nodes", "manifest.json"
+    )
+    if not manifest_path.exists():
+        raise HTTPException(status_code=404, detail="Node manifest not found")
+
+    try:
+        raw_manifest = manifest_path.read_text(encoding="utf-8")
+        parsed = json.loads(raw_manifest)
+        return parsed
+    except Exception as e:
+        logger.exception("Failed to read node manifest")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
 @app.get("/api/v1/logs/current")
 async def get_current_logs():
     """Get the most recent application log file for bug reporting."""
diff --git a/tests/test_agent_tool_impls.py b/tests/test_agent_tool_impls.py
new file mode 100644
index 000000000..e8aa5bfec
--- /dev/null
+++ b/tests/test_agent_tool_impls.py
@@ -0,0 +1,413 @@
+"""Tests for agent_tool_impls helpers.
+
+Focuses on the pure-function validator and pipeline-handle deriver so we
+don't need to stand up a FastAPI app.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from scope.server.agent_tool_impls import (
+    _derive_pipeline_handles,
+    _validate_proposal,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures — compact pipeline handle dicts that mirror _derive_pipeline_handles
+# output. Using these directly keeps validator tests isolated from schema
+# shape churn.
+# ---------------------------------------------------------------------------
+
+
+def _vace_pipeline_handles() -> dict[str, Any]:
+    return {
+        "pipeline_id": "krea-realtime-video",
+        "supports_prompts": True,
+        "supports_vace": True,
+        "supports_lora": True,
+        "stream_inputs": [
+            "stream:video",
+            "stream:vace_input_frames",
+            "stream:vace_input_masks",
+        ],
+        "stream_outputs": ["stream:video"],
+        "param_inputs": [
+            {"handle": "param:noise_scale", "field": "noise_scale", "type": "number"},
+            {
+                "handle": "param:denoising_steps",
+                "field": "denoising_steps",
+                "type": "list_number",
+            },
+            {"handle": "param:__prompt", "aggregate": True, "type": "string"},
+            {"handle": "param:__vace", "aggregate": True, "type": "vace"},
+            {"handle": "param:__loras", "aggregate": True, "type": "lora"},
+        ],
+    }
+
+
+def _simple_pipeline_handles() -> dict[str, Any]:
+    return {
+        "pipeline_id": "longlive",
+        "supports_prompts": True,
+        "supports_vace": False,
+        "supports_lora": False,
+        "stream_inputs": ["stream:video"],
+        "stream_outputs": ["stream:video"],
+        "param_inputs": [
+            {"handle": "param:noise_scale", "field": "noise_scale", "type": "number"},
+            {"handle": "param:__prompt", "aggregate": True, "type": "string"},
+        ],
+    }
+
+
+def _minimal_backend(pipeline_id: str = "longlive") -> dict[str, Any]:
+    return {
+        "nodes": [
+            {"id": "input", "type": "source"},
+            {"id": "pipe", "type": "pipeline", "pipeline_id": pipeline_id},
+            {"id": "output", "type": "sink"},
+        ],
+        "edges": [
+            {
+                "from": "input",
+                "from_port": "video",
+                "to_node": "pipe",
+                "to_port": "video",
+                "kind": "stream",
+            },
+            {
+                "from": "pipe",
+                "from_port": "video",
+                "to_node": "output",
+                "to_port": "video",
+                "kind": "stream",
+            },
+        ],
+    }
+
+
+# ---------------------------------------------------------------------------
+# _validate_proposal
+# ---------------------------------------------------------------------------
+
+
+def test_validate_rejects_invalid_handle_prefix():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {
+                "id": "slider_noise",
+                "type": "slider",
+                "data": {"value": 0.7},
+            },
+        ],
+        "edges": [
+            {
+                "id": "e1",
+                "source": "slider_noise",
+                "sourceHandle": "parameter:value",  # INVALID prefix
+                "target": "pipe",
+                "targetHandle": "param:noise_scale",
+            }
+        ],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert any("parameter:" in i["message"] for i in errors), errors
+    assert any(i.get("edge_id") == "e1" for i in errors)
+
+
+def test_validate_rejects_missing_target_node():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {"id": "slider_noise", "type": "slider", "data": {}},
+        ],
+        "edges": [
+            {
+                "id": "e_missing",
+                "source": "slider_noise",
+                "sourceHandle": "param:value",
+                "target": "nonexistent_pipe",
+                "targetHandle": "param:noise_scale",
+            }
+        ],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert any(
+        "does not exist" in i["message"] and i.get("edge_id") == "e_missing"
+        for i in errors
+    ), errors
+
+
+def test_validate_rejects_unknown_pipeline_handle():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {"id": "slider_wild", "type": "slider", "data": {}},
+        ],
+        "edges": [
+            {
+                "id": "e_bad_handle",
+                "source": "slider_wild",
+                "sourceHandle": "param:value",
+                "target": "pipe",
+                "targetHandle": "param:does_not_exist_on_pipeline",
+            }
+        ],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert any("no input handle" in i["message"] for i in errors), (
+        f"expected unknown-handle error, got: {errors}"
+    )
+
+
+def test_validate_accepts_good_graph():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {"id": "slider_noise", "type": "slider", "data": {}},
+            {
+                "id": "prompt_text",
+                "type": "primitive",
+                "data": {"valueType": "string"},
+            },
+        ],
+        "edges": [
+            {
+                "id": "e_noise",
+                "source": "slider_noise",
+                "sourceHandle": "param:value",
+                "target": "pipe",
+                "targetHandle": "param:noise_scale",
+            },
+            {
+                "id": "e_prompt",
+                "source": "prompt_text",
+                "sourceHandle": "param:value",
+                "target": "pipe",
+                "targetHandle": "param:__prompt",
+            },
+        ],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert errors == [], errors
+
+
+def test_validate_warns_on_unreached_vace():
+    graph = _minimal_backend("krea-realtime-video")
+    graph["ui_state"] = {
+        "nodes": [
+            {"id": "vace_1", "type": "vace", "data": {}},
+            {"id": "ref_img", "type": "image", "data": {}},
+        ],
+        "edges": [
+            # Image → VACE only. No VACE → pipeline wire.
+            {
+                "id": "e_img_vace",
+                "source": "ref_img",
+                "sourceHandle": "param:value",
+                "target": "vace_1",
+                "targetHandle": "param:ref_image",
+            },
+        ],
+    }
+    issues = _validate_proposal(
+        graph, {"krea-realtime-video": _vace_pipeline_handles()}
+    )
+    warnings = [i for i in issues if i["severity"] == "warning"]
+    assert any("param:__vace" in i["message"] for i in warnings), (
+        f"expected VACE wiring warning, got: {warnings}"
+    )
+
+
+def test_validate_warns_on_unreached_prompt_input():
+    """An empty UI graph leaves param:__prompt unwired, which must warn."""
+    proposal = {**_minimal_backend(), "ui_state": {"nodes": [], "edges": []}}
+    handles_by_pipeline = {"longlive": _simple_pipeline_handles()}
+    found = _validate_proposal(proposal, handles_by_pipeline)
+    warnings = [item for item in found if item["severity"] == "warning"]
+    prompt_warned = any("param:__prompt" in w["message"] for w in warnings)
+    assert prompt_warned, f"expected unreached-prompt warning, got: {warnings}"
+
+
+def test_validate_subgraph_internal_consistency():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {
+                "id": "sg",
+                "type": "subgraph",
+                "data": {
+                    "subgraphNodes": [
+                        {"id": "inner_primitive", "type": "primitive", "data": {}},
+                        {"id": "inner_control", "type": "control", "data": {}},
+                    ],
+                    "subgraphEdges": [
+                        {
+                            "id": "se1",
+                            "source": "inner_primitive",
+                            "sourceHandle": "param:value",
+                            "target": "missing_inner",  # not in subgraphNodes
+                            "targetHandle": "param:str_0",
+                        }
+                    ],
+                    "subgraphInputs": [
+                        {
+                            "name": "trigger_a",
+                            "portType": "param",
+                            "paramType": "number",
+                            "innerNodeId": "nonexistent_inner",  # bad
+                            "innerHandleId": "param:item_0",
+                        }
+                    ],
+                    "subgraphOutputs": [
+                        {
+                            "name": "prompt",
+                            "portType": "param",
+                            "paramType": "string",
+                            "innerNodeId": "inner_control",
+                            "innerHandleId": "param:value",
+                        }
+                    ],
+                },
+            }
+        ],
+        "edges": [],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert any(
+        "subgraphEdge references missing target" in i["message"] for i in errors
+    ), errors
+    assert any(
+        "innerNodeId 'nonexistent_inner' is not in subgraphNodes" in i["message"]
+        for i in errors
+    ), errors
+
+
+def test_validate_subgraph_external_edge_must_match_declared_port():
+    graph = _minimal_backend()
+    graph["ui_state"] = {
+        "nodes": [
+            {
+                "id": "sg",
+                "type": "subgraph",
+                "data": {
+                    "subgraphNodes": [
+                        {"id": "inner_ctrl", "type": "control", "data": {}},
+                    ],
+                    "subgraphEdges": [],
+                    "subgraphInputs": [],
+                    "subgraphOutputs": [
+                        {
+                            "name": "prompt",
+                            "portType": "param",
+                            "paramType": "string",
+                            "innerNodeId": "inner_ctrl",
+                            "innerHandleId": "param:value",
+                        }
+                    ],
+                },
+            },
+        ],
+        "edges": [
+            # External edge refers to a port that the subgraph does not expose.
+            {
+                "id": "e_bad_port",
+                "source": "sg",
+                "sourceHandle": "param:does_not_exist",
+                "target": "pipe",
+                "targetHandle": "param:__prompt",
+            },
+        ],
+    }
+    issues = _validate_proposal(graph, {"longlive": _simple_pipeline_handles()})
+    errors = [i for i in issues if i["severity"] == "error"]
+    assert any("no declared output" in i["message"] for i in errors), errors
+
+
+# ---------------------------------------------------------------------------
+# _derive_pipeline_handles
+# ---------------------------------------------------------------------------
+
+
+def test_derive_handles_includes_aggregates_when_supported():
+    schema = {
+        "supports_prompts": True,
+        "supports_vace": True,
+        "supports_lora": True,
+        "produces_video": True,
+        "config_schema": {
+            "properties": {
+                "noise_scale": {
+                    "type": "number",
+                    "ui": {
+                        "category": "configuration",
+                        "is_load_param": False,
+                    },
+                },
+                # Fields with these components are aggregated and skipped.
+                "vace_context_scale": {
+                    "type": "number",
+                    "ui": {"component": "vace"},
+                },
+                "manage_cache": {
+                    "type": "boolean",
+                    "ui": {"component": "cache"},
+                },
+            }
+        },
+    }
+    result = _derive_pipeline_handles("krea-realtime-video", schema)
+    handles = {p["handle"] for p in result["param_inputs"]}
+    assert "param:noise_scale" in handles
+    assert "param:vace_context_scale" not in handles
+    assert "param:manage_cache" not in handles
+    assert "param:__prompt" in handles
+    assert "param:__vace" in handles
+    assert "param:__loras" in handles
+    assert "stream:vace_input_frames" in result["stream_inputs"]
+    assert "stream:vace_input_masks" in result["stream_inputs"]
+
+
+def test_derive_handles_omits_aggregates_when_unsupported():
+    schema = {
+        "supports_prompts": False,
+        "supports_vace": False,
+        "supports_lora": False,
+        "produces_video": True,
+        "config_schema": {
+            "properties": {
+                "noise_scale": {"type": "number", "ui": {"is_load_param": False}},
+            }
+        },
+    }
+    result = _derive_pipeline_handles("plain", schema)
+    handles = {p["handle"] for p in result["param_inputs"]}
+    assert "param:noise_scale" in handles
+    assert "param:__prompt" not in handles
+    assert "param:__vace" not in handles
+    assert "param:__loras" not in handles
+    assert "stream:vace_input_frames" not in result["stream_inputs"]
+
+
+def test_derive_handles_ignores_fields_without_ui():
+    """A config field that carries no ``ui`` metadata yields no param handle."""
+    # The lone property deliberately has a type but no "ui" block.
+    bare_properties = {"internal_only": {"type": "number"}}
+    schema = {
+        "supports_prompts": False,
+        "supports_vace": False,
+        "supports_lora": False,
+        "produces_video": True,
+        "config_schema": {"properties": bare_properties},
+    }
+    derived = _derive_pipeline_handles("internal", schema)
+    param_handles = [entry["handle"] for entry in derived["param_inputs"]]
+    # Only membership matters here, so a list serves as well as a set.
+    assert "param:internal_only" not in param_handles
diff --git a/tests/test_evals_smoke.py b/tests/test_evals_smoke.py
new file mode 100644
index 000000000..e259f6998
--- /dev/null
+++ b/tests/test_evals_smoke.py
@@ -0,0 +1,55 @@
+"""Smoke test for the eval harness wiring.
+
+This runs ONE case with N=1 under ``@pytest.mark.eval``. Default ``pytest``
+skips it (pyproject's addopts includes ``-m "not eval"``); run with
+``uv run pytest -m eval`` to include it.
+
+This test is not a pass-rate gate — it only verifies:
+1. Cases can be loaded.
+2. The driver can drive the agent in-process.
+3. The grader produces a structured result.
+
+Pass-rate enforcement is intentionally left to ``python -m evals``.
+"""
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+
+@pytest.mark.eval
+@pytest.mark.anyio
+async def test_smoke_single_case(tmp_path):
+    """Drive one eval case once (N=1) and check that a trace was recorded."""
+    # Skip if we can't reach the Anthropic API (no key set).
+    if not os.environ.get("ANTHROPIC_API_KEY"):
+        pytest.skip("ANTHROPIC_API_KEY not set; smoke eval needs live API")
+
+    from pathlib import Path
+
+    from evals.case import load_case
+    from evals.runner import run_cases
+
+    case_path = (
+        Path(__file__).resolve().parent.parent
+        / "evals"
+        / "cases"
+        / "starter-ltx-text-to-video.yaml"
+    )
+    case = load_case(case_path)
+
+    summaries = await run_cases(
+        [case],
+        runs_override=1,
+        output_dir=tmp_path / "eval-smoke",  # per-test dir, not shared /tmp
+    )
+    assert len(summaries) == 1
+    summary = summaries[0]
+    assert len(summary.runs) == 1
+    # Wiring check only — no pass assertion, but the run must have produced
+    # *some* result structure (either a proposal or a recorded failure).
+    run = summary.runs[0]
+    assert run.drive is not None
+    assert run.drive.trace, "driver produced an empty SSE trace"
diff --git a/uv.lock b/uv.lock
index 1c6b70119..75f1fd1ab 100644
--- a/uv.lock
+++ b/uv.lock
@@ -269,6 +269,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "anthropic"
+version = "0.96.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "docstring-parser" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b9/7e/672f533dee813028d2c699bfd2a7f52c9118d7353680d9aa44b9e23f717f/anthropic-0.96.0.tar.gz", hash = "sha256:9de947b737f39452f68aa520f1c2239d44119c9b73b0fb6d4e6ca80f00279ee6", size = 658210, upload-time = "2026-04-16T14:28:02.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/48/5a/72f33204064b6e87601a71a6baf8d855769f8a0c1eaae8d06a1094872371/anthropic-0.96.0-py3-none-any.whl", hash = "sha256:9a6e335a354602a521cd9e777e92bfd46ba6e115bf9bbfe6135311e8fb2015b2", size = 635930, upload-time = "2026-04-16T14:28:01.436Z" },
+]
+
 [[package]]
 name = "antlr4-python3-runtime"
 version = "4.9.3"
@@ -288,6 +307,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
+[[package]]
+name = "asgi-lifespan"
+version = "2.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "sniffio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6a/da/e7908b54e0f8043725a990bf625f2041ecf6bfe8eb7b19407f1c00b630f7/asgi-lifespan-2.1.0.tar.gz", hash = "sha256:5e2effaf0bfe39829cf2d64e7ecc47c7d86d676a6599f7afba378c31f5e3a308", size = 15627, upload-time = "2023-03-28T17:35:49.126Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2f/f5/c36551e93acba41a59939ae6a0fb77ddb3f2e8e8caa716410c65f7341f72/asgi_lifespan-2.1.0-py3-none-any.whl", hash = "sha256:ed840706680e28428c01e14afb3875d7d76d3206f3d5b2f2294e059b5c23804f", size = 10895, upload-time = "2023-03-28T17:35:47.772Z" },
+]
+
 [[package]]
 name = "async-timeout"
 version = "5.0.1"
@@ -566,6 +597,7 @@ dependencies = [
     { name = "accelerate" },
     { name = "aiohttp" },
     { name = "aiortc" },
+    { name = "anthropic" },
     { name = "click" },
     { name = "diffusers" },
     { name = "easydict" },
@@ -621,11 +653,13 @@ midi = [
 
 [package.dev-dependencies]
 dev = [
+    { name = "asgi-lifespan" },
     { name = "freezegun" },
     { name = "imageio" },
     { name = "imageio-ffmpeg" },
     { name = "pre-commit" },
     { name = "pytest" },
+    { name = "pyyaml" },
     { name = "ruff" },
     { name = "twine" },
 ]
@@ -637,6 +671,7 @@ requires-dist = [
     { name = "aiohttp", specifier = ">=3.9.0" },
     { name = "aiokafka", marker = "extra == 'kafka'", specifier = ">=0.10.0" },
     { name = "aiortc", specifier = ">=1.13.0" },
+    { name = "anthropic", specifier = ">=0.40" },
     { name = "click", specifier = ">=8.3.1" },
     { name = "diffusers", specifier = ">=0.31.0" },
     { name = "easydict", specifier = ">=1.13" },
@@ -680,11 +715,13 @@ provides-extras = ["kafka", "livepeer", "link", "midi"]
 
 [package.metadata.requires-dev]
 dev = [
+    { name = "asgi-lifespan", specifier = ">=2.1" },
     { name = "freezegun", specifier = ">=1.5.5" },
     { name = "imageio", specifier = ">=2.37.0" },
     { name = "imageio-ffmpeg", specifier = ">=0.6.0" },
     { name = "pre-commit", specifier = ">=4.0.0" },
     { name = "pytest", specifier = ">=8.4.2" },
+    { name = "pyyaml", specifier = ">=6.0" },
     { name = "ruff", specifier = "==0.14.11" },
     { name = "twine", specifier = ">=5.0.0" },
 ]
@@ -718,6 +755,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
 [[package]]
 name = "dnspython"
 version = "2.8.0"
@@ -727,6 +773,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
 ]
 
+[[package]]
+name = "docstring-parser"
+version = "0.18.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/4d/f332313098c1de1b2d2ff91cf2674415cc7cddab2ca1b01ae29774bd5fdf/docstring_parser-0.18.0.tar.gz", hash = "sha256:292510982205c12b1248696f44959db3cdd1740237a968ea1e2e7a900eeb2015", size = 29341, upload-time = "2026-04-14T04:09:19.867Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" },
+]
+
 [[package]]
 name = "docutils"
 version = "0.22.4"
@@ -1271,6 +1326,78 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
 ]
 
+[[package]]
+name = "jiter"
+version = "0.14.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6e/c1/0cddc6eb17d4c53a99840953f95dd3accdc5cfc7a337b0e9b26476276be9/jiter-0.14.0.tar.gz", hash = "sha256:e8a39e66dac7153cf3f964a12aad515afa8d74938ec5cc0018adcdae5367c79e", size = 165725, upload-time = "2026-04-10T14:28:42.01Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/68/7390a418f10897da93b158f2d5a8bd0bcd73a0f9ec3bb36917085bb759ef/jiter-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2fb2ce3a7bc331256dfb14cefc34832366bb28a9aca81deaf43bbf2a5659e607", size = 316295, upload-time = "2026-04-10T14:26:24.887Z" },
+    { url = "https://files.pythonhosted.org/packages/60/a0/5854ac00ff63551c52c6c89534ec6aba4b93474e7924d64e860b1c94165b/jiter-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5252a7ca23785cef5d02d4ece6077a1b556a410c591b379f82091c3001e14844", size = 315898, upload-time = "2026-04-10T14:26:26.601Z" },
+    { url = "https://files.pythonhosted.org/packages/41/a1/4f44832650a16b18e8391f1bf1d6ca4909bc738351826bcc198bba4357f4/jiter-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c409578cbd77c338975670ada777add4efd53379667edf0aceea730cabede6fb", size = 343730, upload-time = "2026-04-10T14:26:28.326Z" },
+    { url = "https://files.pythonhosted.org/packages/48/64/a329e9d469f86307203594b1707e11ae51c3348d03bfd514a5f997870012/jiter-0.14.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7ede4331a1899d604463369c730dbb961ffdc5312bc7f16c41c2896415b1304a", size = 370102, upload-time = "2026-04-10T14:26:30.089Z" },
+    { url = "https://files.pythonhosted.org/packages/94/c1/5e3dfc59635aa4d4c7bd20a820ac1d09b8ed851568356802cf1c08edb3cf/jiter-0.14.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92cd8b6025981a041f5310430310b55b25ca593972c16407af8837d3d7d2ca01", size = 461335, upload-time = "2026-04-10T14:26:31.911Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/1b/dd157009dbc058f7b00108f545ccb72a2d56461395c4fc7b9cfdccb00af4/jiter-0.14.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:351bf6eda4e3a7ceb876377840c702e9a3e4ecc4624dbfb2d6463c67ae52637d", size = 378536, upload-time = "2026-04-10T14:26:33.595Z" },
+    { url = "https://files.pythonhosted.org/packages/91/78/256013667b7c10b8834f8e6e54cd3e562d4c6e34227a1596addccc05e38c/jiter-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1dcfbeb93d9ecd9ca128bbf8910120367777973fa193fb9a39c31237d8df165", size = 353859, upload-time = "2026-04-10T14:26:35.098Z" },
+    { url = "https://files.pythonhosted.org/packages/de/d9/137d65ade9093a409fe80955ce60b12bb753722c986467aeda47faf450ad/jiter-0.14.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ae039aaef8de3f8157ecc1fdd4d85043ac4f57538c245a0afaecb8321ec951c3", size = 357626, upload-time = "2026-04-10T14:26:36.685Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/48/76750835b87029342727c1a268bea8878ab988caf81ee4e7b880900eeb5a/jiter-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7d9d51eb96c82a9652933bd769fe6de66877d6eb2b2440e281f2938c51b5643e", size = 393172, upload-time = "2026-04-10T14:26:38.097Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/60/456c4e81d5c8045279aefe60e9e483be08793828800a4e64add8fdde7f2a/jiter-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d824ca4148b705970bf4e120924a212fdfca9859a73e42bd7889a63a4ea6bb98", size = 520300, upload-time = "2026-04-10T14:26:39.532Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/9f/2020e0984c235f678dced38fe4eec3058cf528e6af36ebf969b410305941/jiter-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff3a6465b3a0f54b1a430f45c3c0ba7d61ceb45cbc3e33f9e1a7f638d690baf3", size = 553059, upload-time = "2026-04-10T14:26:40.991Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/32/e2d298e1a22a4bbe6062136d1c7192db7dba003a6975e51d9a9eecabc4c2/jiter-0.14.0-cp312-cp312-win32.whl", hash = "sha256:5dec7c0a3e98d2a3f8a2e67382d0d7c3ac60c69103a4b271da889b4e8bb1e129", size = 206030, upload-time = "2026-04-10T14:26:42.517Z" },
+    { url = "https://files.pythonhosted.org/packages/36/ac/96369141b3d8a4a8e4590e983085efe1c436f35c0cda940dd76d942e3e40/jiter-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:fc7e37b4b8bc7e80a63ad6cfa5fc11fab27dbfea4cc4ae644b1ab3f273dc348f", size = 201603, upload-time = "2026-04-10T14:26:44.328Z" },
+    { url = "https://files.pythonhosted.org/packages/01/c3/75d847f264647017d7e3052bbcc8b1e24b95fa139c320c5f5066fa7a0bdd/jiter-0.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:ee4a72f12847ef29b072aee9ad5474041ab2924106bdca9fcf5d7d965853e057", size = 191525, upload-time = "2026-04-10T14:26:46Z" },
+    { url = "https://files.pythonhosted.org/packages/97/2a/09f70020898507a89279659a1afe3364d57fc1b2c89949081975d135f6f5/jiter-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:af72f204cf4d44258e5b4c1745130ac45ddab0e71a06333b01de660ab4187a94", size = 315502, upload-time = "2026-04-10T14:26:47.697Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/be/080c96a45cd74f9fce5db4fd68510b88087fb37ffe2541ff73c12db92535/jiter-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4b77da71f6e819be5fbcec11a453fde5b1d0267ef6ed487e2a392fd8e14e4e3a", size = 314870, upload-time = "2026-04-10T14:26:49.149Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/5e/2d0fee155826a968a832cc32438de5e2a193292c8721ca70d0b53e58245b/jiter-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f4ea612fe8b84b8b04e51d0e78029ecf3466348e25973f953de6e6a59aa4c1", size = 343406, upload-time = "2026-04-10T14:26:50.762Z" },
+    { url = "https://files.pythonhosted.org/packages/70/af/bf9ee0d3a4f8dc0d679fc1337f874fe60cdbf841ebbb304b374e1c9aaceb/jiter-0.14.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62fe2451f8fcc0240261e6a4df18ecbcd58327857e61e625b2393ea3b468aac9", size = 369415, upload-time = "2026-04-10T14:26:52.188Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/83/8e8561eadba31f4d3948a5b712fb0447ec71c3560b57a855449e7b8ddc98/jiter-0.14.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6112f26f5afc75bcb475787d29da3aa92f9d09c7858f632f4be6ffe607be82e9", size = 461456, upload-time = "2026-04-10T14:26:53.611Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/c9/c5299e826a5fe6108d172b344033f61c69b1bb979dd8d9ddd4278a160971/jiter-0.14.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:215a6cb8fb7dc702aa35d475cc00ddc7f970e5c0b1417fb4b4ac5d82fa2a29db", size = 378488, upload-time = "2026-04-10T14:26:55.211Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/37/c16d9d15c0a471b8644b1abe3c82668092a707d9bedcf076f24ff2e380cd/jiter-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ab96a30fb3cb2c7e0cd33f7616c8860da5f5674438988a54ac717caccdbaa", size = 353242, upload-time = "2026-04-10T14:26:56.705Z" },
+    { url = "https://files.pythonhosted.org/packages/58/ea/8050cb0dc654e728e1bfacbc0c640772f2181af5dedd13ae70145743a439/jiter-0.14.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:3a99c1387b1f2928f799a9de899193484d66206a50e98233b6b088a7f0c1edb2", size = 356823, upload-time = "2026-04-10T14:26:58.281Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/3b/cf71506d270e5f84d97326bf220e47aed9b95e9a4a060758fb07772170ab/jiter-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ab18d11074485438695f8d34a1b6da61db9754248f96d51341956607a8f39985", size = 392564, upload-time = "2026-04-10T14:27:00.018Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/cc/8c6c74a3efb5bd671bfd14f51e8a73375464ca914b1551bc3b40e26ac2c9/jiter-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:801028dcfc26ac0895e4964cbc0fd62c73be9fd4a7d7b1aaf6e5790033a719b7", size = 520322, upload-time = "2026-04-10T14:27:01.664Z" },
+    { url = "https://files.pythonhosted.org/packages/41/24/68d7b883ec959884ddf00d019b2e0e82ba81b167e1253684fa90519ce33c/jiter-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ad425b087aafb4a1c7e1e98a279200743b9aaf30c3e0ba723aec93f061bd9bc8", size = 552619, upload-time = "2026-04-10T14:27:03.316Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/89/b1a0985223bbf3150ff9e8f46f98fc9360c1de94f48abe271bbe1b465682/jiter-0.14.0-cp313-cp313-win32.whl", hash = "sha256:882bcb9b334318e233950b8be366fe5f92c86b66a7e449e76975dfd6d776a01f", size = 205699, upload-time = "2026-04-10T14:27:04.662Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/19/3f339a5a7f14a11730e67f6be34f9d5105751d547b615ef593fa122a5ded/jiter-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:9b8c571a5dba09b98bd3462b5a53f27209a5cbbe85670391692ede71974e979f", size = 201323, upload-time = "2026-04-10T14:27:06.139Z" },
+    { url = "https://files.pythonhosted.org/packages/50/56/752dd89c84be0e022a8ea3720bcfa0a8431db79a962578544812ce061739/jiter-0.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:34f19dcc35cb1abe7c369b3756babf8c7f04595c0807a848df8f26ef8298ef92", size = 191099, upload-time = "2026-04-10T14:27:07.564Z" },
+    { url = "https://files.pythonhosted.org/packages/91/28/292916f354f25a1fe8cf2c918d1415c699a4a659ae00be0430e1c5d9ffea/jiter-0.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e89bcd7d426a75bb4952c696b267075790d854a07aad4c9894551a82c5b574ab", size = 320880, upload-time = "2026-04-10T14:27:09.326Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/c7/b002a7d8b8957ac3d469bd59c18ef4b1595a5216ae0de639a287b9816023/jiter-0.14.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b25beaa0d4447ea8c7ae0c18c688905d34840d7d0b937f2f7bdd52162c98a40", size = 346563, upload-time = "2026-04-10T14:27:11.287Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/3b/f8d07580d8706021d255a6356b8fab13ee4c869412995550ce6ed4ddf97d/jiter-0.14.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:651a8758dd413c51e3b7f6557cdc6921faf70b14106f45f969f091f5cda990ea", size = 357928, upload-time = "2026-04-10T14:27:12.729Z" },
+    { url = "https://files.pythonhosted.org/packages/47/5b/ac1a974da29e35507230383110ffec59998b290a8732585d04e19a9eb5ba/jiter-0.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e1a7eead856a5038a8d291f1447176ab0b525c77a279a058121b5fccee257f6f", size = 203519, upload-time = "2026-04-10T14:27:14.125Z" },
+    { url = "https://files.pythonhosted.org/packages/96/6d/9fc8433d667d2454271378a79747d8c76c10b51b482b454e6190e511f244/jiter-0.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e692633a12cda97e352fdcd1c4acc971b1c28707e1e33aeef782b0cbf051975", size = 190113, upload-time = "2026-04-10T14:27:16.638Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/1e/354ed92461b165bd581f9ef5150971a572c873ec3b68a916d5aa91da3cc2/jiter-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:6f396837fc7577871ca8c12edaf239ed9ccef3bbe39904ae9b8b63ce0a48b140", size = 315277, upload-time = "2026-04-10T14:27:18.109Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/95/8c7c7028aa8636ac21b7a55faef3e34215e6ed0cbf5ae58258427f621aa3/jiter-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a4d50ea3d8ba4176f79754333bd35f1bbcd28e91adc13eb9b7ca91bc52a6cef9", size = 315923, upload-time = "2026-04-10T14:27:19.603Z" },
+    { url = "https://files.pythonhosted.org/packages/47/40/e2a852a44c4a089f2681a16611b7ce113224a80fd8504c46d78491b47220/jiter-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce17f8a050447d1b4153bda4fb7d26e6a9e74eb4f4a41913f30934c5075bf615", size = 344943, upload-time = "2026-04-10T14:27:21.262Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/1f/670f92adee1e9895eac41e8a4d623b6da68c4d46249d8b556b60b63f949e/jiter-0.14.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f4f1c4b125e1652aefbc2e2c1617b60a160ab789d180e3d423c41439e5f32850", size = 369725, upload-time = "2026-04-10T14:27:22.766Z" },
+    { url = "https://files.pythonhosted.org/packages/01/2f/541c9ba567d05de1c4874a0f8f8c5e3fd78e2b874266623da9a775cf46e0/jiter-0.14.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be808176a6a3a14321d18c603f2d40741858a7c4fc982f83232842689fe86dd9", size = 461210, upload-time = "2026-04-10T14:27:24.315Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/a9/c31cbec09627e0d5de7aeaec7690dba03e090caa808fefd8133137cf45bc/jiter-0.14.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26679d58ba816f88c3849306dd58cb863a90a1cf352cdd4ef67e30ccf8a77994", size = 380002, upload-time = "2026-04-10T14:27:26.155Z" },
+    { url = "https://files.pythonhosted.org/packages/50/02/3c05c1666c41904a2f607475a73e7a4763d1cbde2d18229c4f85b22dc253/jiter-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80381f5a19af8fa9aef743f080e34f6b25ebd89656475f8cf0470ec6157052aa", size = 354678, upload-time = "2026-04-10T14:27:27.701Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/97/e15b33545c2b13518f560d695f974b9891b311641bdcf178d63177e8801e/jiter-0.14.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:004df5fdb8ecbd6d99f3227df18ba1a259254c4359736a2e6f036c944e02d7c5", size = 358920, upload-time = "2026-04-10T14:27:29.256Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/d2/8b1461def6b96ba44530df20d07ef7a1c7da22f3f9bf1727e2d611077bf1/jiter-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cff5708f7ed0fa098f2b53446c6fa74c48469118e5cd7497b4f1cd569ab06928", size = 394512, upload-time = "2026-04-10T14:27:31.344Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/88/837566dd6ed6e452e8d3205355afd484ce44b2533edfa4ed73a298ea893e/jiter-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:2492e5f06c36a976d25c7cc347a60e26d5470178d44cde1b9b75e60b4e519f28", size = 521120, upload-time = "2026-04-10T14:27:33.299Z" },
+    { url = "https://files.pythonhosted.org/packages/89/6b/b00b45c4d1b4c031777fe161d620b755b5b02cdade1e316dcb46e4471d63/jiter-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:7609cfbe3a03d37bfdbf5052012d5a879e72b83168a363deae7b3a26564d57de", size = 553668, upload-time = "2026-04-10T14:27:34.868Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/d8/6fe5b42011d19397433d345716eac16728ac241862a2aac9c91923c7509a/jiter-0.14.0-cp314-cp314-win32.whl", hash = "sha256:7282342d32e357543565286b6450378c3cd402eea333fc1ebe146f1fabb306fc", size = 207001, upload-time = "2026-04-10T14:27:36.455Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/43/5c2e08da1efad5e410f0eaaabeadd954812612c33fbbd8fd5328b489139d/jiter-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:bd77945f38866a448e73b0b7637366afa814d4617790ecd88a18ca74377e6c02", size = 202187, upload-time = "2026-04-10T14:27:38Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/1f/6e39ac0b4cdfa23e606af5b245df5f9adaa76f35e0c5096790da430ca506/jiter-0.14.0-cp314-cp314-win_arm64.whl", hash = "sha256:f2d4c61da0821ee42e0cdf5489da60a6d074306313a377c2b35af464955a3611", size = 192257, upload-time = "2026-04-10T14:27:39.504Z" },
+    { url = "https://files.pythonhosted.org/packages/05/57/7dbc0ffbbb5176a27e3518716608aa464aee2e2887dc938f0b900a120449/jiter-0.14.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1bf7ff85517dd2f20a5750081d2b75083c1b269cf75afc7511bdf1f9548beb3b", size = 323441, upload-time = "2026-04-10T14:27:41.039Z" },
+    { url = "https://files.pythonhosted.org/packages/83/6e/7b3314398d8983f06b557aa21b670511ec72d3b79a68ee5e4d9bff972286/jiter-0.14.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8ef8791c3e78d6c6b157c6d360fbb5c715bebb8113bc6a9303c5caff012754a", size = 348109, upload-time = "2026-04-10T14:27:42.552Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/4f/8dc674bcd7db6dba566de73c08c763c337058baff1dbeb34567045b27cdc/jiter-0.14.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e74663b8b10da1fe0f4e4703fd7980d24ad17174b6bb35d8498d6e3ebce2ae6a", size = 368328, upload-time = "2026-04-10T14:27:44.574Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/5f/188e09a1f20906f98bbdec44ed820e19f4e8eb8aff88b9d1a5a497587ff3/jiter-0.14.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1aca29ba52913f78362ec9c2da62f22cdc4c3083313403f90c15460979b84d9b", size = 463301, upload-time = "2026-04-10T14:27:46.717Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/f0/19046ef965ed8f349e8554775bb12ff4352f443fbe12b95d31f575891256/jiter-0.14.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b39b7d87a952b79949af5fef44d2544e58c21a28da7f1bae3ef166455c61746", size = 378891, upload-time = "2026-04-10T14:27:48.32Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/c3/da43bd8431ee175695777ee78cf0e93eacbb47393ff493f18c45231b427d/jiter-0.14.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d918a68b26e9fab068c2b5453577ef04943ab2807b9a6275df2a812599a310", size = 360749, upload-time = "2026-04-10T14:27:49.88Z" },
+    { url = "https://files.pythonhosted.org/packages/72/26/e054771be889707c6161dbdec9c23d33a9ec70945395d70f07cfea1e9a6f/jiter-0.14.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:b08997c35aee1201c1a5361466a8fb9162d03ae7bf6568df70b6c859f1e654a4", size = 358526, upload-time = "2026-04-10T14:27:51.504Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/0f/7bea65ea2a6d91f2bf989ff11a18136644392bf2b0497a1fa50934c30a9c/jiter-0.14.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:260bf7ca20704d58d41f669e5e9fe7fe2fa72901a6b324e79056f5d52e9c9be2", size = 393926, upload-time = "2026-04-10T14:27:53.368Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/a1/b1ff7d70deef61ac0b7c6c2f12d2ace950cdeecb4fdc94500a0926802857/jiter-0.14.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:37826e3df29e60f30a382f9294348d0238ef127f4b5d7f5f8da78b5b9e050560", size = 521052, upload-time = "2026-04-10T14:27:55.058Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/7b/3b0649983cbaf15eda26a414b5b1982e910c67bd6f7b1b490f3cfc76896a/jiter-0.14.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:645be49c46f2900937ba0eaf871ad5183c96858c0af74b6becc7f4e367e36e06", size = 553716, upload-time = "2026-04-10T14:27:57.269Z" },
+    { url = "https://files.pythonhosted.org/packages/97/f8/33d78c83bd93ae0c0af05293a6660f88a1977caef39a6d72a84afab94ce0/jiter-0.14.0-cp314-cp314t-win32.whl", hash = "sha256:2f7877ed45118de283786178eceaf877110abacd04fde31efff3940ae9672674", size = 207957, upload-time = "2026-04-10T14:27:59.285Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/ac/2b760516c03e2227826d1f7025d89bf6bf6357a28fe75c2a2800873c50bf/jiter-0.14.0-cp314-cp314t-win_amd64.whl", hash = "sha256:14c0cb10337c49f5eafe8e7364daca5e29a020ea03580b8f8e6c597fed4e1588", size = 204690, upload-time = "2026-04-10T14:28:00.962Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/2e/a44c20c58aeed0355f2d326969a181696aeb551a25195f47563908a815be/jiter-0.14.0-cp314-cp314t-win_arm64.whl", hash = "sha256:5419d4aa2024961da9fe12a9cfe7484996735dca99e8e090b5c88595ef1951ff", size = 191338, upload-time = "2026-04-10T14:28:02.853Z" },
+    { url = "https://files.pythonhosted.org/packages/21/42/9042c3f3019de4adcb8c16591c325ec7255beea9fcd33a42a43f3b0b1000/jiter-0.14.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:fbd9e482663ca9d005d051330e4d2d8150bb208a209409c10f7e7dfdf7c49da9", size = 308810, upload-time = "2026-04-10T14:28:34.673Z" },
+    { url = "https://files.pythonhosted.org/packages/60/cf/a7e19b308bd86bb04776803b1f01a5f9a287a4c55205f4708827ee487fbf/jiter-0.14.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:33a20d838b91ef376b3a56896d5b04e725c7df5bc4864cc6569cf046a8d73b6d", size = 308443, upload-time = "2026-04-10T14:28:36.658Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/44/e26ede3f0caeff93f222559cb0cc4ca68579f07d009d7b6010c5b586f9b1/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:432c4db5255d86a259efde91e55cb4c8d18c0521d844c9e2e7efcce3899fb016", size = 343039, upload-time = "2026-04-10T14:28:38.356Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e9/1f9ada30cef7b05e74bb06f52127e7a724976c225f46adb65c37b1dadfb6/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f00d94b281174144d6532a04b66a12cb866cbdc47c3af3bfe2973677f9861a", size = 349613, upload-time = "2026-04-10T14:28:40.066Z" },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -2855,6 +2982,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
 ]
 
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
 [[package]]
 name = "spoutgl"
 version = "0.1.1"