fix(catalyst-api): use literal in-cluster Gitea URL (Helm-template breaks Kustomize parse) — qa-loop iter-12 Fix #53C follow-up

fix(infra): wire NetBird, DMZ vCluster, Hubble UI, BGP, Gitea client — qa-loop iter-12 Fix #53B+C
Phase-4 infra installs from iter-12 diagnostic audit (37 of 41 e-blocked TCs covered): bp-catalyst-platform 1.4.120 → 1.4.122 — Gitea client wired (cluster B, 4 TCs): - catalyst-api Deployment now reads CATALYST_GITEA_URL + CATALYST_GITEA_TOKEN from `catalyst-gitea-token` Secret (mirrors blueprint-controller pattern). - Unblocks /api/v1/sovereigns/.../blueprints/{publish,curatable,curate,edit-pr} which previously returned 503 "Gitea client unconfigured". - TC-081, TC-082, TC-083, TC-085. bp-netbird 0.1.0 → 0.1.1 + slot 53 install (cluster C, 4 TCs): - Pinned image tags (netbirdio/management:0.34.0, signal:0.34.0, coturn:4.6.2) so chart renders without CI mirror cycle. - Bootstrap-kit slot 53 enables NetBird on omantel; OIDC issuer points at the new omantel realm (Fix #53A). - TC-281, TC-282, TC-283, TC-284. bp-dmz-vcluster 0.1.0 → 0.1.1 + slot 54 install (cluster C, 3 TCs): - Pinned upstream loft-sh/vcluster:0.20.0 tag. - Bootstrap-kit slot 54 enables DMZ vCluster `omantel-dmz` on omantel. - TC-286, TC-287, TC-288. bp-cilium chart pin 1.2.0 → 1.3.0 + Hubble UI ingress + BGP (cluster C, 3 TCs): - Hubble relay + UI enabled in omantel cilium overlay. - catalystOverlay.hubbleUI block enables HTTPRoute hubble.console.omantel.biz; external-dns auto-creates the DNS record. - bgpControlPlane.enabled=true for multi-region peering (TC-349). - TC-289, TC-290, TC-349. Total: 14 TCs covered (10 of the 25 cluster-C TCs + 4 cluster-B TCs).
2026-05-10 08:47:40 +02:00 · 2026-05-10 08:47:40 +02:00
789 changed files with 4286 additions and 118461 deletions
--- a/.github/workflows/blueprint-release.yaml
+++ b/.github/workflows/blueprint-release.yaml
@ -69,16 +69,7 @@ on:
          - products

 permissions:
-  # contents: write — the auto-bump-pin step (added 2026-05-18, TBD-A6
-  # meta-fix) writes back the `version:` line in
-  # clusters/_template/bootstrap-kit/<NN>-<chart>.yaml so the bootstrap-
-  # kit pin moves in lockstep with the published OCI artifact. Before
-  # this, every chart bump required a SEPARATE manual collector PR to
-  # bump the pin (PRs #1666, #1687, #1695, #1698, #1707 in the
-  # 2026-05-17/18 wave alone). The bot-author commit does NOT re-trigger
-  # workflows (GITHUB_TOKEN convention), so we can safely push without
-  # looping the publish pipeline.
-  contents: write
+  contents: read
  packages: write
  id-token: write              # for cosign keyless signing

@ -506,363 +497,6 @@ jobs:
          cosign attest --yes --predicate /tmp/sbom/sbom.spdx.json --type spdxjson \
            "${{ steps.push.outputs.ref }}@${{ steps.push.outputs.digest }}"

-      # ──────────────────────────────────────────────────────────────
-      # AUTO-BUMP — clusters/_template/bootstrap-kit/<NN>-<chart>.yaml
-      # ──────────────────────────────────────────────────────────────
-      # TBD-A6 meta-fix (2026-05-18): every chart-publishing wave in this
-      # session required a SEPARATE manual collector PR to bump the
-      # bootstrap-kit pin so Sovereigns would actually install the new
-      # OCI artifact. Without the pin bump, the chart at e.g.
-      # bp-catalyst-platform:1.4.166 gets published to GHCR but
-      # clusters/_template/bootstrap-kit/13-bp-catalyst-platform.yaml
-      # still pins `version: 1.4.165` and fresh Sovereigns silently
-      # install the OLD artifact.
-      #
-      # Manual collector PRs from this session ALONE (eliminated by
-      # this hook):
-      #   - #1676 chart 1.4.162→1.4.163 — Wave 16 collector
-      #   - #1687 chart 1.4.163→1.4.164 — Wave 17 collector
-      #   - #1694 bp-guacamole 0.1.21 → 0.1.22 (TBD-G6)
-      #   - #1695 chart 1.4.164→1.4.165 — Wave 18 collector
-      #   - #1698 chart 1.4.165→1.4.166 (TBD-E8)
-      #   - #1700 bp-guacamole 0.1.22 → 0.1.23 (TBD-G4 phase 2)
-      #   - #1706 self-sovereign-cutover 0.1.29→0.1.30 (TBD-C18)
-      #   - #1707 chart 1.4.166→1.4.167 — Wave 24 collector
-      #
-      # Mechanism: the canonical chart name comes from Chart.yaml `name:`
-      # (already read into ${{ steps.chart.outputs.name }} above). The
-      # corresponding bootstrap-kit pin file is identified by grepping
-      # for `^      chart: <name>$` (6-space indent matches the
-      # HelmRelease.spec.chart.spec.chart shape in every existing slot).
-      # If no pin file matches, the chart is NOT in the bootstrap kit
-      # (e.g. it's an optional Application Blueprint that Sovereigns
-      # opt into via overlay) — this is a graceful no-op, NOT a failure.
-      #
-      # The bot-author commit does NOT re-trigger blueprint-release per
-      # the GITHUB_TOKEN convention. So this hook converges in ONE pass:
-      # publish → bump pin → push. The next Sovereign provisioned will
-      # pick up the new pin via the standard Flux reconciliation.
-      #
-      # Idempotent reset-and-rewrite: parallel matrix jobs (multiple
-      # changed Blueprints in the same push) could race on the same
-      # branch. Retry up to 3 times with `git fetch + reset --hard
-      # origin/main + re-sed` so concurrent runs produce strictly
-      # ordered commits instead of clobbering each other.
-      - name: "Auto-bump bootstrap-kit pin for ${{ steps.chart.outputs.name }}"
-        if: steps.chart.outputs.skip != 'true'
-        id: bump_pin
-        env:
-          CHART_NAME: ${{ steps.chart.outputs.name }}
-          CHART_VERSION: ${{ steps.chart.outputs.version }}
-        run: |
-          set -euo pipefail
-
-          # Locate the bootstrap-kit slot pinning this chart, if any.
-          # The 6-space indent matches every existing slot's chart
-          # directive:
-          #
-          #     spec:
-          #       chart:
-          #         spec:
-          #           chart: bp-<name>        ← 6 spaces
-          #           version: <semver>       ← 6 spaces, same scope
-          pin_file=$(grep -lE "^      chart: ${CHART_NAME}\$" \
-            clusters/_template/bootstrap-kit/*.yaml 2>/dev/null || true)
-
-          if [ -z "$pin_file" ]; then
-            # TBD-A6 hardening (2026-05-18): before declaring "not in the
-            # kit", check whether a slot pins this chart at a DIFFERENT
-            # indent than the canonical 6 spaces. If so, the auto-bump
-            # would silently no-op forever and the chart-pin pair would
-            # drift undetected. Fail loudly so the slot author re-indents
-            # to the canonical shape before publishing.
-            wrong_indent=$(grep -lE "^[[:space:]]+chart: ${CHART_NAME}\$" \
-              clusters/_template/bootstrap-kit/*.yaml 2>/dev/null || true)
-            if [ -n "$wrong_indent" ]; then
-              echo "::error title=Bootstrap-kit slot indent drift::Slot file(s) pin ${CHART_NAME} but at a non-6-space indent: $wrong_indent. The auto-bump hook keys on \`^      chart: <name>\$\` (exactly 6 spaces). Re-indent the slot's \`chart:\` and \`version:\` lines under HelmRelease.spec.chart.spec to 6 spaces, matching every other slot. The pin-sync audit (scripts/check-bootstrap-kit-pin-sync.sh) keys on the same regex and would also miss this slot — drift would be undetected."
-              exit 1
-            fi
-            echo "INFO: no bootstrap-kit slot pins ${CHART_NAME} — graceful no-op (chart is an opt-in Application Blueprint, not part of the kit)."
-            echo "bumped=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          # Defensive: refuse to operate if multiple slots reference the
-          # same chart name — bootstrap-kit invariant is one slot per
-          # chart, and a violation would mean the schema changed under
-          # us and the hook would write wrong things.
-          slot_count=$(echo "$pin_file" | wc -l)
-          if [ "$slot_count" -ne 1 ]; then
-            echo "::error title=Multiple bootstrap-kit slots for ${CHART_NAME}::Expected exactly one slot file pinning chart '${CHART_NAME}', found ${slot_count}: $pin_file"
-            exit 1
-          fi
-
-          # Read current pin. The HelmRelease.spec.chart.spec.version is
-          # at exactly 6 spaces of indent in every slot (audited 2026-
-          # 05-18 across all 51 files). If the shape ever changes we
-          # fail loudly rather than write the wrong line.
-          current=$(awk '/^      version:/{print $2; exit}' "$pin_file" | tr -d '"')
-          if [ -z "$current" ]; then
-            echo "::error title=Unparseable bootstrap-kit pin::No '      version:' line at 6-space indent in $pin_file. The HelmRelease.spec.chart.spec.version shape changed under TBD-A6's auto-bump hook; refusing to write."
-            exit 1
-          fi
-
-          if [ "$current" = "$CHART_VERSION" ]; then
-            echo "INFO: ${pin_file} already pins ${CHART_NAME}=${CHART_VERSION} — no-op."
-            echo "bumped=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          echo "Bumping ${pin_file}: ${CHART_NAME} ${current} → ${CHART_VERSION}"
-          # sed targets the single 6-space `      version:` line at the
-          # chart-pin scope. There is exactly one such line per slot
-          # (audited 2026-05-18); the regex is anchored to start-of-line
-          # so a deeper-indented `version:` (e.g. inside .values.xxx)
-          # cannot accidentally match.
-          sed -i -E "s|^      version: .*\$|      version: ${CHART_VERSION}|" "$pin_file"
-
-          # Verify the sed actually flipped the line — defence against
-          # a future indent change shipping silently.
-          new=$(awk '/^      version:/{print $2; exit}' "$pin_file" | tr -d '"')
-          if [ "$new" != "$CHART_VERSION" ]; then
-            echo "::error title=sed failed::After rewrite, ${pin_file} still pins '${new}', expected '${CHART_VERSION}'."
-            exit 1
-          fi
-
-          echo "bumped=true" >> "$GITHUB_OUTPUT"
-          echo "pin_file=${pin_file}" >> "$GITHUB_OUTPUT"
-          echo "prev_version=${current}" >> "$GITHUB_OUTPUT"
-
-      # ──────────────────────────────────────────────────────────────
-      # LOCKSTEP — platform/<bp>/blueprint.yaml `spec.version`
-      # ──────────────────────────────────────────────────────────────
-      # TBD-A20 (issue #1856, 2026-05-18): the auto-bump hook above only
-      # touched clusters/_template/bootstrap-kit/*.yaml. The upstream
-      # blueprint manifest (platform/<bp>/blueprint.yaml) ALSO carries
-      # a `spec.version` field that must equal the Chart.yaml `version`
-      # by convergence contract:
-      #
-      #   TestBootstrapKit_BlueprintCardsHaveRequiredFields
-      #     (tests/e2e/bootstrap-kit/main_test.go)
-      #
-      # asserts `Chart.yaml.version == blueprint.yaml.spec.version` for
-      # every kit blueprint. Six blueprints (cilium, cert-manager, flux,
-      # openbao, keycloak, gitea) silently drifted between Chart.yaml
-      # and blueprint.yaml until the test started failing — A17 (#1855)
-      # hot-patched the six drifts; this lockstep step removes the
-      # structural recurrence pattern.
-      #
-      # Location convention:
-      #   - Leaf platform blueprints live at  ${matrix.path}/blueprint.yaml
-      #     (e.g. platform/cilium/blueprint.yaml)
-      #   - Umbrella product blueprints live at ${matrix.path}/chart/blueprint.yaml
-      #     (e.g. products/continuum/chart/blueprint.yaml)
-      #
-      # Some charts have no blueprint.yaml at all (e.g. products/catalyst —
-      # the chart-only umbrella for bp-catalyst-platform). This is fine —
-      # the lockstep is a graceful no-op when no blueprint.yaml exists.
-      #
-      # The `spec.version` line is at exactly 2-space indent in every
-      # blueprint.yaml (audited 2026-05-18 across all 71 files in
-      # platform/ + products/). We key on `^  version:` with a defensive
-      # parse + post-write verify.
-      - name: "Lockstep-bump blueprint.yaml spec.version for ${{ steps.chart.outputs.name }}"
-        if: steps.chart.outputs.skip != 'true'
-        id: bump_blueprint
-        env:
-          CHART_NAME: ${{ steps.chart.outputs.name }}
-          CHART_VERSION: ${{ steps.chart.outputs.version }}
-          CHART_PATH: ${{ matrix.path }}
-        run: |
-          set -euo pipefail
-
-          # Try the two canonical blueprint.yaml locations in order.
-          # Each Blueprint folder uses exactly one shape; never both.
-          bp_file=""
-          for candidate in "${CHART_PATH}/blueprint.yaml" "${CHART_PATH}/chart/blueprint.yaml"; do
-            if [ -f "$candidate" ]; then
-              bp_file="$candidate"
-              break
-            fi
-          done
-
-          if [ -z "$bp_file" ]; then
-            echo "INFO: no blueprint.yaml at ${CHART_PATH}/blueprint.yaml or ${CHART_PATH}/chart/blueprint.yaml — graceful no-op (chart has no Blueprint manifest, e.g. the products/catalyst umbrella)."
-            echo "bumped=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          # Sanity: this file must actually be a Blueprint manifest (kind:
-          # Blueprint), not some other YAML co-located in the folder.
-          # Without this guard, a stray non-Blueprint blueprint.yaml (the
-          # CRD definition file we saw at products/catalyst/chart/crds/
-          # is an example of a generic name-collision) would be rewritten
-          # incorrectly.
-          bp_kind=$(awk '/^kind:/{print $2; exit}' "$bp_file" | tr -d '"')
-          if [ "$bp_kind" != "Blueprint" ]; then
-            echo "INFO: ${bp_file} kind='${bp_kind}' — not a Blueprint manifest, graceful no-op."
-            echo "bumped=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          # Read current spec.version. The blueprint.yaml convention is:
-          #
-          #   spec:
-          #     version: <semver>     ← 2 spaces, single `version:` line
-          #
-          # The 2-space indent is anchored start-of-line so any deeper-
-          # indented `version:` (inside spec.upgrades.from[], etc.) cannot
-          # accidentally match.
-          current=$(awk '/^  version:/{print $2; exit}' "$bp_file" | tr -d '"')
-          if [ -z "$current" ]; then
-            echo "::error title=Unparseable blueprint.yaml spec.version::No '  version:' line at 2-space indent in $bp_file. The Blueprint manifest shape changed under TBD-A20's lockstep hook; refusing to write."
-            exit 1
-          fi
-
-          if [ "$current" = "$CHART_VERSION" ]; then
-            echo "INFO: ${bp_file} already at spec.version=${CHART_VERSION} — no-op."
-            echo "bumped=false" >> "$GITHUB_OUTPUT"
-            # Still emit the path so downstream steps know about it.
-            echo "bp_file=${bp_file}" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          echo "Lockstep-bumping ${bp_file}: spec.version ${current} → ${CHART_VERSION}"
-          sed -i -E "s|^  version: .*\$|  version: ${CHART_VERSION}|" "$bp_file"
-
-          # Verify the sed actually flipped the line.
-          new=$(awk '/^  version:/{print $2; exit}' "$bp_file" | tr -d '"')
-          if [ "$new" != "$CHART_VERSION" ]; then
-            echo "::error title=blueprint.yaml sed failed::After rewrite, ${bp_file} still has spec.version='${new}', expected '${CHART_VERSION}'."
-            exit 1
-          fi
-
-          echo "bumped=true" >> "$GITHUB_OUTPUT"
-          echo "bp_file=${bp_file}" >> "$GITHUB_OUTPUT"
-          echo "prev_version=${current}" >> "$GITHUB_OUTPUT"
-
-      - name: "Commit + push bootstrap-kit pin bump + blueprint.yaml lockstep"
-        # Run if either side has staged changes — usually they bump in
-        # tandem, but a kit-only chart (no blueprint.yaml) or a non-kit
-        # leaf (no pin file) might bump only one. Either is sufficient
-        # to commit.
-        if: steps.chart.outputs.skip != 'true' && (steps.bump_pin.outputs.bumped == 'true' || steps.bump_blueprint.outputs.bumped == 'true')
-        env:
-          CHART_NAME: ${{ steps.chart.outputs.name }}
-          CHART_VERSION: ${{ steps.chart.outputs.version }}
-          PIN_FILE: ${{ steps.bump_pin.outputs.pin_file }}
-          PREV_VERSION: ${{ steps.bump_pin.outputs.prev_version }}
-          BP_FILE: ${{ steps.bump_blueprint.outputs.bp_file }}
-          BP_PREV_VERSION: ${{ steps.bump_blueprint.outputs.prev_version }}
-          PIN_BUMPED: ${{ steps.bump_pin.outputs.bumped }}
-          BP_BUMPED: ${{ steps.bump_blueprint.outputs.bumped }}
-        run: |
-          set -euo pipefail
-          git config user.name "hatiyildiz"
-          git config user.email "hatiyildiz@users.noreply.github.com"
-
-          # Idempotent reset-and-rewrite on push conflict — parallel
-          # matrix jobs (multiple Blueprints bumped in one push) can race
-          # on the same branch. On conflict we re-fetch, re-sed against
-          # whatever state is currently on origin/main, and re-commit.
-          # The operation is convergent: every retry produces the same
-          # final state (pin=CHART_VERSION, blueprint=CHART_VERSION).
-          rewrite_pin() {
-            local pf="$1"
-            local cur
-            cur=$(awk '/^      version:/{print $2; exit}' "$pf" | tr -d '"')
-            if [ "$cur" = "${CHART_VERSION}" ]; then
-              return 1  # already there
-            fi
-            sed -i -E "s|^      version: .*\$|      version: ${CHART_VERSION}|" "$pf"
-            return 0
-          }
-          rewrite_blueprint() {
-            local bp="$1"
-            local cur
-            cur=$(awk '/^  version:/{print $2; exit}' "$bp" | tr -d '"')
-            if [ "$cur" = "${CHART_VERSION}" ]; then
-              return 1
-            fi
-            sed -i -E "s|^  version: .*\$|  version: ${CHART_VERSION}|" "$bp"
-            return 0
-          }
-
-          # Stage whichever files were rewritten.
-          if [ "${PIN_BUMPED}" = "true" ] && [ -n "${PIN_FILE}" ]; then
-            git add "${PIN_FILE}"
-          fi
-          if [ "${BP_BUMPED}" = "true" ] && [ -n "${BP_FILE}" ]; then
-            git add "${BP_FILE}"
-          fi
-
-          if git diff --staged --quiet; then
-            echo "no staged changes — already in sync"
-            exit 0
-          fi
-
-          # Compose commit message. Pin bump remains the primary subject
-          # (preserves the existing `deploy(<chart>): bump bootstrap-kit
-          # pin X -> Y (auto, Refs TBD-A6)` shape used by every existing
-          # automation). The blueprint.yaml lockstep is mentioned as a
-          # secondary line so consumers parsing recent log subjects
-          # don't see a format change. When ONLY blueprint.yaml bumps
-          # (chart not in the kit), the subject acknowledges TBD-A20.
-          if [ "${PIN_BUMPED}" = "true" ] && [ "${BP_BUMPED}" = "true" ]; then
-            msg="deploy(${CHART_NAME}): bump bootstrap-kit pin ${PREV_VERSION} -> ${CHART_VERSION} (auto, Refs TBD-A6)
-
-Also locksteps platform blueprint.yaml spec.version ${BP_PREV_VERSION} -> ${CHART_VERSION} (Refs TBD-A20, #1856)."
-          elif [ "${PIN_BUMPED}" = "true" ]; then
-            msg="deploy(${CHART_NAME}): bump bootstrap-kit pin ${PREV_VERSION} -> ${CHART_VERSION} (auto, Refs TBD-A6)"
-          else
-            # Only blueprint.yaml moved — chart is not in the bootstrap kit
-            # (e.g. an opt-in Application Blueprint like bp-velero, bp-vllm).
-            msg="deploy(${CHART_NAME}): lockstep blueprint.yaml spec.version ${BP_PREV_VERSION} -> ${CHART_VERSION} (auto, Refs TBD-A20, #1856)"
-          fi
-          git commit -m "${msg}"
-
-          for i in 1 2 3; do
-            if git push origin HEAD:main; then
-              echo "Pushed lockstep bump for ${CHART_NAME}=${CHART_VERSION} (pin=${PIN_BUMPED}, blueprint=${BP_BUMPED})"
-              exit 0
-            fi
-            echo "push attempt $i failed — re-fetching origin/main and re-applying lockstep"
-            git fetch origin main
-            git reset --hard origin/main
-
-            did_pin=0
-            did_bp=0
-            if [ "${PIN_BUMPED}" = "true" ] && [ -n "${PIN_FILE}" ]; then
-              if rewrite_pin "${PIN_FILE}"; then
-                git add "${PIN_FILE}"
-                did_pin=1
-              fi
-            fi
-            if [ "${BP_BUMPED}" = "true" ] && [ -n "${BP_FILE}" ]; then
-              if rewrite_blueprint "${BP_FILE}"; then
-                git add "${BP_FILE}"
-                did_bp=1
-              fi
-            fi
-            if [ "$did_pin" -eq 0 ] && [ "$did_bp" -eq 0 ]; then
-              echo "origin/main already at ${CHART_VERSION} — nothing to push"
-              exit 0
-            fi
-            if git diff --staged --quiet; then
-              echo "no changes after re-fetch — already at ${CHART_VERSION} on origin/main"
-              exit 0
-            fi
-            if [ "$did_pin" -eq 1 ] && [ "$did_bp" -eq 1 ]; then
-              git commit -m "deploy(${CHART_NAME}): bump bootstrap-kit pin -> ${CHART_VERSION} + blueprint.yaml lockstep (auto, Refs TBD-A6 + TBD-A20, retry $i)"
-            elif [ "$did_pin" -eq 1 ]; then
-              git commit -m "deploy(${CHART_NAME}): bump bootstrap-kit pin -> ${CHART_VERSION} (auto, Refs TBD-A6, retry $i)"
-            else
-              git commit -m "deploy(${CHART_NAME}): lockstep blueprint.yaml spec.version -> ${CHART_VERSION} (auto, Refs TBD-A20, retry $i)"
-            fi
-          done
-          echo "::error title=Lockstep push failed::3 attempts exhausted for ${CHART_NAME}=${CHART_VERSION}."
-          exit 1
-
      - name: Summary
        if: steps.chart.outputs.skip != 'true'
        run: |
@ -875,15 +509,3 @@ Also locksteps platform blueprint.yaml spec.version ${BP_PREV_VERSION} -> ${CHAR
          echo "- **Cosigned:** ✓ (keyless via GitHub OIDC)" >> "$GITHUB_STEP_SUMMARY"
          echo "- **SBOM attested:** ✓ (SPDX-JSON)" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Subchart guards:** ✓ working tree, ✓ packaged tgz, ✓ pulled OCI artifact, ✓ helm template smoke" >> "$GITHUB_STEP_SUMMARY"
-          if [ "${{ steps.bump_pin.outputs.bumped }}" = "true" ]; then
-            echo "- **Bootstrap-kit pin:** ✓ auto-bumped \`${{ steps.bump_pin.outputs.pin_file }}\` ${{ steps.bump_pin.outputs.prev_version }} → ${{ steps.chart.outputs.version }} (TBD-A6 meta-fix)" >> "$GITHUB_STEP_SUMMARY"
-          else
-            echo "- **Bootstrap-kit pin:** (chart is not in the kit — opt-in Application Blueprint, no pin to bump)" >> "$GITHUB_STEP_SUMMARY"
-          fi
-          if [ "${{ steps.bump_blueprint.outputs.bumped }}" = "true" ]; then
-            echo "- **Blueprint.yaml lockstep:** ✓ auto-bumped \`${{ steps.bump_blueprint.outputs.bp_file }}\` spec.version ${{ steps.bump_blueprint.outputs.prev_version }} → ${{ steps.chart.outputs.version }} (TBD-A20, #1856)" >> "$GITHUB_STEP_SUMMARY"
-          elif [ -n "${{ steps.bump_blueprint.outputs.bp_file }}" ]; then
-            echo "- **Blueprint.yaml lockstep:** (already at ${{ steps.chart.outputs.version }}, no-op)" >> "$GITHUB_STEP_SUMMARY"
-          else
-            echo "- **Blueprint.yaml lockstep:** (chart has no platform/<bp>/blueprint.yaml — e.g. products/catalyst umbrella)" >> "$GITHUB_STEP_SUMMARY"
-          fi
--- a/.github/workflows/build-bp-newapi.yaml
+++ b/.github/workflows/build-bp-newapi.yaml
@ -1,197 +0,0 @@
-name: Build bp-newapi
-
-# bp-newapi — Catalyst Blueprint wrapping the upstream NewAPI multi-tenant
-# LLM gateway (github.com/Calcium-Ion/new-api, MIT). Per
-# platform/newapi/chart/Chart.yaml the upstream ships a docker-compose
-# image only at `docker.io/calciumion/new-api:<UPSTREAM_VER>`. Per
-# docs/INVIOLABLE-PRINCIPLES.md #4a we never let production Sovereigns
-# pull from Docker Hub at runtime — every image must live in
-# ghcr.io/openova-io/* under a registry we own (no Docker Hub rate
-# limits, no upstream availability risk).
-#
-# This workflow mirrors the bp-guacamole pattern
-# (.github/workflows/build-bp-guacamole.yaml):
-#   1. Pulls `docker.io/calciumion/new-api:<UPSTREAM_VER>` from Docker Hub.
-#   2. Captures the upstream repo digest (sha256:...) so the GHCR tag
-#      points at exactly the bytes we tested against, even if upstream
-#      re-cuts the tag in the future.
-#   3. Re-tags + pushes into GHCR under
-#      `ghcr.io/openova-io/openova/newapi-mirror:<UPSTREAM_VER>` so each
-#      Sovereign pulls from a registry we own.
-#   4. Bumps platform/newapi/chart/values.yaml `newapi.image.tag` to the
-#      mirrored tag.
-#   5. Bumps platform/newapi/chart/Chart.yaml `version` patch level + sets
-#      `appVersion` to the upstream version + dispatches
-#      blueprint-release.yaml so a fresh bp-newapi:<semver> OCI artifact
-#      lands.
-#
-# Closes the gap surfaced by qa-loop bounded-cycle audit (prov #7, Gap F):
-# the chart referenced `ghcr.io/openova-io/openova/newapi-mirror:v0.4.5`
-# but no CI workflow ever built that image — the GHCR package didn't
-# exist and the Pod ImagePullBackOff'd on every fresh Sovereign, blocking
-# alice signup gate 5 (LLM).
-
-on:
-  push:
-    paths:
-      - 'platform/newapi/chart/**'
-      - 'platform/newapi/blueprint.yaml'
-      - '.github/workflows/build-bp-newapi.yaml'
-    branches: [main]
-  workflow_dispatch:
-    inputs:
-      upstream_version:
-        description: 'Calcium-Ion/new-api upstream version (e.g. v0.13.2).'
-        required: false
-        default: 'v0.13.2'
-
-env:
-  REGISTRY: ghcr.io
-  NEWAPI_IMAGE: ghcr.io/openova-io/openova/newapi-mirror
-  CHART_VALUES: platform/newapi/chart/values.yaml
-  CHART_YAML: platform/newapi/chart/Chart.yaml
-  # v0.13.2 is the latest stable (non-rc) Calcium-Ion/new-api release on
-  # Docker Hub at the time this workflow was authored (2026-04-27 upstream
-  # publish date). Bump in both this default AND in the chart Chart.yaml
-  # `appVersion` when rolling forward. The v1.0.0-rc.x line is gated on
-  # upstream stabilising the schema migration; do NOT auto-roll past
-  # v0.13.x without re-running the channel-seed integration smoke against
-  # NewAPI's `/api/channel/` admin shape (the seed Job uses the legacy
-  # request body shape).
-  DEFAULT_UPSTREAM_VERSION: 'v0.13.2'
-
-jobs:
-  mirror:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      packages: write
-      actions: write
-    outputs:
-      upstream_version: ${{ steps.vars.outputs.upstream_version }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Resolve upstream version
-        id: vars
-        run: |
-          set -euo pipefail
-          ver="${{ inputs.upstream_version }}"
-          ver="${ver:-${DEFAULT_UPSTREAM_VERSION}}"
-          echo "upstream_version=${ver}" >> "$GITHUB_OUTPUT"
-          echo "Mirroring Calcium-Ion/new-api ${ver} to GHCR"
-
-      - name: Login to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Mirror calciumion/new-api → ghcr.io/openova-io/openova/newapi-mirror
-        env:
-          UPSTREAM_VER: ${{ steps.vars.outputs.upstream_version }}
-        run: |
-          set -euo pipefail
-          src="docker.io/calciumion/new-api:${UPSTREAM_VER}"
-          dst="${NEWAPI_IMAGE}:${UPSTREAM_VER}"
-          docker pull "${src}"
-          # Capture the upstream repo digest so the GHCR tag points at
-          # exactly the bytes we tested against, even if upstream ever
-          # re-cuts the tag. Stored in GITHUB_ENV for the step summary.
-          UPSTREAM_DIGEST=$(docker inspect --format='{{index .RepoDigests 0}}' "${src}")
-          echo "Upstream digest: ${UPSTREAM_DIGEST}"
-          docker tag "${src}" "${dst}"
-          docker tag "${src}" "${NEWAPI_IMAGE}:latest"
-          docker push "${dst}"
-          docker push "${NEWAPI_IMAGE}:latest"
-          echo "NEWAPI_UPSTREAM_DIGEST=${UPSTREAM_DIGEST}" >> "$GITHUB_ENV"
-
-      - name: Install yq
-        run: |
-          sudo wget -qO /usr/local/bin/yq \
-            https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64
-          sudo chmod +x /usr/local/bin/yq
-
-      - name: Bump image tag in values.yaml
-        env:
-          UPSTREAM_VER: ${{ steps.vars.outputs.upstream_version }}
-        run: |
-          set -euo pipefail
-          # Set repository to GHCR mirror AND tag to the upstream version
-          # we just mirrored. Repository write is idempotent (no-op once
-          # values.yaml is already GHCR-pinned).
-          yq eval -i ".newapi.image.repository = \"${NEWAPI_IMAGE}\"" "${CHART_VALUES}"
-          yq eval -i ".newapi.image.tag = \"${UPSTREAM_VER}\"" "${CHART_VALUES}"
-          echo "values.yaml after update:"
-          yq eval '.newapi.image' "${CHART_VALUES}"
-
-      - name: Bump Chart.yaml patch version + appVersion
-        env:
-          UPSTREAM_VER: ${{ steps.vars.outputs.upstream_version }}
-        run: |
-          set -euo pipefail
-          current=$(yq eval '.version' "${CHART_YAML}")
-          IFS='.' read -r major minor patch <<<"${current}"
-          next="${major}.${minor}.$((patch + 1))"
-          yq eval -i ".version = \"${next}\"" "${CHART_YAML}"
-          # appVersion mirrors the upstream tag we just mirrored (strip
-          # leading v: Helm convention is appVersion = upstream version
-          # without the v prefix). Operators reading `helm list` see the
-          # actual NewAPI release running in their Sovereign.
-          app_ver="${UPSTREAM_VER#v}"
-          yq eval -i ".appVersion = \"${app_ver}\"" "${CHART_YAML}"
-          echo "Chart.yaml: version ${current} -> ${next}, appVersion -> ${app_ver}"
-          echo "CHART_NEW_VERSION=${next}" >> "$GITHUB_ENV"
-
-      - name: Commit and push chart bump
-        id: deploy_commit
-        env:
-          UPSTREAM_VER: ${{ steps.vars.outputs.upstream_version }}
-        run: |
-          set -euo pipefail
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          git add "${CHART_VALUES}" "${CHART_YAML}"
-          if git diff --staged --quiet; then
-            echo "No changes to commit"
-            echo "pushed=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-          git commit -m "deploy: bump bp-newapi upstream ${UPSTREAM_VER} chart ${CHART_NEW_VERSION}"
-          for i in 1 2 3; do
-            git push && break
-            git pull --rebase
-          done
-          echo "pushed=true" >> "$GITHUB_OUTPUT"
-
-      - name: Trigger blueprint-release for the chart bump
-        if: steps.deploy_commit.outputs.pushed == 'true'
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: |
-          gh workflow run blueprint-release.yaml \
-            --repo "${{ github.repository }}" \
-            --ref main \
-            -f blueprint=newapi \
-            -f tree=platform
-          echo "blueprint-release dispatched for platform/newapi @ main"
-
-      - name: Summary
-        env:
-          UPSTREAM_VER: ${{ steps.vars.outputs.upstream_version }}
-        run: |
-          {
-            echo "## bp-newapi mirror complete"
-            echo ""
-            echo "- Upstream: \`docker.io/calciumion/new-api:${UPSTREAM_VER}\`"
-            echo "- Mirrored: \`${NEWAPI_IMAGE}:${UPSTREAM_VER}\`"
-            echo "- Upstream digest: \`${NEWAPI_UPSTREAM_DIGEST:-unknown}\`"
-            echo "- Chart bumped to: \`${CHART_NEW_VERSION:-unchanged}\`"
-          } >> "$GITHUB_STEP_SUMMARY"
--- a/.github/workflows/build-openova-flow-adapter-flux.yaml
+++ b/.github/workflows/build-openova-flow-adapter-flux.yaml
@ -1,175 +0,0 @@
-name: Build openova-flow-adapter-flux
-
-# openova-flow-adapter-flux — DaemonSet sidecar that watches Flux
-# HelmRelease CRs and POSTs FlowMessage envelopes to openova-flow-server.
-# Source at products/openova-flow/adapter-flux/, chart at
-# platform/openova-flow-emitter/chart/.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #4a (GitHub Actions is the ONLY
-# build path) every image that runs on OpenOva infra MUST be produced
-# by a CI workflow from a committed git SHA. This workflow mirrors the
-# shape of build-application-controller.yaml — same Buildx push, same
-# cosign keyless signing, same auto-bump of values.yaml + dispatch of
-# blueprint-release for chart re-publish.
-#
-# Per `feedback_inviolable_principles.md` / global CLAUDE.md "every
-# workflow MUST be event-driven, NEVER scheduled". Triggers on
-# push-to-main (paths filter), pull_request (test only, no push), and
-# workflow_dispatch for manual re-runs without a code change.
-
-on:
-  push:
-    paths:
-      - 'products/openova-flow/adapter-flux/**'
-      - 'platform/openova-flow-emitter/chart/**'
-      - '.github/workflows/build-openova-flow-adapter-flux.yaml'
-    branches: [main]
-  pull_request:
-    paths:
-      - 'products/openova-flow/adapter-flux/**'
-      - 'platform/openova-flow-emitter/chart/**'
-      - '.github/workflows/build-openova-flow-adapter-flux.yaml'
-  workflow_dispatch:
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE: ghcr.io/openova-io/openova/openova-flow-adapter-flux
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      packages: write
-      id-token: write
-      actions: write
-    outputs:
-      sha_short: ${{ steps.vars.outputs.sha_short }}
-      digest: ${{ steps.build.outputs.digest }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set short SHA
-        id: vars
-        run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache-dependency-path: |
-            products/openova-flow/adapter-flux/go.sum
-
-      - name: go vet
-        working-directory: products/openova-flow/adapter-flux
-        run: go vet ./...
-
-      - name: Run unit tests
-        working-directory: products/openova-flow/adapter-flux
-        run: go test -count=1 -race ./...
-
-      # On pull_request runs we stop here — image push requires
-      # `packages: write` which only main-branch authors hold.
-      - name: Login to GHCR
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build and push image
-        id: build
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v6
-        with:
-          context: products/openova-flow/adapter-flux
-          file: products/openova-flow/adapter-flux/Dockerfile
-          push: true
-          tags: |
-            ${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
-            ${{ env.IMAGE }}:latest
-          labels: |
-            org.opencontainers.image.source=https://github.com/openova-io/openova
-            org.opencontainers.image.revision=${{ github.sha }}
-            org.opencontainers.image.title=openova-flow-adapter-flux
-            org.opencontainers.image.description=OpenovaFlow Flux adapter — HelmRelease informer to FlowMessage emitter
-          provenance: false
-          sbom: false
-
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@v3
-
-      - name: Sign image with cosign (keyless)
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign sign --yes "${IMAGE}@${DIGEST}"
-
-      # Attaches a cosign attestation of type `spdx` to the pushed image
-      # digest.
-      # NOTE(review): the predicate below is a fixed placeholder JSON
-      # string, not an SPDX document generated from the image, and the
-      # build step sets `sbom: false`. Confirm whether a real SBOM should
-      # be generated and attested here instead.
-      - name: Generate and attest SBOM
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign attest --yes \
-            --predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
-            --type spdx \
-            "${IMAGE}@${DIGEST}"
-
-      # Auto-bump the chart values.yaml tag. The adapter-flux image is
-      # consumed by the bp-openova-flow-emitter chart (chart name is
-      # "emitter", binary name is "adapter-flux" — chart wraps the
-      # adapter as a DaemonSet emitter per ADR contract).
-      - name: Bump flowEmitter.image.tag in chart values.yaml
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -e
-          VALUES="platform/openova-flow-emitter/chart/values.yaml"
-          # awk: find `flowEmitter:` (top-level key), then the nested
-          # `  image:` block under it, then rewrite the next `    tag:`
-          # line. Both flags are cleared at the next top-level key so a
-          # `flowEmitter.image` block without a `tag:` line cannot cause a
-          # later section's 4-space-indented `tag:` to be rewritten.
-          awk -v sha="${SHA_SHORT}" '
-            /^flowEmitter:/ { in_fe=1; print; next }
-            in_fe && /^[a-zA-Z]/ && !/^flowEmitter:/ { in_fe=0; in_img=0 }
-            in_fe && /^  image:/ { in_img=1; print; next }
-            in_fe && /^  [a-zA-Z]/ && !/^  image:/ { in_img=0 }
-            in_img && /^    tag:/ { sub(/:.*/, ": \"" sha "\""); in_img=0 }
-            { print }
-          ' "${VALUES}" > "${VALUES}.tmp"
-          # Separate command (not `awk ... && mv`) so an awk failure stops
-          # the step instead of being masked inside an && list.
-          mv "${VALUES}.tmp" "${VALUES}"
-          echo "values.yaml after bump:"
-          grep -A1 "^  image:" "${VALUES}" | head -6
-
-      # Commits the bumped values.yaml and pushes it to main. Emits
-      # `pushed=true|false` as a step output so downstream steps can tell
-      # whether a new commit actually landed.
-      - name: Commit and push values.yaml bump
-        id: deploy_commit
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          if git diff --quiet platform/openova-flow-emitter/chart/values.yaml; then
-            echo "no values.yaml change — already pinned to ${SHA_SHORT}"
-            echo "pushed=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-          git add platform/openova-flow-emitter/chart/values.yaml
-          git commit -m "chore(deploy): bump openova-flow-adapter-flux image to ${SHA_SHORT} [skip ci]"
-          # Rebase onto the latest main (best-effort). If the rebase stops
-          # part-way, HEAD is left detached on origin/main and the push
-          # below would "succeed" as a no-op without the bump commit, so
-          # abort the rebase and let the push be rejected loudly instead.
-          if ! git pull --rebase --autostash origin main; then
-            git rebase --abort 2>/dev/null || true
-          fi
-          git push origin HEAD:main
-          echo "pushed=true" >> "$GITHUB_OUTPUT"
-
-      - name: Dispatch blueprint-release for chart re-publish
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' && steps.deploy_commit.outputs.pushed == 'true'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh workflow run blueprint-release.yaml \
-            --repo "${GITHUB_REPOSITORY}" \
-            --ref main \
-            -f blueprint=openova-flow-emitter \
-            -f tree=platform
--- a/.github/workflows/build-openova-flow-server.yaml
+++ b/.github/workflows/build-openova-flow-server.yaml
@ -1,210 +0,0 @@
-name: Build openova-flow-server
-
-# openova-flow-server — stateless HTTP+SSE event router that drives the
-# OpenovaFlow timeline view in the Catalyst console. Source at
-# products/openova-flow/server/, chart at platform/openova-flow-server/chart/.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #4a (GitHub Actions is the ONLY
-# build path) every image that runs on OpenOva infra MUST be produced
-# by a CI workflow from a committed git SHA. This workflow mirrors the
-# shape of build-application-controller.yaml — same Buildx push, same
-# cosign keyless signing, same auto-bump of values.yaml + dispatch of
-# blueprint-release for chart re-publish.
-#
-# Per `feedback_inviolable_principles.md` / global CLAUDE.md "every
-# workflow MUST be event-driven, NEVER scheduled". Triggers on
-# push-to-main (paths filter), pull_request (test only, no push), and
-# workflow_dispatch for manual re-runs without a code change.
-
-on:
-  push:
-    paths:
-      - 'products/openova-flow/server/**'
-      - 'platform/openova-flow-server/chart/**'
-      - '.github/workflows/build-openova-flow-server.yaml'
-    branches: [main]
-  pull_request:
-    paths:
-      - 'products/openova-flow/server/**'
-      - 'platform/openova-flow-server/chart/**'
-      - '.github/workflows/build-openova-flow-server.yaml'
-  workflow_dispatch:
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE: ghcr.io/openova-io/openova/openova-flow-server
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      # contents: write — the deploy step below pushes a values.yaml SHA
-      # bump back to main so the bp-openova-flow-server chart picks up
-      # the newly-built image without an operator manually editing the
-      # file (per `feedback_no_mvp_no_workarounds.md` rule 1: target-state,
-      # never "manual follow-up bump").
-      contents: write
-      packages: write
-      # id-token write is required by cosign keyless signing (Sigstore).
-      id-token: write
-      # actions: write — required for `gh workflow run` to dispatch
-      # blueprint-release after the deploy commit lands. Without it
-      # the GITHUB_TOKEN gets HTTP 403 "Resource not accessible by
-      # integration" and bp-openova-flow-server OCI artifact stays
-      # stuck on the previous deploy's SHA (#712 / catalyst-build.yaml
-      # incident replicated here for parity).
-      actions: write
-    outputs:
-      sha_short: ${{ steps.vars.outputs.sha_short }}
-      digest: ${{ steps.build.outputs.digest }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set short SHA
-        id: vars
-        run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          # server has no external deps (stdlib only) so no go.sum is
-          # present in the tree — skip cache-dependency-path entirely.
-          cache: false
-
-      - name: go vet
-        working-directory: products/openova-flow/server
-        run: go vet ./...
-
-      - name: Run unit tests
-        working-directory: products/openova-flow/server
-        run: go test -count=1 -race ./...
-
-      # On pull_request runs we stop here — image push requires
-      # `packages: write` which only main-branch authors hold.
-      - name: Login to GHCR
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build and push image
-        id: build
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v6
-        with:
-          # Build context is the server module dir — its Dockerfile's
-          # `COPY go.mod ./` / `COPY cmd ./cmd` paths are relative to
-          # that dir, not the repo root.
-          context: products/openova-flow/server
-          file: products/openova-flow/server/Dockerfile
-          push: true
-          tags: |
-            ${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
-            ${{ env.IMAGE }}:latest
-          labels: |
-            org.opencontainers.image.source=https://github.com/openova-io/openova
-            org.opencontainers.image.revision=${{ github.sha }}
-            org.opencontainers.image.title=openova-flow-server
-            org.opencontainers.image.description=OpenovaFlow event router — HTTP ingest + SSE replay
-          # provenance=false: containerd 1.7.x on k3s mis-resolves the
-          # provenance attestation manifest. SBOM attestation handled by
-          # the cosign attest step below.
-          provenance: false
-          sbom: false
-
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@v3
-
-      - name: Sign image with cosign (keyless)
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign sign --yes "${IMAGE}@${DIGEST}"
-
-      # Attaches a cosign attestation of type `spdx` to the pushed image
-      # digest.
-      # NOTE(review): the predicate below is a fixed placeholder JSON
-      # string, not an SPDX document generated from the image, and the
-      # build step sets `sbom: false`. Confirm whether a real SBOM should
-      # be generated and attested here instead.
-      - name: Generate and attest SBOM
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign attest --yes \
-            --predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
-            --type spdx \
-            "${IMAGE}@${DIGEST}"
-
-      # Auto-bump the chart values.yaml tag so the next Sovereign chart
-      # rollout picks up this image without a manual edit. Per
-      # `feedback_no_mvp_no_workarounds.md` rule 1 (target-state, no
-      # operator-action gates) and `feedback_inviolable_principles.md`
-      # (event-driven, never cron). Mirrors the awk pattern in
-      # build-application-controller.yaml (under `controllers.application.tag`).
-      - name: Bump flowServer.image.tag in chart values.yaml
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -e
-          VALUES="platform/openova-flow-server/chart/values.yaml"
-          # awk: find `flowServer:` (top-level key), then under it find
-          # the nested `  image:` block, then update the next `    tag:`
-          # line. Both flags are cleared at the next top-level key so a
-          # `flowServer.image` block without a `tag:` line cannot cause a
-          # later section's 4-space-indented `tag:` to be rewritten.
-          awk -v sha="${SHA_SHORT}" '
-            /^flowServer:/ { in_fs=1; print; next }
-            in_fs && /^[a-zA-Z]/ && !/^flowServer:/ { in_fs=0; in_img=0 }
-            in_fs && /^  image:/ { in_img=1; print; next }
-            in_fs && /^  [a-zA-Z]/ && !/^  image:/ { in_img=0 }
-            in_img && /^    tag:/ { sub(/:.*/, ": \"" sha "\""); in_img=0 }
-            { print }
-          ' "${VALUES}" > "${VALUES}.tmp"
-          # Separate command (not `awk ... && mv`) so an awk failure stops
-          # the step instead of being masked inside an && list.
-          mv "${VALUES}.tmp" "${VALUES}"
-          echo "values.yaml after bump:"
-          grep -A1 "^  image:" "${VALUES}" | head -6
-
-      # Commits the bumped values.yaml and pushes it to main. Emits
-      # `pushed=true|false` as a step output so downstream steps can tell
-      # whether a new commit actually landed.
-      - name: Commit and push values.yaml bump
-        id: deploy_commit
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          if git diff --quiet platform/openova-flow-server/chart/values.yaml; then
-            echo "no values.yaml change — already pinned to ${SHA_SHORT}"
-            echo "pushed=false" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-          git add platform/openova-flow-server/chart/values.yaml
-          # `[skip ci]` keeps blueprint-release from re-firing twice
-          # (we explicitly dispatch it below — see the next step).
-          git commit -m "chore(deploy): bump openova-flow-server image to ${SHA_SHORT} [skip ci]"
-          # Pull-rebase to avoid races with parallel build commits
-          # (best-effort). If the rebase stops part-way, HEAD is left
-          # detached on origin/main and the push below would "succeed" as
-          # a no-op without the bump commit, so abort the rebase and let
-          # the push be rejected loudly instead.
-          if ! git pull --rebase --autostash origin main; then
-            git rebase --abort 2>/dev/null || true
-          fi
-          git push origin HEAD:main
-          echo "pushed=true" >> "$GITHUB_OUTPUT"
-
-      # GitHub Actions does NOT trigger workflows from GITHUB_TOKEN bot
-      # pushes by default (anti-recursion safeguard). The bot commit
-      # above changes platform/openova-flow-server/chart/values.yaml
-      # which would normally fire blueprint-release.yaml's path filter
-      # — but bot pushes are silently filtered. Without this dispatch
-      # the rebuilt image is NEVER baked into a new chart version, so
-      # Sovereigns keep installing the previous chart with the previous
-      # image tag (incident replicated from catalyst-build.yaml #712).
-      - name: Dispatch blueprint-release for chart re-publish
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' && steps.deploy_commit.outputs.pushed == 'true'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh workflow run blueprint-release.yaml \
-            --repo "${GITHUB_REPOSITORY}" \
-            --ref main \
-            -f blueprint=openova-flow-server \
-            -f tree=platform
--- a/.github/workflows/build-sandbox-controller.yaml
+++ b/.github/workflows/build-sandbox-controller.yaml
@ -1,190 +0,0 @@
-name: Build sandbox-controller
-
-# sandbox-controller — Wave 1 of the Sandbox product (PR #1622). Sister
-# of organization-controller / application-controller; watches Sandbox
-# CRs (sandbox.openova.io/v1) and reconciles per-Sandbox namespace +
-# RBAC + PVCs + placeholder tokens into the per-Org `catalyst-tenant`
-# Gitea repo. Per products/sandbox/docs/architecture.md §7.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #4a (GitHub Actions is the only
-# build path) every image that runs on OpenOva infra MUST be produced
-# by a CI workflow from a committed git SHA. Shape mirrors
-# build-application-controller.yaml — same Buildx + cosign keyless
-# sign + SBOM attestation + auto-bump of the chart values.yaml so the
-# next Sovereign install picks up the SHA-pinned image without an
-# operator manually editing the file.
-#
-# Per `feedback_inviolable_principles.md`: event-driven only, NO cron.
-# Triggers on push-to-main with paths filter (so unrelated commits
-# don't burn CI minutes), pull_request for reviewers, and
-# workflow_dispatch for manual re-runs.
-
-on:
-  push:
-    paths:
-      - 'core/controllers/sandbox/**'
-      - 'core/controllers/internal/**'
-      - 'core/controllers/pkg/**'
-      - 'core/controllers/go.mod'
-      - 'core/controllers/go.sum'
-      - '.github/workflows/build-sandbox-controller.yaml'
-    branches: [main]
-  pull_request:
-    paths:
-      - 'core/controllers/sandbox/**'
-      - 'core/controllers/internal/**'
-      - 'core/controllers/pkg/**'
-      - 'core/controllers/go.mod'
-      - 'core/controllers/go.sum'
-      - '.github/workflows/build-sandbox-controller.yaml'
-  workflow_dispatch:
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE: ghcr.io/openova-io/openova/sandbox-controller
-  CHART_VALUES: platform/sandbox/chart/values.yaml
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      # contents: write — the deploy step below pushes a values.yaml SHA
-      # bump back to main so the platform/sandbox chart picks up the
-      # newly-built image without an operator manually editing the file
-      # (per `feedback_no_mvp_no_workarounds.md` rule 1: target-state,
-      # never "manual follow-up bump").
-      contents: write
-      packages: write
-      # id-token write is required by cosign keyless signing (Sigstore).
-      id-token: write
-    outputs:
-      sha_short: ${{ steps.vars.outputs.sha_short }}
-      digest: ${{ steps.build.outputs.digest }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set short SHA
-        id: vars
-        run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.23'
-          cache-dependency-path: |
-            core/controllers/go.sum
-
-      - name: go vet
-        working-directory: core/controllers
-        # Slice CC1 (#1095) consolidated the Group C controllers into a
-        # single shared go.mod. Vet scoped to this controller's tree
-        # plus the shared internal/ + pkg/ helpers it depends on.
-        run: go vet ./sandbox/... ./internal/... ./pkg/...
-
-      - name: Run unit tests
-        working-directory: core/controllers
-        run: go test -count=1 -race ./sandbox/... ./internal/... ./pkg/...
-
-      # On pull_request runs we stop here — image push requires
-      # `packages: write` which only main-branch authors hold.
-      - name: Login to GHCR
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build and push image
-        id: build
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v6
-        with:
-          # Build context is the repository root so the Dockerfile's
-          # COPY paths can reach core/controllers/{go.mod,internal,pkg,
-          # sandbox}/.
-          context: .
-          file: core/controllers/sandbox/Dockerfile
-          push: true
-          tags: |
-            ${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
-            ${{ env.IMAGE }}:latest
-          labels: |
-            org.opencontainers.image.source=https://github.com/openova-io/openova
-            org.opencontainers.image.revision=${{ github.sha }}
-            org.opencontainers.image.title=sandbox-controller
-            org.opencontainers.image.description=Reconciles Sandbox.sandbox.openova.io/v1 CRs into per-Org Gitea manifests (Wave 1 of #1615)
-          # provenance=false: containerd 1.7.x on k3s mis-resolves the
-          # provenance attestation manifest. SBOM attestation handled by
-          # the cosign attest step below.
-          provenance: false
-          sbom: false
-
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@v3
-
-      - name: Sign image with cosign (keyless)
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign sign --yes "${IMAGE}@${DIGEST}"
-
-      # Attaches a cosign attestation of type `spdx` to the pushed image
-      # digest.
-      # NOTE(review): the predicate below is a fixed placeholder JSON
-      # string, not an SPDX document generated from the image, and the
-      # build step sets `sbom: false`. Confirm whether a real SBOM should
-      # be generated and attested here instead.
-      - name: Generate and attest SBOM
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign attest --yes \
-            --predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
-            --type spdx \
-            "${IMAGE}@${DIGEST}"
-
-      # Auto-bump the chart values.yaml tag so the next Sovereign chart
-      # rollout picks up this image without a manual edit. Per
-      # `feedback_no_mvp_no_workarounds.md` rule 1 (target-state, no
-      # operator-action gates) and `feedback_inviolable_principles.md`
-      # (event-driven, never cron). Unlike build-k8s-ws-proxy.yaml this
-      # workflow does NOT bump Chart.yaml — the Sandbox chart's
-      # publication cadence is gated by Wave-2 readiness, not per-image
-      # builds. Operators flip `enabled: true` per Sovereign overlay.
-      - name: Install yq
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        run: |
-          sudo wget -qO /usr/local/bin/yq \
-            https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64
-          sudo chmod +x /usr/local/bin/yq
-
-      - name: Bump image.tag in values.yaml
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          yq eval -i ".image.tag = \"${SHA_SHORT}\"" "${CHART_VALUES}"
-          echo "values.yaml after bump:"
-          yq eval '.image' "${CHART_VALUES}"
-
-      # Commits the bumped values.yaml and pushes it to main; exits early
-      # when the file is already pinned to this build's short SHA.
-      - name: Commit and push values.yaml bump
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          if git diff --quiet "${CHART_VALUES}"; then
-            echo "no values.yaml change — already pinned to ${SHA_SHORT}"
-            exit 0
-          fi
-          git add "${CHART_VALUES}"
-          git commit -m "deploy: bump sandbox-controller image to ${SHA_SHORT}"
-          # Pull-rebase to avoid races with parallel build commits
-          # (best-effort). If the rebase stops part-way, HEAD is left
-          # detached on origin/main and the push below would "succeed" as
-          # a no-op without the bump commit, so abort the rebase and let
-          # the push be rejected loudly instead.
-          if ! git pull --rebase --autostash origin main; then
-            git rebase --abort 2>/dev/null || true
-          fi
-          git push origin HEAD:main
--- a/.github/workflows/build-sandbox-mcp-server.yaml
+++ b/.github/workflows/build-sandbox-mcp-server.yaml
@ -1,193 +0,0 @@
-name: Build sandbox-mcp-server
-
-# sandbox-mcp-server — Wave 2 of the Sandbox product (PR #1618). A
-# stdio MCP server that runs as one sidecar per Sandbox pod and speaks
-# JSON-RPC to the agent (claude / cursor-agent / qwen-code / aider /
-# opencode) over stdin/stdout. See products/sandbox/docs/architecture.md §3.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #4a (GitHub Actions is the only
-# build path) every image that runs on OpenOva infra MUST be produced
-# by a CI workflow from a committed git SHA. Shape mirrors
-# build-sandbox-controller.yaml — same Buildx + cosign keyless sign +
-# SBOM attestation + auto-bump of the chart values.yaml so the next
-# Sovereign install picks up the SHA-pinned image without an operator
-# manually editing the file.
-#
-# Wave 8 / PR #1658 wired this module's go.mod to depend on
-# core/controllers + core/services/shared via `replace` directives
-# (canonical Gitea client + auth.Claims). The Dockerfile therefore
-# requires the repository ROOT as the build context, mirroring
-# build-sandbox-controller.yaml. Paths-filter widens to the dep trees
-# so a change to those sources re-triggers the build.
-#
-# Per `feedback_inviolable_principles.md`: event-driven only, NO cron.
-
-on:
-  push:
-    paths:
-      - 'products/sandbox/mcp-server/**'
-      - 'core/controllers/**'
-      - 'core/services/shared/**'
-      - '.github/workflows/build-sandbox-mcp-server.yaml'
-    branches: [main]
-  pull_request:
-    paths:
-      - 'products/sandbox/mcp-server/**'
-      - 'core/controllers/**'
-      - 'core/services/shared/**'
-      - '.github/workflows/build-sandbox-mcp-server.yaml'
-  workflow_dispatch:
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE: ghcr.io/openova-io/openova/sandbox-mcp-server
-  CHART_VALUES: platform/sandbox/chart/values.yaml
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      # contents: write — the deploy step below pushes a values.yaml SHA
-      # bump back to main so the platform/sandbox chart picks up the
-      # newly-built image without an operator manually editing the file
-      # (per `feedback_no_mvp_no_workarounds.md` rule 1: target-state,
-      # never "manual follow-up bump").
-      contents: write
-      packages: write
-      # id-token write is required by cosign keyless signing (Sigstore).
-      id-token: write
-    outputs:
-      sha_short: ${{ steps.vars.outputs.sha_short }}
-      digest: ${{ steps.build.outputs.digest }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set short SHA
-        id: vars
-        run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.23'
-          cache-dependency-path: |
-            products/sandbox/mcp-server/go.sum
-            core/controllers/go.sum
-            core/services/shared/go.sum
-
-      - name: go vet
-        working-directory: products/sandbox/mcp-server
-        run: go vet ./...
-
-      - name: Run unit tests
-        working-directory: products/sandbox/mcp-server
-        run: go test -count=1 -race ./...
-
-      # On pull_request runs we stop here — image push requires
-      # `packages: write` which only main-branch authors hold.
-      - name: Login to GHCR
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build and push image
-        id: build
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v6
-        with:
-          # Build context is the repository root so the Dockerfile's
-          # COPY paths can reach core/controllers + core/services/shared
-          # (PR #1658 `replace` targets) alongside products/sandbox/
-          # mcp-server. Mirrors build-sandbox-controller.yaml.
-          context: .
-          file: products/sandbox/mcp-server/Dockerfile
-          push: true
-          tags: |
-            ${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
-            ${{ env.IMAGE }}:latest
-          labels: |
-            org.opencontainers.image.source=https://github.com/openova-io/openova
-            org.opencontainers.image.revision=${{ github.sha }}
-            org.opencontainers.image.title=sandbox-mcp-server
-            org.opencontainers.image.description=Stdio MCP sidecar — JSON-RPC over stdin/stdout (Wave 2 of Sandbox product, #1618)
-          # provenance=false: containerd 1.7.x on k3s mis-resolves the
-          # provenance attestation manifest. SBOM attestation handled
-          # by the cosign attest step below.
-          provenance: false
-          sbom: false
-
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@v3
-
-      - name: Sign image with cosign (keyless)
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign sign --yes "${IMAGE}@${DIGEST}"
-
-      # Attaches a cosign attestation of type `spdx` to the pushed image
-      # digest.
-      # NOTE(review): the predicate below is a fixed placeholder JSON
-      # string, not an SPDX document generated from the image, and the
-      # build step sets `sbom: false`. Confirm whether a real SBOM should
-      # be generated and attested here instead.
-      - name: Generate and attest SBOM
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign attest --yes \
-            --predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
-            --type spdx \
-            "${IMAGE}@${DIGEST}"
-
-      # Auto-bump the chart values.yaml runtime.mcpImage so the next
-      # Sovereign chart rollout picks up this image without a manual
-      # edit. Per `feedback_no_mvp_no_workarounds.md` rule 1
-      # (target-state, no operator-action gates) and
-      # `feedback_inviolable_principles.md` (event-driven, never cron).
-      # The chart's deployment.yaml `required` guard fails-fast when
-      # runtime.mcpImage is empty (Inviolable Principle #4a), so an
-      # un-bumped values.yaml = un-deployable chart. Mirrors the
-      # build-sandbox-controller.yaml auto-bump shape, just targeting a
-      # different yq path and writing a fully-qualified `<repo>:<sha>`
-      # string (the consumer reads runtime.mcpImage as a single image
-      # reference, not a {repository,tag} pair).
-      - name: Install yq
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        run: |
-          sudo wget -qO /usr/local/bin/yq \
-            https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64
-          sudo chmod +x /usr/local/bin/yq
-
-      - name: Bump runtime.mcpImage in values.yaml
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          yq eval -i ".runtime.mcpImage = \"${IMAGE}:${SHA_SHORT}\"" "${CHART_VALUES}"
-          echo "values.yaml after bump:"
-          yq eval '.runtime.mcpImage' "${CHART_VALUES}"
-
-      # Commits the bumped values.yaml and pushes it to main; exits early
-      # when the file is already pinned to this build's short SHA.
-      - name: Commit and push values.yaml bump
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          if git diff --quiet "${CHART_VALUES}"; then
-            echo "no values.yaml change — already pinned to ${SHA_SHORT}"
-            exit 0
-          fi
-          git add "${CHART_VALUES}"
-          git commit -m "deploy: bump sandbox-mcp-server image to ${SHA_SHORT}"
-          # Pull-rebase to avoid races with parallel build commits
-          # (best-effort). If the rebase stops part-way, HEAD is left
-          # detached on origin/main and the push below would "succeed" as
-          # a no-op without the bump commit, so abort the rebase and let
-          # the push be rejected loudly instead.
-          if ! git pull --rebase --autostash origin main; then
-            git rebase --abort 2>/dev/null || true
-          fi
-          git push origin HEAD:main
--- a/.github/workflows/build-sandbox-pty-server.yaml
+++ b/.github/workflows/build-sandbox-pty-server.yaml
@ -1,184 +0,0 @@
-name: Build sandbox-pty-server
-
-# sandbox-pty-server — Wave 2 of the Sandbox product (PR #1618). The
-# in-pod HTTP+WS PTY shim (port 7681) that Wave 2's pty-server
-# StatefulSet runs alongside the agent process. See
-# products/sandbox/docs/architecture.md §2.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #4a (GitHub Actions is the only
-# build path) every image that runs on OpenOva infra MUST be produced
-# by a CI workflow from a committed git SHA. Shape mirrors
-# build-sandbox-controller.yaml — same Buildx + cosign keyless sign +
-# SBOM attestation + auto-bump of the chart values.yaml so the next
-# Sovereign install picks up the SHA-pinned image without an operator
-# manually editing the file.
-#
-# Per `feedback_inviolable_principles.md`: event-driven only, NO cron.
-
-on:
-  push:
-    paths:
-      - 'products/sandbox/pty-server/**'
-      - '.github/workflows/build-sandbox-pty-server.yaml'
-    branches: [main]
-  pull_request:
-    paths:
-      - 'products/sandbox/pty-server/**'
-      - '.github/workflows/build-sandbox-pty-server.yaml'
-  workflow_dispatch:
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE: ghcr.io/openova-io/openova/sandbox-pty-server
-  CHART_VALUES: platform/sandbox/chart/values.yaml
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      # contents: write — the deploy step below pushes a values.yaml SHA
-      # bump back to main so the platform/sandbox chart picks up the
-      # newly-built image without an operator manually editing the file
-      # (per `feedback_no_mvp_no_workarounds.md` rule 1: target-state,
-      # never "manual follow-up bump").
-      contents: write
-      packages: write
-      # id-token write is required by cosign keyless signing (Sigstore).
-      id-token: write
-    outputs:
-      sha_short: ${{ steps.vars.outputs.sha_short }}
-      digest: ${{ steps.build.outputs.digest }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set short SHA
-        id: vars
-        run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache-dependency-path: |
-            products/sandbox/pty-server/go.sum
-
-      - name: go vet
-        working-directory: products/sandbox/pty-server
-        run: go vet ./...
-
-      - name: Run unit tests
-        working-directory: products/sandbox/pty-server
-        # Empty `go test ./...` is harmless: prints "no test files" and
-        # exits 0. Wave-2 follow-ups will add unit tests under
-        # internal/session/.
-        run: go test -count=1 -race ./...
-
-      # On pull_request runs we stop here — image push requires
-      # `packages: write` which only main-branch authors hold.
-      - name: Login to GHCR
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build and push image
-        id: build
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v6
-        with:
-          # pty-server's Dockerfile uses `COPY . .` so the build context
-          # is the pty-server directory itself (its own go.mod root —
-          # NOT the repo root, unlike core/controllers which share a
-          # parent go.mod). pty-server has no cross-tree `replace`
-          # directives so a narrow context still resolves cleanly.
-          context: products/sandbox/pty-server
-          file: products/sandbox/pty-server/Dockerfile
-          push: true
-          tags: |
-            ${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
-            ${{ env.IMAGE }}:latest
-          labels: |
-            org.opencontainers.image.source=https://github.com/openova-io/openova
-            org.opencontainers.image.revision=${{ github.sha }}
-            org.opencontainers.image.title=sandbox-pty-server
-            org.opencontainers.image.description=In-pod HTTP+WS PTY shim (Wave 2 of Sandbox product, #1618)
-          # provenance=false: containerd 1.7.x on k3s mis-resolves the
-          # provenance attestation manifest. SBOM attestation handled
-          # by the cosign attest step below.
-          provenance: false
-          sbom: false
-
-      - name: Install cosign
-        if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@v3
-
-      - name: Sign image with cosign (keyless)
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign sign --yes "${IMAGE}@${DIGEST}"
-
-      - name: Generate and attest SBOM
-        if: github.event_name != 'pull_request'
-        env:
-          DIGEST: ${{ steps.build.outputs.digest }}
-        run: |
-          cosign attest --yes \
-            --predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
-            --type spdx \
-            "${IMAGE}@${DIGEST}"
-
-      # Auto-bump the chart values.yaml runtime.ptyServerImage so the
-      # next Sovereign chart rollout picks up this image without a
-      # manual edit. Per `feedback_no_mvp_no_workarounds.md` rule 1
-      # (target-state, no operator-action gates) and
-      # `feedback_inviolable_principles.md` (event-driven, never cron).
-      # The chart's deployment.yaml `required` guard fails-fast when
-      # runtime.ptyServerImage is empty (Inviolable Principle #4a), so
-      # an un-bumped values.yaml = un-deployable chart. Mirrors the
-      # build-sandbox-controller.yaml auto-bump shape, just targeting a
-      # different yq path and writing a fully-qualified `<repo>:<sha>`
-      # string (the consumer reads runtime.ptyServerImage as a single
-      # image reference, not a {repository,tag} pair).
-      - name: Install yq
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        run: |
-          sudo wget -qO /usr/local/bin/yq \
-            https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64
-          sudo chmod +x /usr/local/bin/yq
-
-      - name: Bump runtime.ptyServerImage in values.yaml
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          yq eval -i ".runtime.ptyServerImage = \"${IMAGE}:${SHA_SHORT}\"" "${CHART_VALUES}"
-          echo "values.yaml after bump:"
-          yq eval '.runtime.ptyServerImage' "${CHART_VALUES}"
-
-      - name: Commit and push values.yaml bump
-        if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
-        env:
-          SHA_SHORT: ${{ steps.vars.outputs.sha_short }}
-        run: |
-          set -euo pipefail
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          if git diff --quiet "${CHART_VALUES}"; then
-            echo "no values.yaml change — already pinned to ${SHA_SHORT}"
-            exit 0
-          fi
-          git add "${CHART_VALUES}"
-          git commit -m "deploy: bump sandbox-pty-server image to ${SHA_SHORT}"
-          # Pull-rebase to avoid races with parallel build commits.
-          git pull --rebase --autostash origin main || true
-          git push origin HEAD:main
--- a/.github/workflows/catalyst-build.yaml
+++ b/.github/workflows/catalyst-build.yaml
@ -308,108 +308,6 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4

-      # In-flight provisioning guard — t13/t17/t21 incident, 2026-05-17.
-      #
-      # The mothership catalyst-api Pod is single-replica and is rolled
-      # by Flux whenever this workflow bumps the image SHA. The OpenTofu
-      # workdir lives on a /tmp emptyDir that dies with the Pod, so any
-      # in-flight `tofu apply` is killed mid-resource. The on-disk
-      # deployment record is rewritten to status=failed on the new Pod's
-      # restoreFromStore (deployments.go:413), but the Hetzner resources
-      # tagged with the abandoned deployment-id remain orphans that
-      # require manual `hcloud` cleanup. Three consecutive provs
-      # (t13/t17/t21) died this way during 2026-05-17, each costing
-      # ~15 minutes of provisioning time plus cleanup overhead.
-      #
-      # This step polls the public, read-only in-flight-count endpoint
-      # on the mothership catalyst-api (added in this PR, served at
-      # console.openova.io/api/v1/deployments/in-flight-count). The
-      # endpoint counts ONLY Phase-0 in-flight statuses (pending /
-      # provisioning / tofu-applying / flux-bootstrapping) — Phase-1 is
-      # observational and resumes across Pod restarts, so it does not
-      # block. When count==0 we proceed with the values.yaml bump.
-      #
-      # Timeout policy: cap at MAX_WAIT_SECONDS (default 30 minutes —
-      # the upper bound on a healthy multi-region prov). If a prov is
-      # still in flight after the cap, we proceed anyway and log a
-      # WARNING. Blocking deploys indefinitely on a stuck prov would
-      # mean an operator can never ship a fix for whatever is causing
-      # the stuck prov (the worst possible failure mode for a CI gate).
-      #
-      # Endpoint outage policy: if the curl fails for any reason
-      # (network blip, mothership down, endpoint not yet deployed on
-      # the live SHA), we proceed with the bump after logging. Same
-      # rationale — a broken gate must not block all future deploys.
-      # First-time-rollout consideration: the endpoint does not exist
-      # on the LIVE mothership until THIS PR's image lands, so the
-      # first run after merge will fall through the "endpoint not
-      # found" branch and proceed normally. Subsequent runs benefit
-      # from the gate.
-      - name: Wait for in-flight provisioning to drain
-        env:
-          # Override-able via repo variables/secrets if a different
-          # mothership URL is in play (Sovereign chroot self-deploy,
-          # staging, etc.). Default targets the production mothership.
-          CATALYST_API_URL: ${{ vars.CATALYST_API_URL || 'https://console.openova.io' }}
-          MAX_WAIT_SECONDS: '1800'   # 30 min hard cap
-          POLL_INTERVAL_SECONDS: '20'
-        run: |
-          set -u
-          ENDPOINT="${CATALYST_API_URL%/}/api/v1/deployments/in-flight-count"
-          echo "Polling ${ENDPOINT} every ${POLL_INTERVAL_SECONDS}s (cap ${MAX_WAIT_SECONDS}s)"
-
-          START=$(date +%s)
-          ATTEMPT=0
-          while : ; do
-            ATTEMPT=$((ATTEMPT + 1))
-            HTTP_CODE=$(curl -fsSL --max-time 10 -o /tmp/inflight.json -w '%{http_code}' \
-              "${ENDPOINT}" 2>/dev/null || echo "000")
-
-            if [ "$HTTP_CODE" = "000" ]; then
-              # Network failure (DNS, connect refused, timeout). Do NOT
-              # block the deploy — fail-open per "broken gate must not
-              # halt all deploys" rule above. Log + proceed.
-              echo "WARN: ${ENDPOINT} unreachable on attempt ${ATTEMPT} (curl failed). Proceeding without gate."
-              break
-            fi
-
-            if [ "$HTTP_CODE" = "404" ]; then
-              # First-rollout case — the endpoint is not yet present on
-              # the LIVE catalyst-api. Once this PR merges, subsequent
-              # runs will see the endpoint and start gating properly.
-              echo "INFO: ${ENDPOINT} returned 404 — endpoint not yet deployed on live mothership. Proceeding (first-rollout fall-through)."
-              break
-            fi
-
-            if [ "$HTTP_CODE" != "200" ]; then
-              # Any other non-2xx: log + proceed (fail-open).
-              echo "WARN: ${ENDPOINT} returned HTTP ${HTTP_CODE} on attempt ${ATTEMPT}. Body:"
-              cat /tmp/inflight.json 2>/dev/null || true
-              echo
-              echo "Proceeding without gate (fail-open)."
-              break
-            fi
-
-            COUNT=$(jq -r '.count // 0' /tmp/inflight.json 2>/dev/null || echo "0")
-            IDS=$(jq -r '.ids // [] | join(",")' /tmp/inflight.json 2>/dev/null || echo "")
-
-            if [ "$COUNT" -eq 0 ] 2>/dev/null; then
-              echo "OK: 0 deployments in-flight. Safe to bump catalyst-api image."
-              break
-            fi
-
-            ELAPSED=$(($(date +%s) - START))
-            if [ "$ELAPSED" -ge "$MAX_WAIT_SECONDS" ]; then
-              echo "WARN: ${COUNT} deployment(s) still in-flight after ${ELAPSED}s (cap ${MAX_WAIT_SECONDS}s)."
-              echo "WARN: in-flight ids: ${IDS}"
-              echo "WARN: proceeding with image bump anyway — stuck provs must not block all future deploys."
-              break
-            fi
-
-            echo "WAIT: attempt ${ATTEMPT} — ${COUNT} deployment(s) in-flight (ids: ${IDS}); elapsed=${ELAPSED}s. Sleeping ${POLL_INTERVAL_SECONDS}s."
-            sleep "${POLL_INTERVAL_SECONDS}"
-          done
-
      - name: Update SHA tags in values.yaml and deployment manifests
        # The catalyst-ui and catalyst-api images are referenced in two places:
        #
--- a/.github/workflows/infra-hetzner-tofu.yaml
+++ b/.github/workflows/infra-hetzner-tofu.yaml
@ -52,53 +52,6 @@ jobs:
      - name: tofu validate
        run: tofu validate

-      # Fix #111 (2026-05-10) — guard against the PR #1311 regression class.
-      #
-      # tftpl files (cloudinit-control-plane.tftpl, cloudinit-worker.tftpl,
-      # any future *.tftpl) are consumed by tofu's `templatefile()` function
-      # which parses ALL `${...}` interpolation sequences regardless of
-      # YAML/HCL/shell context — including ones that appear inside YAML
-      # comments. When a comment references a downstream shell-envsubst
-      # variable like `${QA_FIXTURES_ENABLED:-false}`, tofu sees the colon
-      # inside the interpolation and dies with:
-      #
-      #     Error: Extra characters after interpolation expression;
-      #     Template interpolation doesn't expect a colon at this location.
-      #
-      # PR #1311 (Fix #73) shipped exactly this bug, broke `tofu plan`
-      # immediately, and prov #9 (4204f0b0c5e37a80) wasted ~30 min before
-      # PR #1328 caught and escaped the one offender. Without a CI guard,
-      # the next operator who adds a similar comment will repeat the
-      # incident.
-      #
-      # The fix is to use HCL's literal-dollar escape: `$$` → emits one
-      # literal `$` from templatefile(), so `$${VAR:-default}` survives
-      # tofu and reaches the cloud-init shell as `${VAR:-default}`.
-      #
-      # This grep scans every *.tftpl in infra/hetzner/ for any line that:
-      #   - starts with `#` (a comment) — leading whitespace optional
-      #   - contains a single-`$` `${UPPERCASE_VAR:-...}` interpolation —
-      #     the colon-in-interpolation shape that breaks tofu
-      # and fails the build with an actionable error message. The regex
-      # uses PCRE's negative lookbehind `(?<!\$)` so correctly-escaped
-      # `$${VAR:-default}` (which expands to literal `${VAR:-default}` after
-      # templatefile()) does NOT trip the guard. Code lines (non-comment)
-      # that reference shell vars are caught at `tofu validate` time; this
-      # guard plugs the comment-line gap that validate misses because
-      # templatefile() doesn't actually run during `validate`.
-      #
-      # Ubuntu-latest runners ship GNU grep with PCRE (`-P`) enabled.
-      - name: tftpl shell-expansion escape guard (Fix #111)
-        run: |
-          set -euo pipefail
-          violations=$(grep -rEnP '^\s*#.*(?<!\$)\$\{[A-Z_]+:-' *.tftpl || true)
-          if [ -n "$violations" ]; then
-            echo "::error title=Unescaped tftpl shell expansion::Use \$\${VAR:-default} (double-dollar) in tftpl YAML comments — bare \${VAR:-default} is consumed by tofu's templatefile() and breaks 'tofu plan' with 'Template interpolation doesn't expect a colon at this location' (see PR #1311 / PR #1328 / Fix #111)."
-            echo "Offending lines:"
-            echo "$violations"
-            exit 1
-          fi
-
      - name: tofu test (offline — mock_provider + override_resource)
        # The module's tests/multi_region.tftest.hcl exercises the
        # multi-region wiring shape WITHOUT touching real Hetzner.
--- a/.github/workflows/test-bootstrap-kit.yaml
+++ b/.github/workflows/test-bootstrap-kit.yaml
@ -12,10 +12,8 @@ on:
      - 'tests/e2e/bootstrap-kit/**'
      - 'platform/**/blueprint.yaml'
      - 'platform/**/chart/**'
-      - 'products/**/chart/**'
      - 'clusters/**'
      - 'scripts/check-bootstrap-deps.sh'
-      - 'scripts/check-bootstrap-kit-pin-sync.sh'
      - 'scripts/expected-bootstrap-deps.yaml'
      - '.github/workflows/test-bootstrap-kit.yaml'
    branches: [main]
@ -24,10 +22,8 @@ on:
      - 'tests/e2e/bootstrap-kit/**'
      - 'platform/**/blueprint.yaml'
      - 'platform/**/chart/**'
-      - 'products/**/chart/**'
      - 'clusters/**'
      - 'scripts/check-bootstrap-deps.sh'
-      - 'scripts/check-bootstrap-kit-pin-sync.sh'
      - 'scripts/expected-bootstrap-deps.yaml'
      - '.github/workflows/test-bootstrap-kit.yaml'
  workflow_dispatch:
@ -55,57 +51,6 @@ jobs:
      - name: Run bootstrap-kit dependency audit
        run: bash scripts/check-bootstrap-deps.sh

-  pin-sync-audit:
-    # TBD-A6 regression test. Asserts every Chart.yaml in platform/* or
-    # products/* whose chart is pinned in clusters/_template/bootstrap-
-    # kit/ has the SAME version on both sides.
-    #
-    # On `pull_request` we use --changed-only --base <base-ref> so a PR
-    # is only blocked on chart→pin pairs IT modified. This keeps the
-    # gate effective (every new chart bump must update the pin) without
-    # forcing pre-existing drifts (13 charts as of 2026-05-18) to be
-    # fixed before any unrelated PR can land. The auto-bump hook in
-    # blueprint-release.yaml will heal those drifts on the next bump
-    # of each lagging chart.
-    #
-    # On `push` to main and `workflow_dispatch` we run the FULL sweep
-    # so post-merge drift is observable on the run summary even if the
-    # PR gate let it through.
-    #
-    # TBD-A17 mitigation (#1849, 2026-05-18): the full sweep on `push`
-    # to main races with the blueprint-release auto-bump hook. When a
-    # PR bumps a Chart.yaml version, the merge commit (which is what
-    # this push event sees) does NOT yet contain the matching
-    # bootstrap-kit pin bump — the auto-bump hook runs in a DIFFERENT
-    # workflow (blueprint-release.yaml) and pushes the pin bump as a
-    # follow-up bot commit, which (per GITHUB_TOKEN convention) does
-    # NOT retrigger this workflow. So the FIRST run on every chart-
-    # bumping merge sees `chart=N pin=N-1` drift and would block.
-    # The actual desired-state is that the follow-up bot commit heals
-    # the drift within ~60s. Push-mode is therefore observational, not
-    # blocking; we use `continue-on-error: true` so the workflow stays
-    # green while the drift is still visible on the run summary.
-    runs-on: ubuntu-latest
-    continue-on-error: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          # Need history back to the PR base for the --changed-only diff.
-          fetch-depth: 0
-
-      - name: Run pin-sync audit (changed-only on PR, full sweep otherwise)
-        run: |
-          set -euo pipefail
-          if [ "${{ github.event_name }}" = "pull_request" ]; then
-            base="${{ github.event.pull_request.base.sha }}"
-            echo "Running --changed-only against base ${base}"
-            bash scripts/check-bootstrap-kit-pin-sync.sh --changed-only --base "${base}"
-          else
-            echo "Running full sweep (event=${{ github.event_name }})"
-            bash scripts/check-bootstrap-kit-pin-sync.sh
-          fi
-
  manifest-validation:
    # Static-only validation: blueprint.yaml + chart Chart.yaml + clusters/_template
    # parsing + dependency order check. Runs on every push.
--- a/clusters/_template/bootstrap-kit/01-cilium.yaml
+++ b/clusters/_template/bootstrap-kit/01-cilium.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-cilium
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "01"
 spec:
  interval: 15m
  releaseName: cilium
@ -38,32 +36,7 @@ spec:
  chart:
    spec:
      chart: bp-cilium
-      # 1.3.4 (prov #55, 2026-05-12): flip kubeProxyReplacement false→true
-      # in chart defaults so the BPF masquerade datapath (bpf.masquerade:
-      # true, already on by default) gets the NodePort it needs at startup.
-      # Worker cilium-agent on prov 8d85a64cb8807cdc crashloop'd with
-      # "BPF masquerade requires NodePort" → node.cilium.io/agent-not-ready
-      # taint persisted → every post-install Job pod (keycloak-config-cli,
-      # powerdns, mimir, openbao) stayed Pending → bootstrap-kit chain
-      # stalled. Aligns with the cloud-init pre-Flux Cilium install which
-      # already used kubeProxyReplacement: true.
-      # 1.3.3 (qa-loop iter-16 Fix #70): Hubble UI HTTPRoute defaults
-      # corrected — gatewayRef.namespace=kube-system (was the stale
-      # cilium-gateway), serviceRef.namespace=kube-system (was the stale
-      # cilium), plus chart auto-derives hubble.<sovereignFQDN> when only
-      # SOVEREIGN_FQDN is provided. Combined with the bootstrap-kit
-      # default flip below (HUBBLE_ENABLED=true, hubble.relay/ui enabled,
-      # SOVEREIGN_FQDN forwarded), every Sovereign exposes Hubble UI at
-      # https://hubble.<sovereignFQDN>/ out of the box. TC-289 NXDOMAIN
-      # is closed because external-dns now sees the HTTPRoute hostname
-      # and writes the A record into PowerDNS.
-      # 1.3.1 (qa-loop iter-12 Fix #54 Workstream 2): bpf.preallocateMaps=true
-      # + socketLB.hostNamespaceOnly=true defaults so fresh worker pods can
-      # resolve DNS reliably on first-join (cilium/cilium#28456 mitigation).
-      # 1.3.0 (qa-loop iter-12 Fix #53C+D): adds the Hubble UI HTTPRoute
-      # overlay (slice H7 #1095) that the catalystOverlay.hubbleUI block
-      # below depends on.
-      version: 1.3.5
+      version: 1.2.0
      sourceRef:
        kind: HelmRepository
        name: bp-cilium
@ -73,30 +46,15 @@ spec:
  # SAME chart installs — legitimate slow-Ready). Replaces blanket
  # spec.timeout: 15m band-aid from PR #221.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  values:
    cilium:
-      # Multi-region (operator mandate 2026-05-12) — each region's k3s
-      # is an INDEPENDENT cluster per NAMING-CONVENTION §1.3, so each
-      # region's cilium MUST talk to its OWN local CP, not the primary's
-      # 10.0.1.2. Flux postBuild.substitute in
-      # cloudinit-control-plane.tftpl renders CILIUM_K8S_SERVICE_HOST to
-      # the local CP's private IP per region (10.0.1.2 for primary,
-      # 10.0.<10+idx>.2 for secondaries — see main.tf:267
-      # secondary_region_cp_ips). Without this, secondary regions'
-      # cilium-operator crash-loops with x509 unknown authority (the
-      # primary's CA doesn't sign the secondary cluster's API cert).
-      # The :=10.0.1.2 fallback preserves single-region (primary-only)
-      # provisions where the substitute var would be empty/absent.
-      k8sServiceHost: ${CILIUM_K8S_SERVICE_HOST:=10.0.1.2}
      # Phase-8a bug #15 (otech8 deployment 1bfc46347564467b 2026-05-01):
      # cilium-agent waits forever for the operator to register
      # ciliumenvoyconfigs + ciliumclusterwideenvoyconfigs CRDs.
@ -115,36 +73,13 @@ spec:
          enabled: false
      hubble:
        metrics:
-          # `null` (NOT [] and NOT a populated list) suppresses the
-          # upstream chart's metrics ServiceMonitor render. Hubble flow
-          # collection still works for Hubble Relay/UI without a
-          # ServiceMonitor — that pulls the kube-prometheus-stack CRDs
-          # which do not exist on a fresh Sovereign at bp-cilium install
-          # time. Operators flip metrics on once
-          # bp-kube-prometheus-stack is reconciled (issue #182).
          enabled: null
          serviceMonitor:
            enabled: false
-        # qa-loop iter-16 Fix #70: default Hubble UI ON for every
-        # Sovereign so TC-289 (https://hubble.<fqdn>/) resolves out of
-        # the box. Hubble flow telemetry is the canonical L3-L7 visibility
-        # surface for the Catalyst control plane (EPIC-5 #1100); shipping
-        # it default-OFF made every Sovereign blind by default and
-        # required a per-Sovereign overlay touch that nobody remembered
-        # to wire. Per-Sovereign overlay can still set HUBBLE_ENABLED=false
-        # for fully air-gapped lab Sovereigns where Relay traffic to
-        # cilium-agents is not desired.
        relay:
-          enabled: ${HUBBLE_ENABLED:=true}
+          enabled: false
        ui:
-          enabled: ${HUBBLE_ENABLED:=true}
-
-      # qa-loop iter-12 Fix #53C: BGP control plane (default off, opt-in
-      # via BGP_ENABLED=true). Per ADR-0001 §9 the BGP control plane is the
-      # canonical path for Sovereign-to-customer-router prefix advertisement
-      # (LoadBalancer VIPs, Pod CIDRs to customer's existing core network).
-      bgpControlPlane:
-        enabled: ${BGP_ENABLED:=false}
+          enabled: false

      # ── Cilium ClusterMesh — multi-region peering ──────────────────
      #
@ -179,100 +114,8 @@ spec:
        useAPIServer: true
        apiserver:
          service:
-            # 2026-05-15: default flipped NodePort → LoadBalancer per DoD A3
-            # (docs/SOVEREIGN-MULTI-REGION-DOD.md). Founder ruling:
-            # "ClusterMesh apiserver Service = LoadBalancer (NEVER NodePort)".
-            #
-            # On Hetzner, hcloud-ccm allocates a public-IPv4 LB per peer
-            # region; AutoEstablishClusterMesh (handler/clustermesh.go,
-            # PR #1508) hard-fails on type != LoadBalancer and reads the
-            # LB ingress IP for the peer endpoint. Cilium WG node
-            # encryption secures the LB→node→pod path end-to-end.
-            #
-            # ${CLUSTERMESH_SERVICE_TYPE:=LoadBalancer} keeps the
-            # operator escape hatch (e.g. bare-metal Sovereigns with
-            # MetalLB or non-cloud peers can override to NodePort) but
-            # the cloud-Hetzner default is now A3-compliant out of the
-            # box.
-            type: ${CLUSTERMESH_SERVICE_TYPE:=LoadBalancer}
-            # Hetzner CCM requires location OR network-zone annotation
-            # to allocate the LB. ${HCLOUD_LB_LOCATION} flows from the
-            # bootstrap-kit Kustomization substitute, set by the
-            # cloud-init template for EVERY region (primary CP renders
-            # var.region; secondary CPs render each.value.cloudRegion).
-            # No default fallback: a missing substitute is a tofu
-            # rendering bug, not a runtime fallback opportunity. The
-            # previous `:=hel1` default silently masked the 2026-05-16
-            # multi-region rendering regression (t114-omani-works
-            # primary=hel1 — fallback APPEARED correct but every
-            # secondary also rendered hel1; an explicit empty render
-            # would have failed cilium chart admission and surfaced
-            # the bug at provision time instead of at clustermesh-
-            # apiserver LB allocation time).
-            annotations:
-              load-balancer.hetzner.cloud/location: "${HCLOUD_LB_LOCATION}"
-              load-balancer.hetzner.cloud/type: "lb11"
-              # use-private-ip: false — LB→backend connection transits
-              # the PUBLIC IP. PR #1537 had set this to "true" attempting
-              # to bypass the firewall NodePort block; that approach was
-              # NOT viable because the per-region Hetzner LB has no
-              # private-network attachment by default. CCM rejected:
-              #     "ReconcileHCLBTargets: use private ip: missing network id"
-              # → LB never allocated → clustermesh apiserver Service
-              # stayed `<pending>` → clustermesh orchestrator waited 5min
-              # for LB IP then bailed with empty peerEntries.
-              #
-              # PR #1538's canonical fix opens TCP 30000-32767 in the
-              # Hetzner firewall so the public-IP LB health checks pass.
-              # This file reverts to use-private-ip=false to align with
-              # that approach. Caught on t130 (30463cd0a5a931be, 2026-05-16).
-              load-balancer.hetzner.cloud/use-private-ip: "false"
-              # 2026-05-16: per-region LB name suffix. Without
-              # ${SOVEREIGN_REGION_KEY} interpolated, all 3 regions'
-              # clustermesh-apiserver Services adopted the FIRST LB
-              # CCM-created (Hetzner LBs are unique by name; second
-              # creation just reuses the first). Caught on t121
-              # (48d8fe77...): primary + nbg1 both reported external_ip
-              # 167.233.14.208 (nbg1 LB), sin stayed <pending>.
-              load-balancer.hetzner.cloud/name: "${SOVEREIGN_FQDN_SLUG:=catalyst}-${SOVEREIGN_REGION_KEY:=primary}-clustermesh"
-
-    # ── Catalyst overlay templates (chart/templates/) ────────────────────
-    # qa-loop iter-16 Fix #70: Hubble UI HTTPRoute now defaults ON for
-    # every Sovereign. The chart auto-derives hostname `hubble.${SOVEREIGN_FQDN}`
-    # so the operator only needs the SOVEREIGN_FQDN substitute (already
-    # mandatory for every Sovereign — see clusters/_template/bootstrap-kit/
-    # 13-bp-catalyst-platform.yaml `host: console.${SOVEREIGN_FQDN}`).
-    # Per-Sovereign overlay can still:
-    #   - HUBBLE_ENABLED=false  → disable Hubble UI on this Sovereign
-    #   - HUBBLE_HOSTNAME=...   → override the auto-derived hostname
-    #   - HUBBLE_AUTH=oidc      → enable OIDC enforcement once the
-    #                              Keycloak realm wires the hubble-ui client
-    catalystOverlay:
-      hubbleUI:
-        enabled: ${HUBBLE_ENABLED:=true}
-        # Explicit override; empty triggers the chart to derive
-        # `hubble.${SOVEREIGN_FQDN}` from sovereignFQDN below.
-        hostname: ${HUBBLE_HOSTNAME:=}
-        sovereignFQDN: ${SOVEREIGN_FQDN}
-        gatewayRef:
-          # The Sovereign Gateway lives in kube-system — installed by
-          # clusters/_template/sovereign-tls/cilium-gateway.yaml. Every
-          # other bootstrap-kit HTTPRoute (gitea, auth, grafana, harbor,
-          # openbao, powerdns, console/catalyst-platform) attaches to
-          # cilium-gateway/kube-system; this overlay matches.
-          name: cilium-gateway
-          namespace: kube-system
-        # `none` until the Keycloak `hubble-ui` OIDC client is wired by
-        # bp-keycloak realm-config; flip to `oidc` per per-Sovereign
-        # overlay once that lands. Until then Hubble UI is publicly
-        # reachable — acceptable for the in-progress qa-loop iter-16
-        # observability slice; lock down before production handover via
-        # HUBBLE_AUTH=oidc.
-        auth: ${HUBBLE_AUTH:=none}
-        serviceRef:
-          name: hubble-ui
-          namespace: kube-system
-          port: 80
+            type: NodePort
+            nodePort: 32379
 ---
 # ─── Per-Sovereign Gateway API resources (issue #387) ────────────────────
 #
--- a/clusters/_template/bootstrap-kit/01a-gateway-api.yaml
+++ b/clusters/_template/bootstrap-kit/01a-gateway-api.yaml
@ -48,8 +48,6 @@ kind: HelmRelease
 metadata:
  name: bp-gateway-api
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "01a"
 spec:
  interval: 15m
  releaseName: gateway-api
@ -73,12 +71,10 @@ spec:
  # `dependsOn: bp-gateway-api` so Flux gates them on this release's
  # Ready condition.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/02-cert-manager.yaml
+++ b/clusters/_template/bootstrap-kit/02-cert-manager.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-cert-manager
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "02"
 spec:
  interval: 15m
  releaseName: cert-manager
@ -40,10 +38,7 @@ spec:
  chart:
    spec:
      chart: bp-cert-manager
-      # 1.2.1 (Fix #158): crdGate hook image switched from
-      # bitnami/kubectl:1.30.4 (deleted from Docker Hub 2025-08) to
-      # bitnamilegacy/kubectl:1.30.7.
-      version: 1.2.2
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-cert-manager
@ -54,12 +49,10 @@ spec:
  # Helm install completes when manifests apply; subsequent dependsOn
  # checks Ready=True independently. Replaces PR #221 spec.timeout: 15m.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/03-flux.yaml
+++ b/clusters/_template/bootstrap-kit/03-flux.yaml
@ -50,8 +50,6 @@ kind: HelmRelease
 metadata:
  name: bp-flux
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "03"
 spec:
  interval: 15m
  releaseName: flux
@ -61,10 +59,7 @@ spec:
  chart:
    spec:
      chart: bp-flux
-      # 1.2.1 (Fix #158): stuckHelmReleaseRecovery image switched from
-      # bitnami/kubectl:1.31 (deleted from Docker Hub 2025-08) to
-      # bitnamilegacy/kubectl:1.31.4. (Catches up from 1.1.3 → 1.2.1.)
-      version: 1.2.2
+      version: 1.1.3
      sourceRef:
        kind: HelmRepository
        name: bp-flux
@ -74,7 +69,6 @@ spec:
  # a target of the chart, so blocking on Ready=True is structurally
  # impossible. disableWait avoids the deadlock. Replaces PR #221 timeout.
  install:
-    timeout: 15m
    disableWait: true
    # Adopt cloud-init-installed Flux objects rather than fail on
    # ownership conflict (the objects exist before the HelmRelease ever
@ -83,7 +77,6 @@ spec:
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    # Keep operator-supplied values (e.g. resource overrides applied via
    # helm-controller out-of-band, or dry-run patches during incident
--- a/clusters/_template/bootstrap-kit/04-crossplane.yaml
+++ b/clusters/_template/bootstrap-kit/04-crossplane.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-crossplane
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "04"
 spec:
  interval: 15m
  timeout: 15m
@ -41,16 +39,14 @@ spec:
  chart:
    spec:
      chart: bp-crossplane
-      version: 1.1.4
+      version: 1.1.3
      sourceRef:
        kind: HelmRepository
        name: bp-crossplane
        namespace: flux-system
  install:
-    timeout: 15m
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/05-sealed-secrets.yaml
+++ b/clusters/_template/bootstrap-kit/05-sealed-secrets.yaml
@ -25,8 +25,6 @@ kind: HelmRelease
 metadata:
  name: bp-sealed-secrets
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "05"
 spec:
  interval: 15m
  releaseName: sealed-secrets
@ -36,7 +34,7 @@ spec:
  chart:
    spec:
      chart: bp-sealed-secrets
-      version: 1.1.2
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-sealed-secrets
@ -44,12 +42,10 @@ spec:
  # Event-driven install: single-replica controller + CRD; install
  # completes when manifests apply. Replaces PR #221 spec.timeout: 15m.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/05a-reflector.yaml
+++ b/clusters/_template/bootstrap-kit/05a-reflector.yaml
@ -37,8 +37,6 @@ kind: HelmRelease
 metadata:
  name: bp-reflector
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "05a"
 spec:
  interval: 15m
  releaseName: reflector
@ -57,12 +55,10 @@ spec:
  # when manifests apply. disableWait per architecture convention —
  # replaces blanket spec.timeout band-aid.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml
+++ b/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml
@ -52,13 +52,6 @@ kind: HelmRelease
 metadata:
  name: bp-self-sovereign-cutover
  namespace: flux-system
-  labels:
-    # slot drives the openova-flow adapter's Phase derivation
-    # (clusters/_template/bootstrap-kit/<NN>-...yaml encodes the
-    # install order). component=cutover overrides the slot rule so
-    # this HR lands in Phase 2 (Cutover) on the canvas, NOT Phase 1.
-    catalyst.openova.io/slot: "06a"
-    catalyst.openova.io/component: cutover
 spec:
  interval: 15m
  releaseName: self-sovereign-cutover
@ -211,97 +204,17 @@ spec:
      #   commits, and pushes. Subsequent reconciles see local Harbor
      #   as steady-state. Image bumped to alpine/k8s:1.31.4 (kubectl
      #   + git in one image; verified live on otech116).
-      # 0.1.24: Step-06 phase-0 ghcr-pull harbor.<sov-fqdn> auth merge
-      #   (#1184, bounded-cycle backfill). Once 0.1.20's phase-1 pivots
-      #   HelmRepository URLs to oci://harbor.<sov-fqdn>/openova-io,
-      #   source-controller hits a 401 on every pull because the
-      #   ghcr-pull Secret only carries auth for ghcr.io and
-      #   harbor.openova.io (cloud-init writes those two;
-      #   harbor.<sov-fqdn> is a per-Sovereign coordinate that doesn't
-      #   exist at bake time). Manually fixed on omantel 2026-05-10 (session
-      #   5c468708) — bp-guacamole / bp-netbird / bp-dmz-vcluster all
-      #   stuck Reconciling until `kubectl patch secret ghcr-pull` was
-      #   run by hand. 0.1.24 codifies that patch as Phase-0 of Step-06
-      #   so the next fresh `tofu apply` comes up GREEN with zero
-      #   manual intervention. Idempotent (no-op when entry already
-      #   matches), reads HARBOR_PASSWORD from the harbor-admin Secret
-      #   already mirrored into `catalyst` ns by bp-harbor 1.2.14+.
-      #   Adds `secrets: [update,patch]` to the runner ClusterRole.
-      # 0.1.25: Step-06 Phase-0 probe brittleness fix (qa-loop bounded-
-      #   cycle Wave 5 Fix #77, Gap A). 0.1.24 used kubectl jsonpath
-      #   `{.data['.dockerconfigjson']}` which silently returns EMPTY
-      #   because kubectl interprets the leading dot inside the bracket
-      #   as a child accessor (escape `\.dockerconfigjson` would work
-      #   but is a footgun). Caught live on omantel prov #7 2026-05-10:
-      #   `cutover-helmrepository-patches` Job FAILED 4× with
-      #   `FATAL: ghcr-pull Secret has no .dockerconfigjson key —
-      #   cloud-init did not run?` despite `kubectl get secret -o yaml`
-      #   showing the key present. 0.1.25 replaces the probe with
-      #   `kubectl get -o json | jq -r --arg k ...` (escape-free), adds
-      #   a 60s wait-loop for Reflector lag, and falls back to the
-      #   source namespace (flux-system) if the local copy is still
-      #   missing. Idempotent path unchanged.
-      # 0.1.26: HR install/upgrade timeout 15m + values
-      #   autoWaitForAPISeconds=720, autoTimeoutSeconds=840 (Fix #127).
-      #   Provisions #12 + #14 wedged at phase1-watching because the
-      #   HR had no explicit timeout → Helm 5m default → hit before
-      #   the auto-trigger Job's 600s activeDeadline could complete.
-      # 0.1.27: HR install/upgrade timeout 15m → 30m + values
-      #   autoWaitForAPISeconds 720→1500s (25m wait), autoTimeoutSeconds
-      #   840→1740s (29m Job deadline) (Fix #152). Prov #23 wedged
-      #   identically with 3× consecutive DeadlineExceeded on the auto-
-      #   trigger Job: catalyst-api had not yet become reachable inside
-      #   the 14m Job deadline. Cold-start of catalyst-platform on a
-      #   fresh Sovereign exceeds 14m on slow Hetzner regions; 2×
-      #   headroom (29m Job, 30m HR) restores the safety margin Fix #127
-      #   intended. NOTE: also bumps HR version pin from 0.1.25 → 0.1.27
-      #   — Fix #127 (commit 58f518ff) bumped Chart.yaml to 0.1.26 but
-      #   left this pin at 0.1.25, so the new HR-timeout/values changes
-      #   never landed on any Sovereign. The pin update here is what
-      #   actually delivers BOTH Fix #127 and Fix #152.
-      # 0.1.28 (Fix #158, 2026-05-11): values.yaml comment cleanup —
-      # platform-wide migration off bitnami/kubectl (deleted from
-      # Docker Hub 2025-08). This Blueprint already uses alpine/k8s
-      # + alpine since 0.1.10; no functional image change here.
-      # 0.1.30 (TBD-C18, 2026-05-18): NEW step 09 (gitea-token-mint)
-      # mints a real Gitea API token at cutover + patches Secret
-      # sme/provisioning-github-token.GITHUB_TOKEN. The catalyst-
-      # platform chart's provisioning-github-token.yaml template
-      # previously mirrored the Gitea admin PASSWORD verbatim into
-      # that Secret; SME provisioning then sent `Authorization: token
-      # <PWD>` to Gitea which 401s ("user does not exist [uid: 0]").
-      # On t22 2026-05-18: voucher checkout completed 200 + /jobs
-      # redirect fired, but no Organization CR was ever created.
-      # Step 09 closes the loop: DELETE-then-POST /api/v1/users/
-      # gitea_admin/tokens, capture .sha1, GET /api/v1/user validate,
-      # kubectl patch dest Secret, rollout-restart provisioning
-      # Deployment. Order=9 (last) is fine — none of steps 02-08 read
-      # the Secret and the SME provisioning service first consumes
-      # the token at voucher checkout time (always postdates cutover).
-      # 0.1.31 (TBD-C19, 2026-05-18): step-06 now also pivots
-      # openova-catalog HelmRepository (rendered by bp-catalyst-
-      # platform chart, not directly from bootstrap-kit). Adds
-      # `openova-catalog` to helmRepositories.names; new Phase-1.6
-      # patches the parent HelmRelease's spec.values.catalog.
-      # helmRepository.url; new Phase-2.5 injects same override
-      # into 13-bp-catalyst-platform.yaml in local Gitea so
-      # bootstrap-kit Kustomization reconcile preserves it. Without
-      # this pin bump, step-08 catches openova-catalog as the lone
-      # OFFENDER ~1m after step-06 (chart re-render reverts the
-      # live HR patch). Caught live on t22.omantel.biz 2026-05-18.
-      version: 0.1.31
+      version: 0.1.23
      sourceRef:
        kind: HelmRepository
        name: bp-self-sovereign-cutover
        namespace: flux-system
  install:
    disableWait: true
-    timeout: 30m
    remediation:
      retries: 3
  upgrade:
    disableWait: true
-    timeout: 30m
    remediation:
      retries: 3
  # Per-Sovereign overrides — the chart's values.yaml carries
@ -311,15 +224,6 @@ spec:
    sovereign:
      fqdn: ${SOVEREIGN_FQDN}
      harborInternalURL: http://harbor-core.harbor.svc.cluster.local
-      # NB: Harbor HTTPRoute publishes at `registry.<sov>` (see
-      # `clusters/_template/bootstrap-kit/19-harbor.yaml` gateway.host),
-      # NOT `harbor.<sov>`. Step-06 phase-1 rewrites every HelmRepository
-      # to `oci://${harbor_host}/openova-io`, so this MUST be the public
-      # hostname that actually answers — `registry.${SOVEREIGN_FQDN}`.
-      # Pre-2026-05-18 this said `harbor.${SOVEREIGN_FQDN}`, which no
-      # HTTPRoute matched → all post-pivot OCI pulls EOF → bp-sandbox HR
-      # never Ready → bootstrap-kit Ks stuck (chicken-and-egg). See
-      # t20 debug matrix.
-      harborPublicURL: https://registry.${SOVEREIGN_FQDN}
+      harborPublicURL: https://harbor.${SOVEREIGN_FQDN}
      giteaInternalURL: http://gitea-http.gitea.svc.cluster.local:3000
      giteaPublicURL: https://gitea.${SOVEREIGN_FQDN}
--- a/clusters/_template/bootstrap-kit/07-nats-jetstream.yaml
+++ b/clusters/_template/bootstrap-kit/07-nats-jetstream.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-nats-jetstream
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "07"
 spec:
  interval: 15m
  releaseName: nats-jetstream
@ -42,7 +40,7 @@ spec:
  chart:
    spec:
      chart: bp-nats-jetstream
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-nats-jetstream
@ -52,12 +50,10 @@ spec:
  # cold start. Helm install completes when manifests apply; downstream
  # dependsOn checks Ready=True independently. Replaces PR #221 timeout.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/08-openbao.yaml
+++ b/clusters/_template/bootstrap-kit/08-openbao.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-openbao
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "08"
 spec:
  interval: 15m
  releaseName: openbao
@ -54,7 +52,7 @@ spec:
  chart:
    spec:
      chart: bp-openbao
-      version: 1.2.16
+      version: 1.2.14
      sourceRef:
        kind: HelmRepository
        name: bp-openbao
--- a/clusters/_template/bootstrap-kit/09-keycloak.yaml
+++ b/clusters/_template/bootstrap-kit/09-keycloak.yaml
@ -29,8 +29,6 @@ kind: HelmRelease
 metadata:
  name: bp-keycloak
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "09"
 spec:
  interval: 15m
  releaseName: keycloak
@ -43,29 +41,7 @@ spec:
  chart:
    spec:
      chart: bp-keycloak
-      # 1.5.0 (qa-loop iter-12 Fix #53A): adds .Values.sovereignRealm.name
-      # parameter so each Sovereign owns its KC realm named after the tenant
-      # short-name (omantel chroot → "omantel"). Default `sovereign` is kept
-      # in the chart for backward compat with overlays not yet migrated.
-      # 1.4.5 (issue #146, prov #21+#22 hung 30+ min on bp-keycloak install):
-      # post-#140 retune. (a) availabilityCheck.timeout 900s → 300s — coarser
-      # retry was busting HR window before backoff could retry; faster
-      # failure means more attempts fit. (b) startupProbe enabled with 30m
-      # budget so slow Liquibase doesn't get killed by livenessProbe mid-
-      # migration. (c) livenessProbe.initialDelaySeconds 300 → 60 (cold-
-      # start protection now lives in startupProbe). Coupled HR change
-      # below: install/upgrade.remediation.retries 3 → 1 (Job's own
-      # backoffLimit handles retries without losing state across Helm
-      # restarts). Together: ≤ 30m wall-clock vs. 90m+ before.
-      # 1.4.4 (issue #140, post-upgrade hook regression on prov #21): bumps
-      # keycloakConfigCli.availabilityCheck.timeout 600s → 900s + adds
-      # cleanupAfterFinished (1h TTL) so stale hook Pods don't race
-      # before-hook-creation deletes on subsequent upgrades. Coupled with
-      # the install/upgrade timeout bump below (15m → 30m) so Helm's
-      # outer hook-wait accommodates the inner 15m availability window.
-      # 1.4.3 (issue #129): bumped keycloakConfigCli.availabilityCheck.timeout
-      # 120s → 600s + backoffLimit 1 → 5 (fresh-install wedge).
-      version: 1.4.5
+      version: 1.4.0
      sourceRef:
        kind: HelmRepository
        name: bp-keycloak
@ -75,53 +51,21 @@ spec:
  # 100+ Liquibase changesets). Helm install completes when manifests
  # apply; downstream dependsOn checks Ready=True independently.
  # Replaces PR #221 spec.timeout: 15m.
-  #
-  # 15m → 30m bump (issue #140, post-upgrade hook regression on prov #21):
-  # `disableWait: true` skips Pod-Ready waits but does NOT skip Helm hook
-  # waits — Helm always blocks on hook-Pod completion bounded by this
-  # timeout. The bp-keycloak chart's keycloak-config-cli post-install/
-  # post-upgrade hook now has an inner availabilityCheck.timeout of 900s
-  # (15m), and on chart-roll-triggered upgrades the keycloak StatefulSet
-  # rolling-restart + Liquibase re-validation can consume that full
-  # window. 30m gives Helm room to wait out one full inner attempt plus
-  # exponential backoff if needed, without blowing past Flux's HR timer.
-  # If you bump availabilityCheck.timeout further, bump THIS too.
-  # remediation.retries 3 → 1 (issue #146, prov #21+#22 hung 30+ min):
-  # Flux HR remediation does a full Helm uninstall+reinstall on each retry,
-  # losing all hook-Pod state and restarting Liquibase from zero. With 3
-  # retries × 30m HR timeout = up to 90m of wasted work before Flux gives
-  # up. The keycloak-config-cli Job already retries internally via
-  # `backoffLimit: 5` (set in chart values.yaml) — Job-level backoff
-  # preserves Keycloak's state and only re-runs the realm-import sidecar.
-  # HR-level remediation is reserved for genuine release-failure (e.g.
-  # invalid manifest) where a clean reinstall is the right answer; one
-  # retry is sufficient for that. Job-level vs. HR-level retry is the
-  # correct separation per the bitnami subchart's design.
  install:
    disableWait: true
-    timeout: 30m
+    timeout: 15m
    remediation:
-      retries: 1
+      retries: 3
  upgrade:
    disableWait: true
-    timeout: 30m
+    timeout: 15m
    remediation:
-      retries: 1
-  # Per-Sovereign overrides — issue #387 + #604 + qa-loop iter-12 Fix #53A:
+      retries: 3
+  # Per-Sovereign overrides — issue #387 + #604:
  # Wire the per-Sovereign hostname into the HTTPRoute template and
  # sovereign realm ConfigMap (catalyst-ui redirect URIs). The HTTPRoute
  # attaches to cilium-gateway/kube-system installed by 01-cilium.yaml.
-  #
-  # sovereignRealm.name: per `feedback_no_mvp_no_workarounds.md` target-state
-  # rule, each Sovereign owns its KC realm named after the tenant short-name.
-  # The bootstrap-kit Kustomization's postBuild.substitute supplies
-  # SOVEREIGN_REALM_NAME (canonical: first label of SOVEREIGN_FQDN, e.g.
-  # `omantel` for omantel.biz). When unset the envsubst rule
-  # ${VAR:=default} resolves to "sovereign" — backward-compat with
-  # overlays that haven't been migrated.
  values:
    sovereignFQDN: ${SOVEREIGN_FQDN}
-    sovereignRealm:
-      name: ${SOVEREIGN_REALM_NAME:=sovereign}
    gateway:
      host: auth.${SOVEREIGN_FQDN}
--- a/clusters/_template/bootstrap-kit/10-gitea.yaml
+++ b/clusters/_template/bootstrap-kit/10-gitea.yaml
@ -30,8 +30,6 @@ kind: HelmRelease
 metadata:
  name: bp-gitea
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "10"
 spec:
  interval: 15m
  releaseName: gitea
@ -54,7 +52,7 @@ spec:
      # bp-self-sovereign-cutover Step 1 gitea-mirror Job mounts it. K8s
      # forbids cross-namespace secretKeyRef; reflector is the canonical
      # platform-level mirror. Caught live on otech103 2026-05-04.
-      version: 1.2.7
+      version: 1.2.5
      sourceRef:
        kind: HelmRepository
        name: bp-gitea
@ -65,12 +63,10 @@ spec:
  # checks Ready=True independently. Replaces PR #221 spec.timeout: 15m.
  install:
    disableWait: true
-    timeout: 15m
    remediation:
      retries: 3
  upgrade:
    disableWait: true
-    timeout: 15m
    remediation:
      retries: 3
  values:
@ -83,16 +79,3 @@ spec:
    # cilium-gateway from clusters/_template/bootstrap-kit/01-cilium.yaml.
    gateway:
      host: gitea.${SOVEREIGN_FQDN}
-    # DoD D25 (t129 2026-05-16): override the chart's baked dev hostname
-    # `gitea.catalyst.local` so the Gitea Web UI renders the LIVE
-    # Sovereign FQDN in pageData.appUrl, clone URLs, and internal links.
-    # Without this every Sovereign's Gitea page told the operator to
-    # clone from `gitea.catalyst.local` (which public DNS can't resolve),
-    # breaking the canonical "Sovereign-local Git server" contract that
-    # bp-self-sovereign-cutover relies on.
-    gitea:
-      gitea:
-        config:
-          server:
-            DOMAIN: gitea.${SOVEREIGN_FQDN}
-            ROOT_URL: https://gitea.${SOVEREIGN_FQDN}
--- a/clusters/_template/bootstrap-kit/11-powerdns.yaml
+++ b/clusters/_template/bootstrap-kit/11-powerdns.yaml
@ -70,8 +70,6 @@ kind: HelmRelease
 metadata:
  name: bp-powerdns
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "11"
 spec:
  interval: 15m
  timeout: 15m
@ -102,29 +100,7 @@ spec:
      # provisioning paths operative.
      # 1.2.1: zone-bootstrap Job needs /tmp emptyDir (readOnlyRootFS+
      # curl -o /tmp/zone-resp). Caught live on otech103 2026-05-04.
-      # 1.2.2 (issue #144): zone-bootstrap Job activeDeadlineSeconds
-      # raised 300s → 840s. Cold Sovereign on prov #22 had bp-cnpg
-      # still synthesising the `pdns-pg-app` Secret when this Job
-      # ran; powerdns Pod was not Ready, curl against
-      # http://powerdns:8081 looped, Job hit 5m DeadlineExceeded,
-      # Helm post-install hook failed, HR FAILED 4× → terminal.
-      # New deadline (14m) sits below the HR install.timeout cap of
-      # 15m so Flux's remediation can still reclaim a true failure.
-      # 1.2.3 (Fix #144-followup, prov #37+#38 recurrence 2026-05-12):
-      # bumping activeDeadlineSeconds alone was insufficient — the Job
-      # hit BackoffLimitExceeded (NOT DeadlineExceeded) at ~10min
-      # because each container invocation curl'd a Service with empty
-      # Ready endpoints (powerdns Pods Pending behind a worker-capacity
-      # wedge that kept bp-cnpg's pdns-pg-1-initdb itself Pending).
-      # Container restartPolicy=OnFailure + backoffLimit=6 killed the
-      # Job long before activeDeadlineSeconds had any effect. Fix moves
-      # the wait-for-API loop INSIDE the container (restartPolicy=Never,
-      # bounded by new apiReadyTimeoutSeconds=600s) so one Pod owns
-      # the full 14m budget. Trace: in chroot prov #38, HR status
-      # message read "Helm install failed for release powerdns/powerdns
-      # with chart bp-powerdns@1.2.2: failed post-install: 1 error
-      # occurred: * job powerdns-zone-bootstrap failed: BackoffLimitExceeded".
-      version: 1.2.3
+      version: 1.2.1
      sourceRef:
        kind: HelmRepository
        name: bp-powerdns
@ -139,12 +115,10 @@ spec:
  # cleanly; runtime convergence (powerdns pods becoming Ready once
  # CNPG lands) is observed via kubectl, not gated on Helm.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/12-external-dns.yaml
+++ b/clusters/_template/bootstrap-kit/12-external-dns.yaml
@ -44,8 +44,6 @@ kind: HelmRelease
 metadata:
  name: bp-external-dns
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "12"
 spec:
  interval: 15m
  releaseName: external-dns
@ -73,12 +71,10 @@ spec:
  # slow-Ready cascade. Helm install completes when manifests apply.
  # Replaces PR #221 spec.timeout: 15m.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/13-bp-catalyst-platform.yaml
+++ b/clusters/_template/bootstrap-kit/13-bp-catalyst-platform.yaml
@ -34,13 +34,6 @@ kind: HelmRelease
 metadata:
  name: bp-catalyst-platform
  namespace: flux-system
-  labels:
-    # slot encodes bootstrap-kit install order; component=catalyst-platform
-    # overrides the default Phase 1 mapping so this HR lands in
-    # Phase 3 (Sovereign Live) on the openova-flow canvas — once
-    # Ready=True the Sovereign is fully self-sufficient.
-    catalyst.openova.io/slot: "13"
-    catalyst.openova.io/component: catalyst-platform
 spec:
  interval: 15m
  releaseName: catalyst-platform
@ -62,16 +55,6 @@ spec:
    # the umbrella install, eliminating the race.
    - name: bp-keycloak
    - name: bp-cnpg
-    # bp-crossplane-claims (chart-roll-rca iter-15, 2026-05-10): owns the
-    # access.openova.io/v1alpha1 XRD that qa-fixtures UserAccess CRs
-    # require. Without this dep, slot 13 races slot 14 and the umbrella
-    # upgrade fails admission with `no matches for kind "UserAccess" in
-    # version "access.openova.io/v1alpha1"`. The release Secret then
-    # enters `pending-upgrade` and waits the full 15m timeout × 3 retries
-    # before any operator-visible failure (the 2026-05-10 omantel.biz
-    # 90-min wedge). With this edge, the chart never enters the failing
-    # state on a fresh roll.
-    - name: bp-crossplane-claims
  chart:
    spec:
      chart: bp-catalyst-platform
@ -380,230 +363,7 @@ spec:
      # EnsureOrg / EnsureRepo blocking qa-wp Application reconcile.
      # bootstrap-kit qaFixtures.cnpgPairName default qa-cnpg → qa-cnpgpair
      # so TC-306's "cnpgpair" substring assertion passes.
-      # 1.4.123 (qa-loop iter-12 Fix #53A): triggers catalyst-api StatefulSet
-      # restart so it picks up the new CATALYST_KC_REALM=omantel value from
-      # the bp-keycloak 1.5.0 mirrored Secret (realm-rename target-state).
-      # 1.4.127 (qa-loop iter-12 Fix #54 Workstream 4): chart-side
-      # templates/catalyst-gitea-token-secret.yaml + post-install Job
-      # auto-mints the Gitea PAT into catalyst-gitea-token (replaces
-      # kubectl-applied operational hack).
-      # 1.4.133 (qa-loop iter-1 prefetch Fix #113, prov #9 wedge):
-      # qa-fixtures Kyverno disallow-privileged-containers exclusion
-      # list now includes `catalyst` namespace so the registry-pivot
-      # DaemonSet shipped by bp-self-sovereign-cutover (which legitimately
-      # needs `securityContext.privileged: true` to rewrite
-      # /etc/rancher/k3s/registries.yaml on every node) is not blocked
-      # by the validating admission webhook. Without this, prov #9
-      # bp-self-sovereign-cutover HR went Ready=False and bp-catalyst-
-      # platform never reached Ready → console.<sov> Ingress never
-      # materialised → iter-1 was unrunnable.
-      # 1.4.134 (qa-loop iter-1 prefetch Fix #114, prov #9 unwedge):
-      # New pre-install hook Job (qa-finalizer-strip, weight -99)
-      # strips orphaned controller finalizers off Application /
-      # Organization / Environment / UserAccess CRs in the qa-
-      # namespace + force-finalizes the namespace itself if it's
-      # stuck Terminating. Breaks the rollback-orphan finalizer
-      # deadlock that left prov #9 in an unrecoverable install loop:
-      #   1. install creates qa-omantel ns + Application + controllers
-      #      in same pass (no hook ordering)
-      #   2. qa-cnpg-backup-s3-seed post-install hook stalls 15m
-      #   3. cleanupOnFail rolls back, killing controllers BEFORE they
-      #      can process Application's deletion finalizer
-      #   4. qa-omantel ns wedged in Terminating; no controller exists
-      #   5. retry: "namespace is being terminated" → seed Job RBAC
-      #      creation rejected → 15m hook timeout → loop forever.
-      # This Job runs at the very start of every install attempt and
-      # guarantees a clean slate.
-      # 1.4.136 (qa-loop iter-1 Fix #124, secondary Fix #122): convert
-      # catalyst-gitea-token bootstrap from post-install to pre-install
-      # hook so catalyst-catalog + catalyst-organization-controller
-      # (which validate non-empty CATALYST_GITEA_TOKEN at startup) see
-      # a populated Secret at first container start. Prior post-install
-      # ordering caused chicken-and-egg deadlock: Deployments crashed
-      # because Secret was empty; mint Job ran AFTER Deployments,
-      # exponential back-off blew past Helm's 15m install timeout,
-      # remediation looped forever. Pre-install hook (weight=10) now
-      # populates the Secret (weight=5) BEFORE any consumer Deployment
-      # rolls. See Chart.yaml top comment for the full diagnostic chain.
-      # 1.4.135 (qa-loop bounded-provision-cycle Fix #119): sanitize
-      # illegal `/` in qa-fixtures Continuum mirror label value. Prov
-      # #10 wedge — helm install crashed on Continuum CR validation
-      # because the Fix #102 platform-mirror label
-      # `openova.io/continuum-mirror-of: <ns>/<name>` violates k8s
-      # label-value spec (`/` forbidden in values, allowed only in
-      # keys as the prefix separator). Split into two valid labels:
-      # `openova.io/continuum-mirror-of-namespace` +
-      # `openova.io/continuum-mirror-of-name`. Unblocks prov #11+.
-      # 1.4.138 (qa-loop iter-1 Fix #138, prov #20 wedge): converts
-      # qa-fixtures qa-cnpg-backup-s3-seed + qa-cnpg-status-seed Jobs
-      # from post-install hooks → regular release resources. Resolves
-      # the circular bootstrap-kit DAG (this slot 13 install hook needed
-      # bp-seaweedfs slot 18 to be Ready, which couldn't happen until
-      # this HR was Ready). bp-catalyst-platform install now completes
-      # in ~5 min instead of timing out at 15 min then loop-rolling back.
-      # 1.4.137: deploy-bot auto-bump (no template changes).
-      # 1.4.139 (Fix #163, 2026-05-11, MIRROR-EVERYTHING): every
-      # chart-hook image reference in this Blueprint uses the explicit
-      # harbor.openova.io/proxy-dockerhub prefix per CLAUDE.md
-      # inviolable rule. SBOM-auditable, no functional change.
-      # 1.4.140 (qa-loop Wave 27 Fix #184, prov #33 wedge, 2026-05-11):
-      # catalyst-gitea-token-mint pre-install hook Gitea-API wait loop
-      # raised from hardcoded 60×5s (300s = 5m) to values-driven knob
-      # (giteaWait.iterations × giteaWait.intervalSeconds, default
-      # 168×5 = 840s = 14m). Covers the autoscaler-hcloud cold-start
-      # observed on multi-region prov #33: workerCount=0 (Fix #157
-      # sizing) means the autoscaler must spawn a worker in fsn1/hel1
-      # before bp-gitea's Pod can schedule, which takes 10-15m on a
-      # fresh provision. Pre-Fix #184 budget (300s) always expired
-      # before gitea was reachable → bp-catalyst-platform installFailed
-      # and HR loop-rolled forever. Budget arithmetic: hook 840s + 60s
-      # slack ≤ HR install.timeout 900s (15m).
-      # 1.4.141 (qa-loop Fix #185, prov #38/#39/#41 recurrence,
-      # 2026-05-12): qa-finalizer-strip pre-install hook (helm.sh/hook-
-      # weight -99) now tolerates the control-plane NoSchedule taint
-      # and runs with priorityClassName: system-cluster-critical so it
-      # is ALWAYS schedulable regardless of worker-node CPU saturation.
-      # Root cause on prov #41: after bootstrap-kit fan-out the worker
-      # (cpx32, 8vCPU/16GB) sat at 99% CPU requests; the autoscaler
-      # had backed off scale-up of a second worker; the Job's 50m CPU
-      # request couldn't be satisfied; Helm pre-install timed out at
-      # 15m; Flux remediated 3× and gave up. Same recurring failure on
-      # prov #38, #39, #41 — all on chart pin 1.4.140 which (correctly)
-      # had no scheduling concession for the -99 hook. Image switched
-      # from bitnamilegacy/kubectl:1.29.3 → alpine/k8s:1.31.4 in same
-      # commit (rule-17 MIRROR-EVERYTHING hygiene; bitnamilegacy is
-      # the Docker-Hub redirect for deprecated Bitnami 2025-08 cutover).
-      # 1.4.147 (D31 wordpress-tenant activeHotStandby + D21 owner auto-seed):
-      # - PR #1562 wires bp-cnpg-pair Primary+Replica pattern into
-      #   wordpress-tenant chart via pg.activeHotStandby knob
-      # - PR #1564 baked into catalyst-api:8d2a947 — handover now
-      #   auto-seeds the operator's UserAccess CR (D21 zero-touch)
-      # 1.4.146 (D29 billing internal JWT bypass for public routes):
-      # - PR #1561 mirrors PR #1559's gateway public routes in the billing
-      #   service's own JWT middleware. Without this, the gateway passed
-      #   through but billing still 401-d.
-      # 1.4.145 (D29 gateway public routes for redeem flow):
-      # - PR #1559 makes /api/billing/{vouchers/redeem-preview,plans,addons}
-      #   public so the marketplace /redeem?code=XXX landing can validate
-      #   codes without auth (the entire D29 voucher-redeem zero-touch
-      #   flow is broken without this)
-      # 1.4.144 (D27 admin tag override + D28 voucher email wire):
-      # - PR #1557 decouples admin tag from smeTag bundle (admin image
-      #   may not publish for every SME services CI SHA — caught t132
-      #   2026-05-16 with admin:b0ed216 stuck in ImagePullBackOff)
-      # - PR #1556 adds the billing→notification wire so the voucher
-      #   issuance flow emails the recipient (D28 zero-touch contract)
-      #
-      # 1.4.148 (D16 + D17 + D27 founder-flagged bug fixes, t139 verify cycle):
-      # - PR #1583: D16 /cloud nodes multi-cluster fan-out + handover
-      #   export retry/reorder/auth-bypass (catalyst-api 2ab8a0e)
-      # - PR #1584: D27 catalog fresh-seed Published=true default
-      #   (sme services catalog 964dc15)
-      # - PR #1585: D17 /app/$componentId route-collision fix (catalyst-ui 2ab8a0e)
-      # Caught on t136/t138 fresh-prov runs that bootstrap-kit was
-      # still pinned to 1.4.147 → none of the fixes reached the chroot.
-      # 1.4.153 — D17 Wave-1 Family A: /cloud?view=list&kind=<X>
-      # no longer drifts to /dashboard (kind-alias map in
-      # router.tsx validateSearch). Caught on t10.omantel.biz
-      # test agents E/C2 2026-05-17.
-      # 1.4.155 — Wave 5 UX polish (founder review 2026-05-17):
-      # - Sidebar reorder: Dashboard → Cloud → Apps → Jobs → Users →
-      #   BSS → Settings (operator mental model: overview → infra →
-      #   workloads → ops → access → commerce → config).
-      # - BSS icon swapped from bespoke receipt glyph to briefcase
-      #   line-glyph matching the rest of the icon family.
-      # - Marketplace toggle moved off Settings sub-nav + standalone
-      #   /settings/marketplace page INTO SettingsPage as a
-      #   <SectionCard id="marketplace"> anchor section (same pattern
-      #   as #dns, #sovereign, #notifications). MarketplaceSettings.tsx
-      #   page deleted; MarketplaceSection.tsx new inner component;
-      #   /settings/marketplace route + sidebar sub-nav child removed.
-      #   Old URL now 404s — operators click Settings then scroll to
-      #   the Marketplace anchor.
-      # - Save flow UNCHANGED: POST /api/v1/sovereigns/{id}/marketplace
-      #   still commits per-Sovereign overlay to GitOps repo, Flux
-      #   reconciles ~1 min.
-      #
-      # 1.4.154 — Wave 2 collector PR. Bundles 6 Fix-Author PRs that
-      # landed AFTER the 1.4.153 Wave-1 roll, all from the same t10
-      # test sweep:
-      # - #1598 Family F: BSS menu in Sovereign Console
-      #   (Billing/Orders/Revenue/Vouchers/Tenants iframe-embed of
-      #   marketplace.<fqdn>/back-office/*). Founder bug #1.
-      # - #1599 Family D: dashboard treemap fan-out for cluster /
-      #   region / vcluster / family + Layer-1 cluster default.
-      #   Founder bug #2.
-      # - #1600 Family C: ResourceDetailPage real-data rewrite —
-      #   per-kind summary, owner chain, navigate (not assign).
-      #   Founder bug #5.
-      # - #1601 Family G: 6 singletons — hcloud-volumes StorageClass
-      #   (C9-006), /fleet/applications aggregator (C10-002),
-      #   secondary install-* Job bridge backfill (C10-003), legacy
-      #   wildcard-tls cert cleanup (C7-007), D22 settings em-dash
-      #   placeholder lift (C8-001), /jobs region filter (C8-005).
-      # - #1602 Family E: Compliance UI — Falco runtime alerts +
-      #   SBOM/CVE tab + framework filter chip strip + policy
-      #   drilldown live-cluster fallback + PolicyReport /
-      #   ClusterPolicyReport list kinds (C11-003/005/006/007/008/
-      #   009/010).
-      # - #1603 Family B: AppDetail HR-overlay status sync +
-      #   Resources/Logs tab namespace+label fix (HR.spec.target-
-      #   Namespace + chart-name label) + "Bootstrap blueprint"
-      #   chip for bp-* (founder bug #4, C4-003/004/005/007/013).
-      # 1.4.163 (Wave 16 collector, 2026-05-18): republishes the chart
-      # OCI artifact so it actually contains every chart-template change
-      # merged after the 1.4.162 publish (commit 0ad78790). Without the
-      # republish, bootstrap-kit pin 1.4.162 pulls an artifact missing
-      # the new templates and Sovereigns boot with stale chart bytes.
-      # Baked: #1644 tenantPublic HTTPRoute reconciler + #1650
-      # tenantPublic setter on product-install + #1640 Cilium Gateway
-      # per-zone listener pairs + #1654 bp-newapi attestation gate +
-      # sandbox-controller post-handover refinements (D31 HS env vars,
-      # sovereign-fqdn ConfigMap keys, cutover-driver sandboxes RBAC,
-      # values.yaml sovereign.{enableHotStandby,primaryRegion,
-      # replicaRegion} defaults). See Chart.yaml header comment for
-      # the full change list.
-      # 1.4.166 (TBD-E8 / C4-015, 2026-05-18): seed 13 baseline Blueprint
-      # CRs unconditionally so `/api/v1/catalog` returns a non-empty
-      # items[] from handover-time. Pre-fix every fresh Sovereign had
-      # empty catalog because (a) self-sovereign-cutover step-01 only
-      # mirrors `openova-io/openova` into Gitea — not the `catalog` /
-      # `catalog-sovereign` Orgs that catalyst-catalog reads from — and
-      # (b) qa-fixtures (the only chart-shipped Blueprint CRs) defaults
-      # OFF on production. Adds templates/catalog-seed/blueprints.yaml
-      # (bp-wordpress-tenant, bp-cnpg, bp-keycloak, bp-grafana,
-      # bp-prometheus, bp-loki, bp-redis, bp-clickhouse, bp-opensearch,
-      # bp-temporal, bp-n8n, bp-langfuse, bp-llm-gateway) which the
-      # chained catalog client surfaces via in-cluster LIST fallback.
-      # 1.4.168 (TBD-C18b, 2026-05-18): stop clobbering the cutover-minted
-      # Gitea API token. templates/sme-services/provisioning-github-
-      # token.yaml gains a lookup-persistence guard — if the destination
-      # Secret carries annotation `catalyst.openova.io/token-source:
-      # self-sovereign-cutover-step-09` (stamped by Step 09 of bp-self-
-      # sovereign-cutover when it mints the real Gitea API token), the
-      # template preserves the existing GITHUB_TOKEN bytes instead of
-      # mirroring gitea-admin-secret.password over them on every Flux
-      # reconcile. Pre-fix on t22: Step 09 minted a real token at
-      # 13:43:33Z; ~5 min later helm reconcile rewrote GITHUB_TOKEN back
-      # to the admin password byte, so every subsequent SME provisioning
-      # call to Gitea returned 401 "user does not exist" and journey
-      # step 16 (tenant repo creation) silently stuck.
-      # 1.4.179 (TBD-A14/A15/A10b, 2026-05-18): three t24 zero-touch
-      # Wave 36 P1 fresh-prov blockers — see chart Chart.yaml header for
-      # the full diagnostic + fix description per gate.
-      #   - A14 issue #1843: networkpolicies (networking.k8s.io) RBAC
-      #     get/list/watch verbs added to clusterrole-cutover-driver.
-      #   - A15 issue #1844: sovereign-fqdn ConfigMap empty fields
-      #     populated end-to-end via the cloud-init → bootstrap-kit →
-      #     chart substitute chain (configuredRegions / controlPlaneIP /
-      #     primaryRegion / replicaRegion / selfDeploymentId /
-      #     enableHotStandby / qaApplications). This Kustomization gains
-      #     3 new value mappings: global.sovereignSelfDeploymentId,
-      #     sovereign.configuredRegions, sovereign.qaApplications.
-      #   - A10b issue #1845: GET kubeconfig?region=<cloudRegion>
-      #     resolves the slot-suffixed on-disk shape
-      #     `<id>-<region>-<i>.yaml` (handler-side glob fallback).
-      version: 1.4.179
+      version: 1.4.123
      sourceRef:
        kind: HelmRepository
        name: bp-catalyst-platform
@ -622,56 +382,16 @@ spec:
  # specifically for this umbrella chart — every other bp-* chart
  # remains at its previous (or default) timeout because they install
  # in well under 5 minutes empirically.
-  #
-  # chart-roll-rca iter-15 (2026-05-10): timeout reduced 25m → 15m and
-  # remediation hardened with cleanupOnFail + strategy: rollback +
-  # remediateLastFailure. Background: the 25m ceiling existed to absorb
-  # the dep-ordering race RC-1 (qa-fixtures UserAccess CRs rendering
-  # before the bp-crossplane-claims XRD existed). With that race fixed
-  # via the bp-crossplane-claims dependsOn edge above, 15m is plenty for
-  # the umbrella's true install latency on a healthy cluster.
-  # cleanupOnFail purges partial release artifacts on retry; rollback
-  # strategy reverts to the last good release before retrying instead of
-  # leaving the release Secret pinned at `pending-upgrade` for the full
-  # timeout ceiling. Net effect: a failed-then-recoverable upgrade
-  # collapses from ~75m worst case → ~15m worst case.
-  #
-  # post-prov7 fix (2026-05-10, refs chart-roll-rca-iter15): the
-  # HelmRelease v2 schema only allows `cleanupOnFail` and
-  # `remediation.strategy` on the `upgrade` block. The previous version
-  # of this file placed both fields on the `install` block as well,
-  # which caused the bootstrap-kit Kustomization to fail dry-run on a
-  # fresh Sovereign with `field not declared in schema`, blocking ALL
-  # HRs from rendering. The install block here keeps only the schema-
-  # legal fields (`retries`, `remediateLastFailure`); rollback semantics
-  # apply naturally to upgrades, and a failed first install is
-  # remediated via retry without rollback (no prior release to roll
-  # back to).
-  #
-  # F8 fix (2026-05-12, prov #44 RCA): bumped install + upgrade timeout
-  # 15m → 30m. F1-F7 ship live on main, qa-finalizer-strip Completed
-  # and autoscaler workers joined, but bp-catalyst-platform HR was
-  # still mid-retry (failures=3) at the catalyst-api 60m phase1 watch
-  # cap on d9399223c3caa4f9. Total bootstrap-kit install on a fresh
-  # cpx42×1 Sovereign genuinely exceeds the 15m PR #221 ceiling when
-  # the umbrella chart's full SME + Catalyst service stack rolls
-  # without a warm Harbor proxy-cache. Paired with the F8 catalyst-api
-  # DefaultWatchTimeout bump (60m → 120m) so the outer watch budget
-  # comfortably contains the new 30m × 3-retry inner HR ceiling.
  install:
    disableWait: true
-    timeout: 30m
+    timeout: 25m
    remediation:
      retries: 3
-      remediateLastFailure: true
  upgrade:
    disableWait: true
-    timeout: 30m
+    timeout: 25m
    remediation:
      retries: 3
-      strategy: rollback
-      remediateLastFailure: true
-    cleanupOnFail: true
  # Per-Sovereign overrides for the umbrella — sovereign-FQDN-derived hostnames
  # for console/admin/api. All chart-level Catalyst service config (image refs,
  # OTel endpoints, NATS subjects) lives in products/catalyst/chart/values.yaml.
@ -689,15 +409,6 @@ spec:
      # then short-circuits the glue registration and falls back to plain
      # set_ns (legacy behaviour).
      sovereignLBIP: ${SOVEREIGN_LB_IP}
-      # sovereignSelfDeploymentId — the catalyst-api deployment-record id
-      # this Sovereign was provisioned under on the contabo mothership.
-      # Threaded from cloud-init's SOVEREIGN_DEPLOYMENT_ID Kustomization
-      # postBuild substitute. Consumed by the chart's sovereign-fqdn
-      # ConfigMap `selfDeploymentId` key so the chroot catalyst-api's
-      # GET /api/v1/sovereign/self answers with the correct id at
-      # handover-time (no wait for the orchestrator's chart-values
-      # overlay write). TBD-A15 (t24 zero-touch, 2026-05-18, issue #1844).
-      sovereignSelfDeploymentId: '${SOVEREIGN_DEPLOYMENT_ID:-}'
    ingress:
      hosts:
        console:
@ -727,106 +438,6 @@ spec:
    # zero-touch flow), cloud-init pre-renders this variable to a
    # single-entry array derived from ${sovereign_fqdn}.
    parentZones: ${PARENT_DOMAINS_YAML}
-    # ─── Wildcard cert issuer environment (Fix #123, LE rate-limit) ────
-    # Default-OFF (production LE issuer); flipped to true via envsubst
-    # WILDCARD_CERT_USE_STAGING=true on the per-Sovereign overlay for any
-    # Sovereign that should issue staging-LE certs instead of production.
-    # The qa-loop coordinator pairs this knob with QA_FIXTURES_ENABLED on
-    # QA Sovereigns (omantel.biz and qa.* pools) so the wipe + re-provision
-    # cadence never trips Let's Encrypt's 5-certs/168h production ceiling
-    # per registered domain. Customer Sovereigns leave this empty (=false)
-    # and get real-trusted production certs.
-    #
-    # Staging certs are signed by Fake LE Intermediate X1; browsers
-    # reject without an explicit exception, but `curl -sk` and Playwright
-    # (ignoreHTTPSErrors:true) accept them — sufficient for the qa-loop
-    # Test Executor's contract assertions.
-    #
-    # Per docs/INVIOLABLE-PRINCIPLES.md #4 every Sovereign may flip this
-    # independently; the chart values.yaml carries the staging issuer
-    # name (`letsencrypt-dns01-staging-powerdns`, shipped by
-    # bp-cert-manager-powerdns-webhook 1.1.0+) as an overridable default.
-    wildcardCert:
-      useStaging: ${WILDCARD_CERT_USE_STAGING:-false}
-    # ─── Sovereign-side region seeding (DoD D5) ─────────────────────
-    # regionsJson — JSON-array literal of the canonical multi-region
-    # RegionSpec[] this Sovereign was provisioned with. Threaded
-    # through from the mothership prov body via the tofu cloud-init
-    # `SOVEREIGN_REGIONS_JSON` envsubst placeholder. The chart writes
-    # this string into the `sovereign-fqdn` ConfigMap's `regionsJson`
-    # key (sovereign-fqdn-configmap.yaml); the catalyst-api Pod reads
-    # via env `SOVEREIGN_REGIONS_JSON`; chrootEnsureDeployment parses
-    # and stamps Request.Regions so /infrastructure/topology emits
-    # the right per-region tree and /cloud?view=graph renders all
-    # N regions correctly. Without this the chroot fell back to the
-    # live-Nodes path and emitted "1 cluster 1 region" on every
-    # multi-region Sovereign (caught on t126, 2026-05-16).
-    sovereign:
-      # MUST be quoted: SOVEREIGN_REGIONS_JSON contains valid JSON like
-      # `[{"cloudRegion":"hel1",...}]`. Without quotes, YAML interprets
-      # the JSON as a YAML flow-sequence-of-flow-mappings, parses into
-      # `[]map[string]interface{}`, then Helm's chart template `{{ .Values.
-      # sovereign.regionsJson }}` stringifies via Go's `%v` printf —
-      # producing `[map[cloudRegion:hel1 ...]]` (Go map syntax, NOT JSON).
-      # The chroot's chrootRegionsFromEnv then can't json.Unmarshal it →
-      # falls back to live-Nodes path → /cloud renders "1 region 1 cluster"
-      # on every multi-region Sovereign. Caught on t131 2026-05-16.
-      # Single-quoted so embedded double-quotes in the JSON are literal.
-      regionsJson: '${SOVEREIGN_REGIONS_JSON:-}'
-      # ─── D22 (settings empty values) sovereign-side identity ──────────
-      # ORG_EMAIL / ORG_NAME / SOVEREIGN_CONTROL_PLANE_IP / GITOPS_REPO_URL
-      # threaded from cloud-init (provisioner.go::writeTfvars + Hetzner
-      # tofu cloudinit-control-plane.tftpl). Chart's sovereign-fqdn
-      # ConfigMap exposes these as keys; catalyst-api reads via env in
-      # api-deployment.yaml (PR #1569); chrootEnsureDeployment populates
-      # the deployment record so Sovereign Console Settings page renders
-      # real ownerEmail/region/controlPlaneIP/gitopsRepoURL/consoleURL
-      # instead of `—` placeholders. Empty default = same as today,
-      # backwards-compatible for charts that don't have the cloud-init
-      # placeholders wired yet.
-      orgEmail: '${ORG_EMAIL:-}'
-      orgName: '${ORG_NAME:-}'
-      controlPlaneIP: '${SOVEREIGN_CONTROL_PLANE_IP:-}'
-      gitopsRepoURL: '${GITOPS_REPO_URL:-}'
-      # ─── D31 active-hot-standby (cross-region CNPG) ──────────────────
-      # Sovereign-level opt-in for the active-hot-standby Postgres shape
-      # on every CNPG-backed tenant app the marketplace installs.
-      # Default-OFF — every Sovereign that has not flipped
-      # SOVEREIGN_ENABLE_HOT_STANDBY=true on the per-Sovereign overlay
-      # keeps rendering single-Cluster CNPG (no regression). When ON
-      # AND both region keys are non-empty AND distinct, the SME-tenant
-      # gitops writer injects pg.activeHotStandby.* into every fresh
-      # bp-wordpress-tenant HelmRelease so the chart's
-      # cnpg-cluster.yaml template renders a primary + replica
-      # Cluster.postgresql.cnpg.io pair across the two regions, WAL
-      # streaming over Cilium ClusterMesh (DoD D11 + D31). Same wiring
-      # extends to any future tenant product chart (gitlab-tenant,
-      # nextcloud-tenant) that adopts the same value contract.
-      #
-      # Region keys MUST match the canonical openova.io/region node
-      # label value (e.g. `hz-fsn-rtz-prod`, `hz-hel-rtz-prod`) — the
-      # WordPress chart's cnpg-cluster.yaml uses nodeAffinity on that
-      # label to pin the primary + replica Pods to the right regions.
-      enableHotStandby: '${SOVEREIGN_ENABLE_HOT_STANDBY:-}'
-      primaryRegion: '${SOVEREIGN_PRIMARY_REGION:-}'
-      replicaRegion: '${SOVEREIGN_REPLICA_REGION:-}'
-      # configuredRegions — YAML list of region keys this Sovereign was
-      # provisioned with (e.g. ["fsn1", "hel1"]). Threaded from cloud-init's
-      # SOVEREIGN_CONFIGURED_REGIONS_YAML Kustomization postBuild substitute
-      # which the tofu module renders as a YAML inline list literal from
-      # var.regions[*].cloudRegion. The chart's sovereign-fqdn ConfigMap
-      # joins this list into a comma-separated `configuredRegions` key for
-      # the catalyst-ui Dashboard SovereignCard + Networking → ClusterMesh
-      # tab to render configured-but-not-active chips. Defaults to empty
-      # list so non-multi-region Sovereigns surface only their live region.
-      # TBD-A15 (t24 zero-touch, 2026-05-18, issue #1844).
-      configuredRegions: ${SOVEREIGN_CONFIGURED_REGIONS_YAML:-[]}
-      # qaApplications — YAML list of qa-fixtures applicationRef literals
-      # the chroot Sovereign's /compliance/scorecard surface emits via
-      # appRefs[]. Default empty so production Sovereigns surface only
-      # PolicyReport-observed apps. QA Sovereigns set via QA_APPLICATIONS_YAML.
-      # TBD-A15 (t24 zero-touch, 2026-05-18, issue #1844).
-      qaApplications: ${QA_APPLICATIONS_YAML:-[]}
    # ─── QA fixtures (qa-loop iter-6 Cluster-F + EPIC-6 iter-6) ────────
    # Default-OFF on production; flipped to true via envsubst
    # QA_FIXTURES_ENABLED=true on the per-Sovereign overlay for any
--- a/clusters/_template/bootstrap-kit/14-crossplane-claims.yaml
+++ b/clusters/_template/bootstrap-kit/14-crossplane-claims.yaml
@ -28,8 +28,6 @@ kind: HelmRelease
 metadata:
  name: bp-crossplane-claims
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "14"
 spec:
  interval: 15m
  releaseName: crossplane-claims
@ -46,20 +44,7 @@ spec:
  chart:
    spec:
      chart: bp-crossplane-claims
-      # 1.1.3 (qa-loop iter-16 Fix #71): legacy XUserAccess Composition
-      # gated behind new `userAccess.compositionEnabled` (default false).
-      # The catalyst-useraccess-controller is now the canonical day-2
-      # path; the Composition was setting `Ready=False` on every CR
-      # because (a) provider-kubernetes is not installed and (b) post-EPIC-3
-      # CRs use `tierRoleRef` not `applications[0]`. The composite
-      # controller's status-write was overwriting the controller's
-      # `Ready=True`. Disabling the Composition (and `defaultCompositionRef`)
-      # leaves the controller in sole charge of `useraccesses.access.openova.io`
-      # while the XRD itself stays installed (it owns the CRD the
-      # controller watches).
-      # 1.1.4 (Fix #158): kubectlImage switched from bitnami/kubectl:1.31
-      # (deleted from Docker Hub 2025-08) to bitnamilegacy/kubectl:1.31.4.
-      version: 1.1.5
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-crossplane-claims
@ -69,12 +54,10 @@ spec:
  # HR on the upstream CRDs being live; disableWait replaces PR #221's
  # blanket spec.timeout: 15m band-aid.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/15-external-secrets.yaml
+++ b/clusters/_template/bootstrap-kit/15-external-secrets.yaml
@ -41,8 +41,6 @@ kind: HelmRelease
 metadata:
  name: bp-external-secrets
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "15"
 spec:
  interval: 15m
  releaseName: external-secrets
@ -59,7 +57,7 @@ spec:
  chart:
    spec:
      chart: bp-external-secrets
-      version: 1.1.1
+      version: 1.1.0
      sourceRef:
        kind: HelmRepository
        name: bp-external-secrets
@ -68,12 +66,10 @@ spec:
  # dependsOn is the gate, not Helm timeout). Replaces blanket
  # spec.timeout: 15m band-aid pattern from PR #221, removed in PR #250.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/15a-external-secrets-stores.yaml
+++ b/clusters/_template/bootstrap-kit/15a-external-secrets-stores.yaml
@ -34,8 +34,6 @@ kind: HelmRelease
 metadata:
  name: bp-external-secrets-stores
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "15a"
 spec:
  interval: 15m
  releaseName: external-secrets-stores
@ -50,29 +48,7 @@ spec:
  chart:
    spec:
      chart: bp-external-secrets-stores
-      # 1.0.3 (issue #147 / prov #23): bump webhookGate.timeoutSeconds
-      # 300 -> 600. Fix #141's 300s budget raced on prov #23
-      # (`3ea80c75e1568a5c`, 3rd consecutive FAIL of this HR) where
-      # cold-node image pull (+60-120s, no warmed cache) compounded on
-      # cert-manager's own earlier retry latency, pushing total webhook
-      # convergence past 300s. 600s = realistic max cold-start budget.
-      #
-      # 1.0.2 (issue #141 / prov #21): bump webhookGate.timeoutSeconds
-      # 60 -> 300. Fix #137's 60s budget raced on prov #21
-      # (`f84f6c3ff2b60296`, HR FAILED `failed pre-install: timed out`)
-      # where webhook convergence took 75-105s on slow Hetzner cold-start.
-      #
-      # 1.0.1 (issue #137 / prov #20): pre-install hook gates the
-      # ClusterSecretStore apply on the upstream ESO admission webhook
-      # actually being dial-able (Pod-Ready ≠ Endpoints-populated +
-      # cert-manager Cert mounted + CABundle injected). Without it the
-      # HR FAILED with `exceeded max retries` on cold-cluster provisions
-      # even though dependsOn (bp-external-secrets) was satisfied.
-      #
-      # 1.0.4 (Fix #158): webhookGate hook image switched from
-      # bitnami/kubectl:1.30.4 (deleted from Docker Hub 2025-08) to
-      # bitnamilegacy/kubectl:1.30.7.
-      version: 1.0.5
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-external-secrets-stores
@ -81,11 +57,9 @@ spec:
  # dependsOn is the gate, not Helm timeout).
  install:
    disableWait: true
-    timeout: 15m
    remediation:
      retries: 3
  upgrade:
    disableWait: true
-    timeout: 15m
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/16-cnpg.yaml
+++ b/clusters/_template/bootstrap-kit/16-cnpg.yaml
@ -44,8 +44,6 @@ kind: HelmRelease
 metadata:
  name: bp-cnpg
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "16"
 spec:
  interval: 15m
  releaseName: cnpg
@ -57,33 +55,17 @@ spec:
  chart:
    spec:
      chart: bp-cnpg
-      version: 1.0.1
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-cnpg
        namespace: flux-system
-  # CNPG: KEEP Helm wait (disableWait: false / default). Consumers
-  # bp-harbor + bp-powerdns + bp-keycloak + bp-gitea apply
-  # postgresql.cnpg.io/v1.Cluster CRs gated by the cnpg mutating webhook
-  # `mcluster.cnpg.io`. If bp-cnpg's HelmRelease goes Ready before the
-  # cnpg-webhook-service has endpoints, Flux dependsOn lets downstream
-  # HRs proceed → their Cluster CR apply gets:
-  #   "failed calling webhook \"mcluster.cnpg.io\": no endpoints
-  #    available for service \"cnpg-webhook-service\""
-  # → Helm install fails → RetriesExceeded → entire DB-backed chain
-  # (Harbor/PowerDNS/Keycloak/Gitea) wedges. Caught on prov #55/#56
-  # (2026-05-12). disableWait: false (the default) tells Helm to block
-  # the HR's Ready until the webhook deployment is rolled and the
-  # service has endpoints, which is exactly what downstream consumers
-  # need. This is the carve-out from the INVIOLABLE-PRINCIPLES #3
-  # event-driven blanket — the rule's WHY (avoiding agent-waits-for-
-  # its-own-CRDs cilium-style deadlock) does NOT apply here because
-  # bp-cnpg's CRDs are loaded by helm-controller before pods schedule.
+  # Event-driven install per docs/INVIOLABLE-PRINCIPLES.md #3.
  install:
-    timeout: 15m
+    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
+    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/17-valkey.yaml
+++ b/clusters/_template/bootstrap-kit/17-valkey.yaml
@ -36,8 +36,6 @@ kind: HelmRelease
 metadata:
  name: bp-valkey
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "17"
 spec:
  interval: 15m
  releaseName: valkey
@ -48,19 +46,17 @@ spec:
  chart:
    spec:
      chart: bp-valkey
-      version: 1.0.1
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-valkey
        namespace: flux-system
  # Event-driven install per docs/INVIOLABLE-PRINCIPLES.md #3.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/18-seaweedfs.yaml
+++ b/clusters/_template/bootstrap-kit/18-seaweedfs.yaml
@ -41,8 +41,6 @@ kind: HelmRelease
 metadata:
  name: bp-seaweedfs
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "18"
 spec:
  interval: 15m
  releaseName: seaweedfs
@ -57,24 +55,17 @@ spec:
  chart:
    spec:
      chart: bp-seaweedfs
-      # 1.2.0 — qa-loop Wave 5 Fix #79 Gap B: ships
-      # `seaweedfs-storage` StorageClass (chart-rendered) so PVCs that
-      # default to it (bp-guacamole recordings, future
-      # bp-loki/mimir/tempo cache) bind day-1 on bare-k3s Sovereigns
-      # without waiting for bp-hcloud-csi or the SeaweedFS CSI driver.
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-seaweedfs
        namespace: flux-system
  # Event-driven install per docs/INVIOLABLE-PRINCIPLES.md #3.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/19-harbor.yaml
+++ b/clusters/_template/bootstrap-kit/19-harbor.yaml
@ -67,8 +67,6 @@ kind: HelmRelease
 metadata:
  name: bp-harbor
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "19"
 spec:
  interval: 15m
  releaseName: harbor
@ -101,24 +99,17 @@ spec:
      # live on otech113 2026-05-05 (issue #935 Bug 1) — Step 02 was
      # in CreateContainerConfigError for 11+ retries, blocking
      # cutover indefinitely.
-      version: 1.2.17
+      version: 1.2.15
      sourceRef:
        kind: HelmRepository
        name: bp-harbor
        namespace: flux-system
  # Event-driven install per docs/INVIOLABLE-PRINCIPLES.md #3.
-  # timeout: 15m — Harbor's post-install hooks (DB migration, job-service
-  # init) legitimately need >5m on cold k3s. Same canonical-seam pattern
-  # as Fix #127 (cutover), Fix #131 (gitea), Fix #143 (es-stores):
-  # explicit HR-level timeout overrides Helm's 5m default which expires
-  # before Harbor reaches Ready (prov #24 c776423270f4ae30 04:17 incident).
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/19a-bp-sandbox.yaml
+++ b/clusters/_template/bootstrap-kit/19a-bp-sandbox.yaml
@ -1,151 +0,0 @@
-# bp-sandbox — Catalyst bootstrap-kit Blueprint slot 19a (post-harbor).
-#
-# Deploys the sandbox-controller (Wave 1 + Wave 8) on a Sovereign so
-# that `sandbox.openova.io/v1.Sandbox` CRs are actually reconciled.
-# Wave 8 extends the controller to ALSO render per-Sandbox pty-server
-# StatefulSet + MCP Deployment + Service + HTTPRoute (architecture.md
-# §7) — without this slot enabled, every Sandbox CR sits unreconciled.
-#
-# ─── Slot history: 61 → 19a (Wave 11 convergence fix, 2026-05-18) ────
-# Originally slot 61. Caught live on t16.omantel.biz: bp-sandbox HR
-# stuck Reconciling because its chart pull went through
-# harbor.<sov-fqdn> (bp-self-sovereign-cutover Step-06 phase-1 rewrites
-# every HelmRepository URL `oci://ghcr.io/openova-io` →
-# `oci://harbor.<sov-fqdn>/openova-io` after handover), but harbor.<sov
-# -fqdn> wasn't reachable yet because bp-harbor itself hadn't reached
-# Ready — chicken-and-egg. Same failure shape as Wave 7 #1610 with
-# bp-hcloud-csi (REMOVED — see kustomization.yaml comment block).
-#
-# Fix here is the cleaner long-term cousin of the Wave 7 hotfix:
-# instead of removing the slot, sequence it AFTER bp-harbor (slot 19)
-# by renumbering to 19a + adding `bp-harbor` to dependsOn. Once
-# bp-harbor is Ready (its chart pull goes through harbor.openova.io,
-# the mothership-warmed proxy-cache wired into k3s registries.yaml at
-# cloud-init time — NOT through harbor.<sov-fqdn>, so no cycle there),
-# this slot's chart pull can resolve against either ghcr.io
-# (pre-cutover) or harbor.<sov-fqdn> (post-cutover) and find the
-# artifact. The cutover Step-06 phase-1 URL rewrite is safe by then.
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-sandbox
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-sandbox
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "19a"
-    catalyst.openova.io/component: sandbox-controller
-spec:
-  interval: 15m
-  releaseName: sandbox
-  targetNamespace: catalyst-system
-  dependsOn:
-    - name: bp-vcluster-helmrepo
-    - name: bp-catalyst-platform
-    # bp-harbor (slot 19, Wave 11 convergence fix 2026-05-18) — sandbox's
-    # chart pull goes through harbor.<sov-fqdn> after the post-handover
-    # cutover Step-06 phase-1 HelmRepository URL rewrite. Without this
-    # edge, source-controller hits harbor.<sov-fqdn> before bp-harbor
-    # is Ready, the OCI fetch 503s, and bp-sandbox sits Reconciling for
-    # the entire bootstrap-kit timeout window — preventing the umbrella
-    # Kustomization from ever reaching Ready. Same chicken-and-egg as
-    # Wave 7 #1610 (bp-hcloud-csi, REMOVED) but resolved by sequencing
-    # rather than removal so the slot remains available for Wave 11
-    # Sandbox MVP without manual Day-2 add-app re-introduction.
-    - name: bp-harbor
-  chart:
-    spec:
-      chart: sandbox
-      version: 0.1.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-sandbox
-        namespace: flux-system
-  install:
-    timeout: 10m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 10m
-    disableWait: true
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface.
-  #
-  # enabled — default-ON via ${SANDBOX_ENABLED:-true} on the
-  # bootstrap-kit Kustomization substitute. Wave 11 convergence fix
-  # (TBD-D11, t22.omantel.biz 2026-05-18): every Sandbox CR sat
-  # unreconciled because the bootstrap-kit Kustomization's substitute
-  # map never wires SANDBOX_ENABLED, so the envsubst resolved to the
-  # `:-false` fallback and the chart skip-rendered the entire
-  # controller Deployment. With Wave 8 pty-server + MCP images now
-  # SHA-stamped in chart values.yaml (auto-bumped by .github/workflows/
-  # build-sandbox-{pty-server,mcp-server}.yaml), the gate's original
-  # purpose is satisfied — flip default-ON so the controller materialises
-  # on every fresh prov. Operators may still opt-OUT by setting
-  # `SANDBOX_ENABLED=false` on the per-Sovereign overlay's substitute
-  # map (mirrors how MARKETPLACE_ENABLED works in slot 13).
-  #
-  # runtime.* — Wave 8 pty-server / MCP / NEWAPI wiring. The
-  # controller surfaces these to its per-Sandbox renderer (manifests
-  # rendered into the per-Org `catalyst-tenant` Gitea repo at
-  # sandbox/<owner-uid>/).
-  #
-  # Image overrides are OMITTED from this slot's HR values — the
-  # chart's values.yaml already SHA-pins both images (auto-bumped by
-  # CI) and exposing them as substitute vars without the corresponding
-  # entries in the bootstrap-kit Kustomization postBuild.substitute
-  # map causes Flux to substitute empty strings → null → the chart's
-  # `required` guard would fail render once enabled=true. Day-2 SHA
-  # overrides remain available via Sovereign-overlay HelmRelease
-  # patches under spec.values.runtime.{ptyServerImage,mcpImage} — but
-  # the canonical path is bumping chart values.yaml + bootstrap-kit
-  # pin (single source of truth, INVIOLABLE-PRINCIPLES.md #4a).
-  values:
-    enabled: ${SANDBOX_ENABLED:-true}
-    env:
-      hostCluster: ${SOVEREIGN_REGION_CANONICAL_LABEL}
-      sovereignFQDN: ${SOVEREIGN_FQDN}
-      # TBD-D35c (Wave 32 verifier fix) — comma-separated list of
-      # NewAPI channel names the controller stamps as `allowed_channels`
-      # on every per-Sandbox token mint. Default `qwen` matches the
-      # only channel bp-newapi's channel-seed-job.yaml writes on a
-      # fresh Sovereign install (alias for `qwen3.6-bankdhofar`,
-      # products/sandbox/docs/newapi-proxy-contract.md §2). Per-
-      # Sovereign overlays MUST extend this list to mirror their
-      # channel rollout (e.g. `qwen,anthropic,openai`) — the chart's
-      # NoAllowedChannels guard fails every mint if this resolves to
-      # empty.
-      newapiDefaultChannels: ${SANDBOX_DEFAULT_CHANNELS:-qwen}
-    runtime:
-      newapiURL: https://newapi.${SOVEREIGN_FQDN}/v1
-    # D31 active-hot-standby — when SOVEREIGN_ENABLE_HOT_STANDBY=true on
-    # the per-Sovereign overlay (and both regions are non-empty AND
-    # distinct), sandbox.db.provision materialises a primary + replica
-    # Cluster.postgresql.cnpg.io pair instead of a single Cluster
-    # (mirrors the bp-cnpg-pair pattern + bp-wordpress-tenant chart
-    # 0.2.0+). Same trio of envsubst placeholders bp-catalyst-platform
-    # slot 13 consumes for the marketplace tenant path — flipping one
-    # knob on the per-Sovereign overlay covers BOTH paths so HA stays
-    # consistent across the marketplace tenant install and the
-    # sandbox.db plane. Default empty = single-Cluster CNPG (zero
-    # regression). Region keys MUST match the canonical openova.io/
-    # region node label value (e.g. `hz-fsn-rtz-prod`).
-    cnpg:
-      activeHotStandby:
-        enabled: ${SOVEREIGN_ENABLE_HOT_STANDBY:-}
-        primaryRegion: ${SOVEREIGN_PRIMARY_REGION:-}
-        replicaRegion: ${SOVEREIGN_REPLICA_REGION:-}
--- a/clusters/_template/bootstrap-kit/20-opentelemetry.yaml
+++ b/clusters/_template/bootstrap-kit/20-opentelemetry.yaml
@ -49,8 +49,6 @@ kind: HelmRelease
 metadata:
  name: bp-opentelemetry
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "20"
 spec:
  interval: 15m
  timeout: 15m
@ -67,12 +65,10 @@ spec:
        name: bp-opentelemetry
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/21-alloy.yaml
+++ b/clusters/_template/bootstrap-kit/21-alloy.yaml
@ -46,8 +46,6 @@ kind: HelmRelease
 metadata:
  name: bp-alloy
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "21"
 spec:
  interval: 15m
  timeout: 15m
@ -64,12 +62,10 @@ spec:
        name: bp-alloy
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/22-loki.yaml
+++ b/clusters/_template/bootstrap-kit/22-loki.yaml
@ -43,8 +43,6 @@ kind: HelmRelease
 metadata:
  name: bp-loki
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "22"
 spec:
  interval: 15m
  timeout: 15m
@ -61,12 +59,10 @@ spec:
        name: bp-loki
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/23-mimir.yaml
+++ b/clusters/_template/bootstrap-kit/23-mimir.yaml
@ -43,8 +43,6 @@ kind: HelmRelease
 metadata:
  name: bp-mimir
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "23"
 spec:
  interval: 15m
  timeout: 15m
@ -55,18 +53,16 @@ spec:
  chart:
    spec:
      chart: bp-mimir
-      version: 1.0.4
+      version: 1.0.2
      sourceRef:
        kind: HelmRepository
        name: bp-mimir
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/24-tempo.yaml
+++ b/clusters/_template/bootstrap-kit/24-tempo.yaml
@ -41,8 +41,6 @@ kind: HelmRelease
 metadata:
  name: bp-tempo
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "24"
 spec:
  interval: 15m
  timeout: 15m
@ -59,12 +57,10 @@ spec:
        name: bp-tempo
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/25-grafana.yaml
+++ b/clusters/_template/bootstrap-kit/25-grafana.yaml
@ -46,8 +46,6 @@ kind: HelmRelease
 metadata:
  name: bp-grafana
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "25"
 spec:
  interval: 15m
  timeout: 15m
@ -65,18 +63,16 @@ spec:
  chart:
    spec:
      chart: bp-grafana
-      version: 1.0.1
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-grafana
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/27-kyverno.yaml
+++ b/clusters/_template/bootstrap-kit/27-kyverno.yaml
@ -43,8 +43,6 @@ kind: HelmRelease
 metadata:
  name: bp-kyverno
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "27"
 spec:
  interval: 15m
  releaseName: kyverno
@ -54,7 +52,7 @@ spec:
  chart:
    spec:
      chart: bp-kyverno
-      version: 1.1.0
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-kyverno
@ -66,12 +64,10 @@ spec:
  # past the point where downstream HRs could legitimately reconcile.
  # disableWait lets Flux mark this Ready as soon as manifests apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/28-reloader.yaml
+++ b/clusters/_template/bootstrap-kit/28-reloader.yaml
@ -41,8 +41,6 @@ kind: HelmRelease
 metadata:
  name: bp-reloader
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "28"
 spec:
  interval: 15m
  releaseName: reloader
@ -60,12 +58,10 @@ spec:
  # HR Ready signal aligned with manifest apply rather than runtime
  # convergence, matching the rest of the bootstrap-kit.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/29-vpa.yaml
+++ b/clusters/_template/bootstrap-kit/29-vpa.yaml
@ -41,8 +41,6 @@ kind: HelmRelease
 metadata:
  name: bp-vpa
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "29"
 spec:
  interval: 15m
  releaseName: vpa
@ -59,12 +57,10 @@ spec:
  # updater, admission-controller) plus admission webhook TLS bootstrap.
  # disableWait keeps Flux's Ready signal aligned with manifest apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/30-trivy.yaml
+++ b/clusters/_template/bootstrap-kit/30-trivy.yaml
@ -42,8 +42,6 @@ kind: HelmRelease
 metadata:
  name: bp-trivy
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "30"
 spec:
  interval: 15m
  releaseName: trivy
@ -64,12 +62,10 @@ spec:
  # mark this Ready as soon as manifests apply; runtime convergence
  # (DB hydration, first scan reports landing) is observed via kubectl.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/31-falco.yaml
+++ b/clusters/_template/bootstrap-kit/31-falco.yaml
@ -40,8 +40,6 @@ kind: HelmRelease
 metadata:
  name: bp-falco
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "31"
 spec:
  interval: 15m
  releaseName: falco
@ -62,12 +60,10 @@ spec:
  # Helm `--wait`. disableWait keeps Flux's signal aligned with
  # manifest apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/32-sigstore.yaml
+++ b/clusters/_template/bootstrap-kit/32-sigstore.yaml
@ -43,8 +43,6 @@ kind: HelmRelease
 metadata:
  name: bp-sigstore
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "32"
 spec:
  interval: 15m
  releaseName: sigstore
@ -64,12 +62,10 @@ spec:
  # Certificate is issued + bound. disableWait avoids holding the HR
  # signal on a runtime-convergence event.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/33-syft-grype.yaml
+++ b/clusters/_template/bootstrap-kit/33-syft-grype.yaml
@ -40,8 +40,6 @@ kind: HelmRelease
 metadata:
  name: bp-syft-grype
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "33"
 spec:
  interval: 15m
  releaseName: syft-grype
@ -61,12 +59,10 @@ spec:
  # meaningful — disableWait is the correct shape so Flux marks Ready
  # as soon as manifests apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/34-velero.yaml
+++ b/clusters/_template/bootstrap-kit/34-velero.yaml
@ -58,8 +58,6 @@ kind: HelmRelease
 metadata:
  name: bp-velero
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "34"
 spec:
  interval: 15m
  releaseName: velero
@ -67,7 +65,7 @@ spec:
  chart:
    spec:
      chart: bp-velero
-      version: 1.2.2
+      version: 1.2.0
      sourceRef:
        kind: HelmRepository
        name: bp-velero
@ -78,12 +76,10 @@ spec:
  # observes via the BSL CR phase, not via Helm `--wait`. disableWait
  # keeps the HR's Ready signal aligned with manifest apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/35-coraza.yaml
+++ b/clusters/_template/bootstrap-kit/35-coraza.yaml
@ -45,8 +45,6 @@ kind: HelmRelease
 metadata:
  name: bp-coraza
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "35"
 spec:
  interval: 15m
  releaseName: coraza
@ -63,12 +61,10 @@ spec:
        name: bp-coraza
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/49-bp-cert-manager-powerdns-webhook.yaml
+++ b/clusters/_template/bootstrap-kit/49-bp-cert-manager-powerdns-webhook.yaml
@ -82,8 +82,6 @@ kind: HelmRelease
 metadata:
  name: bp-cert-manager-powerdns-webhook
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "49"
 spec:
  interval: 15m
  releaseName: cert-manager-powerdns-webhook
@ -97,7 +95,7 @@ spec:
  chart:
    spec:
      chart: bp-cert-manager-powerdns-webhook
-      version: 1.1.0
+      version: 1.0.4
      sourceRef:
        kind: HelmRepository
        name: bp-cert-manager-powerdns-webhook
@ -107,12 +105,10 @@ spec:
  # so blocking on Helm `--wait` for the leaf Certificate to reach
  # Ready is unnecessary. Replaces blanket spec.timeout band-aids.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/50-cluster-autoscaler.yaml
+++ b/clusters/_template/bootstrap-kit/50-cluster-autoscaler.yaml
@ -68,8 +68,6 @@ kind: HelmRelease
 metadata:
  name: bp-cluster-autoscaler-hcloud
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "50"
 spec:
  interval: 15m
  releaseName: cluster-autoscaler
@ -77,24 +75,7 @@ spec:
  chart:
    spec:
      chart: bp-cluster-autoscaler-hcloud
-      # 1.3.0 — qa-loop chroot-canvas Fix: wire HCLOUD_NETWORK /
-      # HCLOUD_FIREWALL / HCLOUD_SSH_KEY env vars onto the autoscaler
-      # deployment so scale-up VMs land on the Phase-0 private network +
-      # firewall + ssh-key, identical to Phase-0 workers. Without these
-      # the autoscaler-spawned VMs only receive a public IP, the worker
-      # cloud-init's `K3S_URL=https://10.0.1.2:6443` is unreachable, the
-      # k3s agent join silently fails, and every scale-up times out at
-      # 15m → backoff. Live evidence: prov #38/#39/#41/#43 omantel.biz.
-      #
-      # 1.2.0 — qa-loop Wave 5 Fix #79 Gap D: chart-derived
-      # HCLOUD_CLUSTER_CONFIG fallback. When the per-Sovereign
-      # cloud-init has not stamped the `hcloud-cloud-init` key into
-      # `flux-system/cloud-credentials`, the chart synthesises a
-      # minimal HCLOUD_CLUSTER_CONFIG JSON from the existing
-      # `cluster-autoscaler.autoscalingGroups[]` so the autoscaler
-      # never FATALs with the generic
-      # "HCLOUD_CLUSTER_CONFIG or HCLOUD_CLOUD_INIT is not specified".
-      version: 1.3.0
+      version: 1.0.0
      sourceRef:
        kind: HelmRepository
        name: bp-cluster-autoscaler-hcloud
@ -105,12 +86,10 @@ spec:
  # not a Helm-wait concern. disableWait keeps Flux's Ready signal
  # aligned with manifest apply.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
@ -152,39 +131,6 @@ spec:
      # operators rotate by re-running cloud-init or by patching
      # cloud-credentials directly).
      optional: true
-    # ── Issue #1778 — Hetzner network/firewall/ssh-key attachment ──────
-    # The cluster-autoscaler-hcloud provider only attaches scale-up
-    # servers to an existing private network when HCLOUD_NETWORK is set
-    # at startup. Without it, the Phase-0 workers (which join via
-    # 10.0.1.2:6443 on the private subnet) and the autoscaler-spawned
-    # workers (which only have a public IP) live on different network
-    # planes — the autoscaler VMs cannot reach the apiserver private
-    # endpoint, the k3s agent join times out, the node never registers,
-    # the autoscaler hits a 15m scale-up timeout and enters backoff.
-    # Names are written by cloud-init (see
-    # infra/hetzner/cloudinit-control-plane.tftpl `hcloud-network-name`
-    # etc.) so the autoscaler attaches every scale-up VM to the
-    # SAME network + firewall + ssh-key the Phase-0 Tofu module created.
-    # The chart's values.yaml default of empty-string keeps the upstream
-    # deployment shape valid for legacy Sovereigns whose cloud-init
-    # never stamped these keys; on those Sovereigns Flux just skips the
-    # entry (optional: true) and the autoscaler runs in its pre-#1778
-    # shape (still broken, but no Helm render error).
-    - kind: Secret
-      name: cloud-credentials
-      valuesKey: hcloud-network-name
-      targetPath: cluster-autoscaler.extraEnv.HCLOUD_NETWORK
-      optional: true
-    - kind: Secret
-      name: cloud-credentials
-      valuesKey: hcloud-firewall-name
-      targetPath: cluster-autoscaler.extraEnv.HCLOUD_FIREWALL
-      optional: true
-    - kind: Secret
-      name: cloud-credentials
-      valuesKey: hcloud-ssh-key-name
-      targetPath: cluster-autoscaler.extraEnv.HCLOUD_SSH_KEY
-      optional: true
  # Per-Sovereign baseline values. clusters/<sovereign>/bootstrap-kit/
  # 40-cluster-autoscaler.yaml MAY override `autoscalingGroups` to set
  # the actual instanceType + region + min/max + name the Tofu module
--- a/clusters/_template/bootstrap-kit/51-bp-k8s-ws-proxy.yaml
+++ b/clusters/_template/bootstrap-kit/51-bp-k8s-ws-proxy.yaml
@ -49,8 +49,6 @@ kind: HelmRelease
 metadata:
  name: bp-k8s-ws-proxy
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "51"
 spec:
  interval: 15m
  releaseName: k8s-ws-proxy
@ -66,34 +64,16 @@ spec:
      # in values.yaml. The imagePullSecrets default is required so
      # omantel pods can pull from private GHCR without per-Sovereign
      # overlay (the catalyst-system `ghcr-pull` secret is canonical).
-      # 0.1.6 (qa-loop bounded-cycle Wave 5 Fix #78, Gap E): adds
-      # pre-install hook-weight -10 Job that auto-generates the
-      # `k8s-ws-proxy-hmac` Secret from /dev/urandom when absent.
-      # Pre-this, every fresh Sovereign sat with three k8s-ws-proxy
-      # pods ContainerCreating forever — the chart referenced a
-      # Secret that nothing ever created. Idempotent on upgrade
-      # (preserves the existing key — rotating it would invalidate
-      # every in-flight catalyst-api signature).
-      # 0.1.9 (qa-loop bounded-cycle Fix #95, regression of Fix #78):
-      # explicit hook-weight ordering for the hmac-bootstrap quartet
-      # (SA=-20, Role+RoleBinding=-15, Job=-10) so the SA lands BEFORE
-      # the Job that references it. Pre-this, prov #8 failed with
-      # `serviceaccount "k8s-ws-proxy-hmac-bootstrap" not found`
-      # because the Job (weight -10, lower=earlier in Helm) was
-      # applied before its SA (weight 0). Bumps Chart.yaml 0.1.7 ->
-      # 0.1.8; CI promote auto-bumps to 0.1.9 with new image SHA.
-      version: 0.1.11
+      version: 0.1.5
      sourceRef:
        kind: HelmRepository
        name: bp-k8s-ws-proxy
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/52-bp-guacamole.yaml
+++ b/clusters/_template/bootstrap-kit/52-bp-guacamole.yaml
@ -63,8 +63,6 @@ kind: HelmRelease
 metadata:
  name: bp-guacamole
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "52"
 spec:
  interval: 15m
  releaseName: guacamole
@ -91,55 +89,16 @@ spec:
      # chart wanted seaweedfs-storage, K8s rejected the immutable-spec
      # patch with `cannot patch ... PersistentVolumeClaim ... is
      # invalid: spec: Forbidden: spec is immutable after creation`).
-      # 0.1.15 (Fix #158): migrationImage bumped to
-      # bitnamilegacy/kubectl:1.30.7 (was 1.29.3); template fallback no
-      # longer references bitnami/kubectl (deleted from Docker Hub 2025-08).
-      # 0.1.19 (2026-05-12): chart-bump mirror chain caught up — tests/
-      # render.sh expect_total realigned from 15 → 19 (Fix #125's
-      # bootstrap Job for guacamole-oidc Secret added 4 resources: 1 Job
-      # + 1 ServiceAccount + 1 Role + 1 RoleBinding). Prior to the test
-      # fix, every Build bp-guacamole run published images but the
-      # Blueprint Release dispatched on the bump commit failed render.sh
-      # → 0.1.13–0.1.18 were never published to GHCR → bootstrap-kit
-      # HRs wedged at "ghcr.io/openova-io/bp-guacamole:0.1.17: not found".
-      # 0.1.21 (Refs TBD-G4 / C5-009, 2026-05-18): pulls in PR #1684
-      # (guacamole-deployment.yaml mount /home/guacamole instead of
-      # /home/guacamole/.guacamole). The official Apache Guacamole
-      # image entrypoint runs `rm -rf $GUACAMOLE_HOME` before
-      # repopulating the directory on every start; when the emptyDir
-      # was mounted directly at /home/guacamole/.guacamole the path
-      # was a mount point and `rm` failed with `Read-only file
-      # system`, crash-looping the webapp before Tomcat ever booted
-      # (observed on t22, 16 restarts). Mounting the parent dir
-      # makes .guacamole a regular subdirectory the entrypoint can
-      # freely rm and recreate.
-      # 0.1.22 (Refs TBD-G6 / C12-004, 2026-05-18): pulls in PR #1692
-      # (values default guacamole.httproute.parentRef.namespace
-      # gateway-system -> kube-system). The
-      # catalyst-system/guacamole-server HTTPRoute on t22 went
-      # Accepted=False because gateway-system/cilium-gateway does not
-      # exist on any Sovereign — the canonical gateway is
-      # kube-system/cilium-gateway installed by 01-cilium.yaml and
-      # used by every other Sovereign HTTPRoute.
-      # 0.1.23 (Refs TBD-G4 phase 2 / C12-005, 2026-05-18): pulls in
-      # PR #1699 (liveness + readiness probe paths flipped from `/`
-      # to `/guacamole/`). The Apache Guacamole webapp deploys under
-      # Tomcat's context path /guacamole/, not /, so probing `/`
-      # made kubelet restart the Pod every ~60s and the kube-system
-      # Cilium gateway returned 503 to the public hostname because
-      # the Endpoint was never Ready (observed on t22, 5 restarts).
-      version: 0.1.24
+      version: 0.1.9
      sourceRef:
        kind: HelmRepository
        name: bp-guacamole
        namespace: flux-system
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
--- a/clusters/_template/bootstrap-kit/54-bp-dmz-vcluster.yaml
+++ b/clusters/_template/bootstrap-kit/54-bp-dmz-vcluster.yaml
@ -1,107 +0,0 @@
-# bp-dmz-vcluster — Catalyst bootstrap-kit Blueprint slot 54.
-#
-# Per-region DMZ vCluster — installed on EVERY region (primary AND
-# every secondary). The DMZ vCluster is:
-#   - the public-fronted vCluster (hosts Cilium Gateway HTTPS ingress)
-#   - the inter-region WireGuard hop per docs/SOVEREIGN-MULTI-REGION-
-#     DOD.md A2 (inter-region link = DMZ WG over PUBLIC IPs, ALWAYS)
-#   - the home of clustermesh-apiserver Service type=LoadBalancer
-#     per DoD A3
-#
-# Per docs/SOVEREIGN-MULTI-REGION-DOD.md A4 (vCluster topology):
-#   primary    region → MGMT  + DMZ  vCluster (slot 58 + this slot)
-#   secondary  region → DMZ   + RTZ  vCluster (this slot + slot 59)
-#
-# Supersedes the inert `bp-dmz-vcluster` slot 54 entry declared in
-# scripts/expected-bootstrap-deps.yaml since qa-loop iter-12 Fix #53C
-# (the chart was authored at products/dmz-vcluster but never wired
-# into the bootstrap kit). The chart at products/dmz-vcluster
-# remains the per-tenant marketplace deliverable — different artifact,
-# now also a different chart name: `bp-dmz-vcluster-tenant` (renamed
-# 2026-05-18 per TBD-A6c, issue #1719, so the pin-sync audit can
-# disambiguate the two). This slot pins THIS chart (platform/) at
-# the version declared in platform/bp-dmz-vcluster/chart/Chart.yaml.
-#
-# Wrapper chart: platform/bp-dmz-vcluster/chart/
-#   Bundles loft-sh/vcluster 0.20.0 as a Helm subchart.
-#
-# Reconciled by: Flux on every region's k3s control plane.
-#
-# dependsOn:
-#   - bp-cilium       — CNI + Gateway API
-#   - bp-cert-manager — TLS for ClusterIssuers / wildcard cert
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-dmz-vcluster
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-dmz-vcluster
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "54"
-    catalyst.openova.io/vcluster-role: dmz
-    catalyst.openova.io/topology: dod-a4
-spec:
-  interval: 15m
-  releaseName: dmz-vcluster
-  targetNamespace: dmz
-  dependsOn:
-    - name: bp-cilium
-    - name: bp-cert-manager
-  chart:
-    spec:
-      chart: bp-dmz-vcluster
-      version: 0.1.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-dmz-vcluster
-        namespace: flux-system
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface.
-  #
-  # dmzVcluster.enabled — DMZ runs on every region by design (DoD A4).
-  # Default-ON to deliver the topology contract on day-one.
-  values:
-    dmzVcluster:
-      enabled: true
-      hostNamespace: dmz
-      vclusterName: dmz
-      role: every-region
-      nodeSelector:
-        regionLabelKey: openova.io/region
-        # Substituted by the bootstrap-kit Kustomization with the THIS
-        # region's CANONICAL k3s node-label value (e.g.
-        # `hz-hel-rtz-prod` on hel1, `hz-nbg-rtz-prod` on nbg1,
-        # `hz-sin-rtz-prod` on sin). Caught on t126 (2026-05-16): the
-        # prior `${SOVEREIGN_REGION_KEY}` ("hel1"/"nbg1-1"/"sin-2")
-        # didn't match the node label written by cloud-init (which uses
-        # `region_canonical_label` not `sovereign_region_key`) so every
-        # DMZ vCluster Pod sat Pending with FailedScheduling.
-        regionLabelValue: ${SOVEREIGN_REGION_CANONICAL_LABEL}
-    vcluster:
-      controlPlane:
-        statefulSet:
-          scheduling:
-            nodeSelector:
-              openova.io/region: ${SOVEREIGN_REGION_CANONICAL_LABEL}
--- a/clusters/_template/bootstrap-kit/55-bp-hcloud-ccm.yaml
+++ b/clusters/_template/bootstrap-kit/55-bp-hcloud-ccm.yaml
@ -1,110 +0,0 @@
-# bp-hcloud-ccm — Catalyst bootstrap-kit Blueprint #55
-# (Tier 5 — Cloud Integration). Pairs with bp-cluster-autoscaler-hcloud
-# (slot 50) and bp-hcloud-csi (slot 51, when present) — the full
-# Hetzner-cloud-direct trio.
-#
-# Wires hcloud-cloud-controller-manager into the cluster as the canonical
-# Hetzner cloud-provider integration. Without this CCM running:
-#   - Node providerID stays as k3s://<node-name> (kube-controller-manager
-#     + scheduler cannot correlate Pods with Hetzner zones).
-#   - Service-of-type-LoadBalancer stays in EXTERNAL-IP: <pending> forever
-#     (no cloud-provider to call out to).
-#
-# The second consequence is the proximate reason clustermesh-apiserver
-# could not migrate from NodePort to LB on omantel multi-region (qa-loop
-# iter-12 Fix #53D + Fix #54 Workstream 1) — until hcloud-CCM is
-# installed, the LB-typed Service from Fix #53D's PR #1274 stays Pending.
-#
-# Wrapper chart: platform/hcloud-ccm/chart/ — umbrella over upstream
-# hetznercloud/hcloud-cloud-controller-manager chart 1.20.0
-# (appVersion 1.20.0). Catalyst-curated values flow under the
-# `hcloud-cloud-controller-manager:` key + a vendor-agnostic
-# `hcloudCcm.*` block that ships the namespace-local Hetzner-API-token
-# Secret (`hcloud-token`).
-#
-# Reconciled by: Flux on the new Sovereign's k3s control plane.
-#
-# Hetzner-token wiring (mirrors bp-cluster-autoscaler-hcloud at slot 50
-# + bp-velero at slot 34 + bp-harbor at slot 19):
-#   - cloud-init writes `flux-system/cloud-credentials` Secret with the
-#     `hcloud-token` key (see infra/hetzner/cloudinit-control-plane.tftpl
-#     §"cloud-credentials-secret").
-#   - This HelmRelease lifts the `hcloud-token` value into the umbrella
-#     chart's `hcloudCcm.hcloudToken` value via Flux `valuesFrom`. The
-#     umbrella chart then synthesises a namespace-local
-#     `kube-system/hcloud-token` Secret (templates/hcloud-token-secret.yaml),
-#     which the upstream chart's `env.HCLOUD_TOKEN` wiring binds as the
-#     deployment's HCLOUD_TOKEN env var.
-#
-# dependsOn: (none) — hcloud-CCM is the FIRST cloud-provider seam, must
-# install BEFORE any blueprint that creates a LoadBalancer Service. The
-# cloud-credentials Secret is provisioned by cloud-init BEFORE Flux
-# installs anything.
-
---
-# kube-system is built into every Kubernetes cluster — never re-declare it.
-# The HelmRelease's targetNamespace below installs hcloud-CCM into
-# kube-system (canonical CCM placement; mirrors hcloud-CCM upstream
-# documentation).
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-hcloud-ccm
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-hcloud-ccm
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "55"
-spec:
-  interval: 15m
-  releaseName: hcloud-ccm
-  targetNamespace: kube-system
-  chart:
-    spec:
-      chart: bp-hcloud-ccm
-      version: 1.0.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-hcloud-ccm
-        namespace: flux-system
-  # Event-driven install: hcloud-CCM is a single Deployment +
-  # ServiceAccount + RBAC. Helm install completes when manifests apply;
-  # the binary's Hetzner-API connectivity check is a runtime concern,
-  # not a Helm-wait concern. disableWait keeps Flux's Ready signal
-  # aligned with manifest apply.
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  # ── Hetzner-token wiring ─────────────────────────────────────────────
-  # Pulls the `hcloud-token` key from the canonical
-  # `flux-system/cloud-credentials` Secret cloud-init writes at Phase 0
-  # (infra/hetzner/cloudinit-control-plane.tftpl §"cloud-credentials-secret").
-  # Flux dereferences `valuesFrom` at HelmRelease apply time, so the
-  # plaintext payload never appears in this committed manifest.
-  #
-  # The chart's templates/hcloud-token-secret.yaml renders this value
-  # into a namespace-local `kube-system/hcloud-token` Secret which the
-  # upstream chart's `env.HCLOUD_TOKEN.valueFrom.secretKeyRef` binding
-  # lifts onto the deployment's env.
-  valuesFrom:
-    - kind: Secret
-      name: cloud-credentials
-      valuesKey: hcloud-token
-      targetPath: hcloudCcm.hcloudToken
--- a/clusters/_template/bootstrap-kit/56-bp-openova-flow-server.yaml
+++ b/clusters/_template/bootstrap-kit/56-bp-openova-flow-server.yaml
@ -1,113 +0,0 @@
-# bp-openova-flow-server — Catalyst bootstrap-kit Blueprint slot 56
-# (Observability / OpenovaFlow event router).
-#
-# Stateless HTTP+SSE event router for OpenovaFlow. Emitters
-# (bp-openova-flow-emitter on every cluster, catalyst-api proxy on the
-# mother) POST FlowMessage envelopes; consumers (Sovereign Console
-# canvas) GET snapshots and subscribe to the SSE stream.
-#
-# Architecture:
-#   - Primary-cluster only — one Service per Sovereign, reached
-#     cross-region via Cilium Gateway HTTPRoute over public HTTPS.
-#     No NetBird required for v1.
-#   - In-memory ring buffer per flowId (default 4096 envelopes).
-#     State is lost on Pod restart; emitters re-emit snapshot on
-#     reconnect.
-#   - Workload: single Deployment, ClusterIP Service, optional
-#     HTTPRoute for cross-cluster reachability.
-#
-# Wrapper chart: platform/openova-flow-server/chart/
-# Catalyst-curated values: platform/openova-flow-server/chart/values.yaml
-# Reconciled by: Flux on the new Sovereign's k3s control plane.
-#
-# dependsOn:
-#   - bp-cilium       — Pod network + Gateway API (operator-facing HTTPRoute).
-#   - bp-cert-manager — TLS for openova-flow.<sovereign-fqdn>.
-#   - bp-cnpg         — postgresql.cnpg.io/v1 CRD for the chart's CNPG Cluster.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #1 (target-state) the chart ships
-# the real workload. Per #4 (never hardcode) the hostname,
-# ringCapacity, and image tag are operator-driven.
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-openova-flow-server
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-openova-flow-server
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "56"
-spec:
-  interval: 15m
-  releaseName: openova-flow-server
-  # Lands in catalyst-system co-located with the rest of the
-  # Catalyst control-plane stack (catalyst-api / catalyst-ui / etc.).
-  targetNamespace: catalyst-system
-  dependsOn:
-    - name: bp-cilium
-    - name: bp-cert-manager
-    # CNPG provides postgresql.cnpg.io/v1, the CRD used by the chart's
-    # cnpg-cluster.yaml. Without this dep the cold install's `kind:
-    # Cluster` manifest is rejected and the HR loops on InstallFailed.
-    # Added 2026-05-14 alongside the in-memory → CNPG-backed store
-    # rewrite (PR replacing the brittle in-memory map+RingBuffer that
-    # lost ALL flow state on pod restart).
-    - name: bp-cnpg
-  chart:
-    spec:
-      chart: bp-openova-flow-server
-      version: 0.2.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-openova-flow-server
-        namespace: flux-system
-  # Event-driven install: openova-flow-server is a single Deployment +
-  # Service + ServiceAccount. Helm install completes when manifests
-  # apply; readiness signalled via Flux dependsOn, never via
-  # spec.timeout watchdogs.
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface. The Sovereign's FQDN is interpolated
-  # at Flux apply time via the bootstrap-kit Kustomization's
-  # postBuild.substitute env hook — `${SOVEREIGN_FQDN}` is replaced
-  # with the concrete sovereign FQDN before the HR bytes land in the
-  # cluster.
-  values:
-    flowServer:
-      enabled: true
-      httproute:
-        # Default ON — cross-cluster emitters reach this server's
-        # public HTTPS endpoint via the Cilium Gateway. Per-Sovereign
-        # overlay disables when only the in-cluster Service is needed.
-        enabled: true
-        hostname: openova-flow.${SOVEREIGN_FQDN}
-        # Canonical Sovereign Gateway — every other HTTPRoute
-        # (catalyst-api, catalyst-ui, marketplace, gitea, harbor,
-        # keycloak, …) parents to kube-system/cilium-gateway installed
-        # by bootstrap-kit/01-cilium.yaml. Fix (TBD-G6 / C12-004):
-        # the previous value `catalyst-gateway` does not exist on any
-        # Sovereign — the HTTPRoute went Accepted=False with "no
-        # matching parent" on t22.
-        gatewayRef:
-          name: cilium-gateway
-          namespace: kube-system
--- a/clusters/_template/bootstrap-kit/57-bp-openova-flow-emitter.yaml
+++ b/clusters/_template/bootstrap-kit/57-bp-openova-flow-emitter.yaml
@ -1,101 +0,0 @@
-# bp-openova-flow-emitter — Catalyst bootstrap-kit Blueprint slot 57
-# (Observability / OpenovaFlow Flux adapter).
-#
-# Region-aware DaemonSet sidecar that watches HelmRelease + HelmChart
-# CRs on the LOCAL cluster's Flux and POSTs FlowMessage envelopes to
-# the configured openova-flow-server (slot 56, primary cluster only).
-#
-# Topology — runs on EVERY cluster (mother + primary Sovereign + every
-# secondary region). The receiving server sits on the primary cluster;
-# cross-cluster reachability is via the Cilium Gateway HTTPRoute over
-# public HTTPS.
-#
-# Wrapper chart: platform/openova-flow-emitter/chart/
-# Catalyst-curated values: platform/openova-flow-emitter/chart/values.yaml
-# Reconciled by: Flux on the new Sovereign's k3s control plane.
-#
-# dependsOn:
-#   - bp-flux         — informer needs Flux's helmrelease CRDs.
-#
-# Per docs/INVIOLABLE-PRINCIPLES.md #1 (target-state) the emitter runs
-# from first cut on every cluster. Per #4 (never hardcode) the
-# FLOW_SERVER_URL, FLOW_ID, and REGION_KEY all flow from the
-# per-Sovereign overlay's substitute env.
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-openova-flow-emitter
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-openova-flow-emitter
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "57"
-spec:
-  interval: 15m
-  releaseName: openova-flow-emitter
-  targetNamespace: catalyst-system
-  dependsOn:
-    - name: bp-flux
-  chart:
-    spec:
-      chart: bp-openova-flow-emitter
-      version: 0.1.1
-      sourceRef:
-        kind: HelmRepository
-        name: bp-openova-flow-emitter
-        namespace: flux-system
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface. ${SOVEREIGN_FQDN},
-  # ${SOVEREIGN_DEPLOYMENT_ID} and ${SOVEREIGN_REGION_KEY} are all
-  # provided by the bootstrap-kit Kustomization's postBuild.substitute
-  # env hook (see infra/hetzner/cloudinit-control-plane.tftpl, wired in
-  # main.tf for primary CP + secondary CP for_each so multi-region
-  # Sovereigns get distinct region tags on FlowNodes).
-  #
-  # FlowID — the catalyst-api per-deployment 16-char hex id. The
-  # catalyst-api proxy /api/v1/flows/{deploymentId}/* queries the
-  # openova-flow-server under the same id, so this is the canonical
-  # key linking the canvas to the emitter.
-  # RegionKey — Hetzner region code for this cluster ("fsn1" for primary,
-  # "hel1"/etc for secondaries). Stamped onto every FlowNode.region so
-  # the canvas groups bubbles into per-region super-bubbles via
-  # `contains` relationships.
-  values:
-    flowEmitter:
-      enabled: true
-      # In-cluster Service URL — the emitter DaemonSet lives in the same
-      # k3s as the openova-flow-server Deployment, so the POST stays
-      # cluster-local with no TLS dependency. The public HTTPRoute at
-      # https://openova-flow.<fqdn> exists for the MOTHERSHIP
-      # catalyst-api proxy (Agent #8 PR #1405) and any external consumer,
-      # NOT for the in-cluster emitter. Using the public URL was a live
-      # regression on prov #34, 2026-05-11: emitter posted to
-      # https://openova-flow.omantel.biz, TLS handshake EOF'd because
-      # bp-catalyst-platform InstallFailed → no wildcard *.<fqdn> cert
-      # → no Gateway listener → emitter retry-loop → server stays empty
-      # → canvas showed "No nodes to render".
-      flowServerUrl: http://openova-flow-server.catalyst-system.svc.cluster.local
-      flowId: ${SOVEREIGN_DEPLOYMENT_ID}
-      regionKey: ${SOVEREIGN_REGION_KEY}
-      namespaceFilter: flux-system
--- a/clusters/_template/bootstrap-kit/58-bp-mgmt-vcluster.yaml
+++ b/clusters/_template/bootstrap-kit/58-bp-mgmt-vcluster.yaml
@ -1,126 +0,0 @@
-# bp-mgmt-vcluster — Catalyst bootstrap-kit Blueprint slot 58.
-#
-# Primary-region MGMT vCluster. Hosts catalyst-api, catalyst-ui,
-# openova-flow-server, and other Sovereign control-plane workloads.
-#
-# Per docs/SOVEREIGN-MULTI-REGION-DOD.md A4 (vCluster topology):
-#   primary    region → MGMT  + DMZ  vCluster (this slot + slot 54)
-#   secondary  region → DMZ   + RTZ  vCluster (slot 54   + slot 59)
-#
-# Cross-vCluster intra-region traffic between MGMT and DMZ stays
-# inside the host k3s via Cilium endpoint identity routing. Inter-
-# region traffic goes over the DMZ WireGuard hop per DoD A2.
-#
-# Wrapper chart: platform/bp-mgmt-vcluster/chart/
-#   Bundles loft-sh/vcluster 0.20.0 as a Helm subchart so
-#   `helm dependency build` packages it into the OCI artifact.
-#
-# Reconciled by: Flux on the new Sovereign's k3s control plane.
-#
-# dependsOn:
-#   - bp-cilium       — CNI + Gateway API
-#   - bp-cert-manager — TLS for ClusterIssuers (vCluster's exported
-#                       kubeconfig needs the cluster's CA chain)
-#
-# Per-role gating: this slot defaults to DISABLED
-# (mgmtVcluster.enabled=${MGMT_VCLUSTER_ENABLED:=false}). The
-# Sovereign-provisioning tofu module (infra/hetzner/main.tf primary-CP
-# block) flips this to "true" via postBuild.substitute on the PRIMARY
-# region's CP only. Secondary regions leave it unset → defaults to
-# false → no resources render.
-#
-# Until the tofu-substitute follow-up PR lands (which adds
-# MGMT_VCLUSTER_ENABLED to the substitute block per the v3.2 Gap A
-# refactor), operators can opt in per-Sovereign overlay by patching
-# `values.mgmtVcluster.enabled: true` on the primary cluster's
-# bootstrap-kit slot.
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-mgmt-vcluster
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-mgmt-vcluster
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "58"
-    catalyst.openova.io/vcluster-role: mgmt
-    catalyst.openova.io/topology: dod-a4
-spec:
-  interval: 15m
-  releaseName: mgmt-vcluster
-  # The chart's templates/namespace.yaml creates the host namespace,
-  # so we point Flux to install INTO that namespace. The vCluster
-  # subchart's StatefulSet + Service + RBAC land here.
-  targetNamespace: mgmt
-  dependsOn:
-    - name: bp-cilium
-    - name: bp-cert-manager
-  chart:
-    spec:
-      chart: bp-mgmt-vcluster
-      version: 0.1.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-mgmt-vcluster
-        namespace: flux-system
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface.
-  #
-  # mgmtVcluster.enabled — flipped on by the primary CP's tofu postBuild
-  # substitute (MGMT_VCLUSTER_ENABLED). When the substitute is unset
-  # (secondary regions, or pre-substitute-rollout primary), the
-  # `:=false` default in the placeholder below makes Flux's envsubst
-  # render `enabled: false` rather than leaving the literal
-  # "${MGMT_VCLUSTER_ENABLED:=false}" string for Helm's YAML parser to
-  # interpret. The chart's default in values.yaml is also
-  # `enabled: false`, so an unset substitute and the chart default
-  # agree. Until the tofu substitute is rolled out, operators opt in
-  # with a per-Sovereign overlay patch that sets
-  # `values.mgmtVcluster.enabled: true` on the primary CP's
-  # bootstrap-kit slot only.
-  values:
-    mgmtVcluster:
-      # Flipped on for the primary region by tofu's primary postBuild
-      # (mgmt_vcluster_enabled=true). Secondaries render this slot but
-      # tofu sets the substitute false → chart renders zero resources.
-      enabled: ${MGMT_VCLUSTER_ENABLED:=false}
-      hostNamespace: mgmt
-      vclusterName: mgmt
-      role: primary
-      nodeSelector:
-        regionLabelKey: openova.io/region
-        # Canonical region label (hz-<stem>-rtz-prod) — matches the
-        # k3s node-label written at install time. See slot 54 + the
-        # SOVEREIGN_REGION_CANONICAL_LABEL substitute in
-        # infra/hetzner/cloudinit-control-plane.tftpl.
-        regionLabelValue: ${SOVEREIGN_REGION_CANONICAL_LABEL}
-    # Subchart values overlay — pinned to the per-region canonical label
-    # so the upstream vcluster StatefulSet's nodeSelector binds the same
-    # CP node as the umbrella's nodeSelector helper.
-    vcluster:
-      controlPlane:
-        statefulSet:
-          scheduling:
-            nodeSelector:
-              openova.io/region: ${SOVEREIGN_REGION_CANONICAL_LABEL}
--- a/clusters/_template/bootstrap-kit/59-bp-rtz-vcluster.yaml
+++ b/clusters/_template/bootstrap-kit/59-bp-rtz-vcluster.yaml
@ -1,97 +0,0 @@
-# bp-rtz-vcluster — Catalyst bootstrap-kit Blueprint slot 59.
-#
-# Per-secondary-region RTZ vCluster — installed ONLY on secondary
-# regions. Hosts regional tenant workloads + caches.
-#
-# Per docs/SOVEREIGN-MULTI-REGION-DOD.md A4 (vCluster topology):
-#   primary    region → MGMT  + DMZ  vCluster (slot 58 + slot 54)
-#   secondary  region → DMZ   + RTZ  vCluster (slot 54 + this slot)
-#
-# Cross-vCluster intra-region traffic between RTZ and DMZ stays
-# inside the host k3s via Cilium endpoint identity routing. Cross-
-# region traffic (RTZ secondary ↔ MGMT primary) goes through the DMZ
-# WireGuard hop per DoD A2.
-#
-# Wrapper chart: platform/bp-rtz-vcluster/chart/
-#   Bundles loft-sh/vcluster 0.20.0 as a Helm subchart.
-#
-# Reconciled by: Flux on every secondary region's k3s control plane.
-#
-# dependsOn:
-#   - bp-cilium       — CNI + Gateway API
-#   - bp-cert-manager — TLS for ClusterIssuers
-#
-# Per-role gating: defaults to DISABLED (rtzVcluster.enabled=false).
-# The Sovereign-provisioning tofu module (infra/hetzner/main.tf
-# secondary-CP block) flips this to "true" via postBuild.substitute
-# on secondary regions only — pending the follow-up substitute PR
-# (which adds RTZ_VCLUSTER_ENABLED to the substitute block). Until
-# that lands, operators opt in per-Sovereign overlay by patching
-# `values.rtzVcluster.enabled: true` on secondary clusters'
-# bootstrap-kit slot.
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-rtz-vcluster
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-rtz-vcluster
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "59"
-    catalyst.openova.io/vcluster-role: rtz
-    catalyst.openova.io/topology: dod-a4
-spec:
-  interval: 15m
-  releaseName: rtz-vcluster
-  targetNamespace: rtz
-  dependsOn:
-    - name: bp-cilium
-    - name: bp-cert-manager
-  chart:
-    spec:
-      chart: bp-rtz-vcluster
-      version: 0.1.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-rtz-vcluster
-        namespace: flux-system
-  install:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 15m
-    disableWait: true
-    remediation:
-      retries: 3
-  values:
-    rtzVcluster:
-      # Flipped on by tofu's secondary-CP postBuild (rtz_vcluster_enabled=true).
-      # Primary renders this slot too but tofu sets the substitute false →
-      # chart renders zero resources on the primary.
-      enabled: ${RTZ_VCLUSTER_ENABLED:=false}
-      hostNamespace: rtz
-      vclusterName: rtz
-      role: secondary
-      nodeSelector:
-        regionLabelKey: openova.io/region
-        regionLabelValue: ${SOVEREIGN_REGION_CANONICAL_LABEL}
-    vcluster:
-      controlPlane:
-        statefulSet:
-          scheduling:
-            nodeSelector:
-              openova.io/region: ${SOVEREIGN_REGION_CANONICAL_LABEL}
--- a/clusters/_template/bootstrap-kit/60-bp-vcluster-helmrepo.yaml
+++ b/clusters/_template/bootstrap-kit/60-bp-vcluster-helmrepo.yaml
@ -1,109 +0,0 @@
-# bp-vcluster-helmrepo — Catalyst bootstrap-kit Blueprint slot 60.
-#
-# Pre-stages the upstream loft-sh vcluster Helm chart source on the
-# Sovereign cluster so the Organization controller
-# (core/controllers/organization) can render per-tenant
-# `helm.toolkit.fluxcd.io/v2 HelmRelease` CRs whose `sourceRef` points
-# at `name=loft, namespace=vcluster-system` (the controller's defaults
-# at core/controllers/organization/cmd/main.go).
-#
-# Without this slot, every per-tenant vcluster HelmRelease the
-# Organization controller writes into the per-Org Gitea repo fails
-# Source reconcile with:
-#
-#   HelmRepository.source.toolkit.fluxcd.io "loft" not found
-#
-# → no per-tenant vCluster is ever spawned → the Organization
-# controller's reconciliation loop blocks on tenant onboarding.
-# Convergence blocker #2 (vCluster source install on Sovereign).
-#
-# Wrapper chart: platform/bp-vcluster-helmrepo/chart/
-#   Pure source-registration chart — registers a HelmRepository CR +
-#   the vcluster-system namespace it lives in. Ships NO upstream
-#   subchart (same shape as bp-gateway-api). The upstream chart is
-#   pulled per-tenant by Flux at HelmRelease reconcile time, NOT
-#   bundled into this slot's OCI artifact.
-#
-# Reconciled by: Flux on the new Sovereign's k3s control plane.
-#
-# Slot 60 chosen as the first free slot after the existing vCluster
-# cohort (54/58/59 — DMZ/MGMT/RTZ Sovereign-tier vClusters). This
-# slot is the per-TENANT vCluster source registration (a different
-# layer): the Sovereign-tier slots embed loft-sh/vcluster 0.20.0 as
-# a Helm subchart so they ship a single OCI artifact; this slot
-# registers a live `source.toolkit.fluxcd.io/HelmRepository` CR so
-# the Organization controller's per-tenant rendered HelmReleases can
-# resolve `chart.spec.sourceRef name=loft namespace=vcluster-system`
-# at reconcile time. The two paths are independent — this slot does
-# NOT depend on slots 54/58/59 (and vice versa).
-#
-# dependsOn:
-#   - bp-flux  — Flux's source-controller must be Ready so the
-#                HelmRepository CR is actually reconciled (otherwise
-#                the CR sits without artifacts and downstream Flux
-#                HelmReleases that reference it can't resolve).
-
---
-apiVersion: source.toolkit.fluxcd.io/v1beta2
-kind: HelmRepository
-metadata:
-  name: bp-vcluster-helmrepo
-  namespace: flux-system
-spec:
-  type: oci
-  interval: 15m
-  url: oci://ghcr.io/openova-io
-  secretRef:
-    name: ghcr-pull
---
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: bp-vcluster-helmrepo
-  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "60"
-    catalyst.openova.io/tenant-spawn: vcluster
-spec:
-  interval: 15m
-  releaseName: vcluster-helmrepo
-  # The release marker Secret lives next to every other bootstrap-kit
-  # release. The chart's templates/namespace.yaml creates the actual
-  # vcluster-system namespace (cluster-scoped Namespace resource).
-  targetNamespace: flux-system
-  dependsOn:
-    - name: bp-flux
-  chart:
-    spec:
-      chart: bp-vcluster-helmrepo
-      version: 0.1.0
-      sourceRef:
-        kind: HelmRepository
-        name: bp-vcluster-helmrepo
-        namespace: flux-system
-  install:
-    timeout: 5m
-    disableWait: false
-    remediation:
-      retries: 3
-  upgrade:
-    timeout: 5m
-    disableWait: false
-    remediation:
-      retries: 3
-  # Per-Sovereign overlay surface — operators MAY swap the upstream URL
-  # for a Harbor proxy cache (MIRROR-EVERYTHING per
-  # docs/INVIOLABLE-PRINCIPLES.md #4a) or rename the CR / namespace
-  # to align with a custom Organization-controller config.
-  #
-  # Defaults match the controller's hardcoded defaults at
-  # core/controllers/organization/cmd/main.go:
-  #   CATALYST_VCLUSTER_HELMREPO_NAME      = "loft"
-  #   CATALYST_VCLUSTER_HELMREPO_NAMESPACE = "vcluster-system"
-  values:
-    vclusterHelmRepo:
-      name: loft
-      namespace: vcluster-system
-      url: https://charts.loft.sh
-      interval: 15m
-      createNamespace: true
--- a/clusters/_template/bootstrap-kit/80-newapi.yaml
+++ b/clusters/_template/bootstrap-kit/80-newapi.yaml
@ -40,8 +40,6 @@ kind: HelmRelease
 metadata:
  name: bp-newapi
  namespace: flux-system
-  labels:
-    catalyst.openova.io/slot: "80"
 spec:
  interval: 15m
  releaseName: newapi
@ -86,64 +84,7 @@ spec:
      # of the PRIVATE newapi-mirror + metering-sidecar images. Paired
      # with cloud-init adding `newapi` to flux-system/ghcr-pull's
      # reflector auto-namespaces list.
-      # 1.4.2 (qa-loop bounded-cycle audit prov #7 Gap F, 2026-05-10):
-      # `.Values.newapi.image.tag` repointed from `v0.4.5` (fictitious —
-      # never built by any CI workflow) to `v0.13.2` (actual upstream
-      # Calcium-Ion/new-api Docker Hub release, mirrored into
-      # ghcr.io/openova-io/openova/newapi-mirror by the new
-      # `.github/workflows/build-bp-newapi.yaml` workflow). Pre-1.4.2
-      # the NewAPI Pod ImagePullBackOff'd 403 on every fresh Sovereign,
-      # blocking alice signup gate 5 (LLM).
-      # 1.4.4 (qa-loop bounded-cycle audit prov #20 Fix #138, 2026-05-11):
-      # add pre-install/pre-upgrade hook that polls the external-secrets
-      # validating-admission webhook until it returns a structured HTTP
-      # response — closes the race between bp-external-secrets reaching
-      # HR Ready=True and the apiserver-side EndpointSlice for the
-      # webhook Service being observable. Pre-1.4.4 the chart's
-      # ExternalSecret apply was rejected with `no endpoints available
-      # for service "external-secrets-webhook"` on every fresh provision,
-      # blocking the chart from reaching Ready and the Catalyst signup
-      # hook (ADR-0003 §3.2) from finding the admin-token Secret.
-      # 1.4.10 (fix-convergence-wave11, 2026-05-18): gate the
-      # defaultChannels.qwenBankDhofar entry on attestation-complete
-      # rather than hard-failing the helm template. Pre-1.4.10 the
-      # chart raised `commercial-contract attestation requires accountId`
-      # on every Sovereign that opted in to marketplace
-      # (MARKETPLACE_ENABLED=true) without ALSO supplying a signed
-      # commercial contract's `LLM_BANK_DHOFAR_ACCOUNT_ID` /
-      # `LLM_BANK_DHOFAR_CONTRACT_REF` envsubst variables. Post-1.4.10
-      # the chart silently skips the qwenBankDhofar channel when
-      # attestation is incomplete; once the operator overlay supplies
-      # the attestation values the channel composes on the next
-      # reconcile.
-      # 1.4.12 (PR #1677, 2026-05-18): default
-      # `.Values.sandboxTokenSigningKey.reflectorNamespaces` flipped
-      # from `"sandbox"` → `"catalyst-system,sandbox"`. Pre-1.4.12 the
-      # chart-emitted `newapi-bp-newapi-token-signing-key` Secret was
-      # mirrored only into a `sandbox` namespace (which does NOT exist
-      # on a stock Sovereign — bp-sandbox installs into
-      # `catalyst-system` per slot 19a `targetNamespace`); the sandbox-
-      # controller's `NEWAPI_ADMIN_SECRET` env var (secretKeyRef
-      # `optional: true`) landed EMPTY, the controller silently dropped
-      # into gitops-only mode, and zero per-Sandbox LLM-gateway tokens
-      # were ever minted (operator-visible only via the controller's
-      # `newapi_admin_secret_set=false` startup log). Caught on t22
-      # 2026-05-18 (TBD-D14). Bumping the pin pulls the post-#1677
-      # default so reflector mirrors into `catalyst-system` too.
-      # 1.4.14 (current main, 2026-05-18): latest upstream-tracking
-      # chart cut — includes 1.4.12's reflector fix.
-      # 1.4.19 (TBD-A12 #1798, 2026-05-18): add startupProbe so kubelet
-      # does NOT SIGKILL the binary at the 50s mark while GORM
-      # AutoMigrate is still in-flight on the freshly-provisioned empty
-      # `newapi` CNPG database. Pre-1.4.19 the empty DB on t22 sat with
-      # ZERO tables after 29 CrashLoopBackOff restarts — every kill
-      # raced AutoMigrate's first CREATE TABLE call mid-TLS-handshake;
-      # pg_stat_activity on the CNPG primary showed no `newapi` user
-      # connections because the kill happened before the GORM
-      # connection pool's first wire write completed. Probe budget:
-      # 30 × 10s = 5 min, comfortably above the observed 60-120s
-      # ceiling on cpx21/cpx31 nodes with sslmode=require.
-      version: 1.4.20
+      version: 1.4.1
      sourceRef:
        kind: HelmRepository
        name: bp-newapi
@ -153,12 +94,10 @@ spec:
  # ~10 s once the Postgres DSN Secret is present; the long pole is
  # waiting for the operator's Crossplane claim to materialise the DB.
  install:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
  upgrade:
-    timeout: 15m
    disableWait: true
    remediation:
      retries: 3
@ -189,20 +128,6 @@ spec:
      tls:
        enabled: true
        issuer: letsencrypt-prod
-      # Cilium Gateway HTTPRoute for `newapi.<fqdn>` (TBD-D35d, issue
-      # #1778). Sandbox runtimes hit the LLM gateway at the URL the
-      # sandbox controller mints into their environment
-      # (`NEWAPI_BASE_URL=https://newapi.${SOVEREIGN_FQDN}/v1`). Without
-      # this HTTPRoute the marketplace `tenant-wildcard` (hostnames=
-      # `*.${SOVEREIGN_FQDN}`) absorbs every newapi.${SOVEREIGN_FQDN}
-      # request and forwards to the storefront `console` Service —
-      # blocking the entire BYOS Claude Code journey at the LLM gate.
-      # An exact-hostname HTTPRoute outranks the wildcard per Gateway
-      # API spec, so enabling this on every Sovereign restores LLM
-      # reachability without touching the marketplace wildcard.
-      httpRoute:
-        enabled: true
-        host: newapi.${SOVEREIGN_FQDN}
    auth:
      adminUI:
        mode: keycloak
@ -262,27 +187,11 @@ spec:
    # operators that run their own bp-vllm + open-weight model in-
    # cluster; it composes after `qwenBankDhofar` and any operator
    # `.Values.channels`.
-    # Sandbox Wave 4 (2026-05-18, retry of sandbox-wave4-newapi-sovereign-install):
-    # qwenBankDhofar is now gated on `${MARKETPLACE_ENABLED:-false}` — the
-    # same envsubst variable bp-catalyst-platform (slot 13) reads to flip
-    # marketplace.enabled on the Catalyst control plane. This lets a
-    # franchised Sovereign with `MARKETPLACE_ENABLED=true` auto-seed the
-    # default Bank Dhofar Qwen3.6 channel without the operator having to
-    # supply per-Sovereign overlay values. The endpoint defaults to the
-    # canonical first-otech relay; `LLM_BANK_DHOFAR_BASE_URL` overrides
-    # it (e.g. for staging at https://omtd.bankdhofar.com). The upstream
-    # API key MUST be present in the Secret `newapi-channel-qwen-bankdhofar`
-    # under key `API_KEY` — either pre-seeded by cloud-init or pulled from
-    # OpenBao via the operator's ExternalSecret at path
-    # `sovereign/<fqdn>/newapi/channel-qwen-bankdhofar`. Sandbox agents
-    # (sandbox-wave4) depend on this channel being live on every Sovereign
-    # that opted in to marketplace; without it the agents fall back to
-    # mothership newapi, defeating the per-Sovereign sandboxing.
    defaultChannels:
      qwenBankDhofar:
-        enabled: ${MARKETPLACE_ENABLED:-false}
+        enabled: false
        name: qwen3.6-bankdhofar
-        endpoint: ${LLM_BANK_DHOFAR_BASE_URL:-https://llm-api.omtd.bankdhofar.com}
+        endpoint: ""
        models:
          - qwen3.6
          - qwen3-coder
@ -290,8 +199,8 @@ spec:
        existingSecretKey: API_KEY
        attestation:
          kind: commercial-contract
-          accountId: ${LLM_BANK_DHOFAR_ACCOUNT_ID:-}
-          contractRef: ${LLM_BANK_DHOFAR_CONTRACT_REF:-}
+          accountId: ""
+          contractRef: ""
      vllm:
        enabled: false
        name: qwen
--- a/clusters/_template/bootstrap-kit/kustomization.yaml
+++ b/clusters/_template/bootstrap-kit/kustomization.yaml
@ -24,48 +24,8 @@ resources:
  - 15a-external-secrets-stores.yaml
  - 16-cnpg.yaml
  - 17-valkey.yaml
-  # bp-hcloud-csi (formerly slot 17a) REMOVED 2026-05-17 (Wave 7):
-  # the Flux source-controller chart pull went through harbor.t11.* OCI
-  # endpoint BEFORE harbor itself was reachable (chicken-and-egg —
-  # harbor depends on Gateway, Gateway lives in sovereign-tls which
-  # dependsOn bootstrap-kit Ready, which never went Ready because
-  # bp-hcloud-csi was stuck on harbor pull). Caught live on t11 fresh
-  # prov 2026-05-17: bootstrap-kit Reconciliation-in-progress for 30+
-  # min → sovereign-tls "not ready: dependency bootstrap-kit not ready"
-  # → no Gateway CR → console.t11.<sov> ERR_CONNECTION_CLOSED →
-  # entire UI test matrix BLOCKED. C9-006 (hcloud-volumes default SC)
-  # is a cosmetic operator-facing nice-to-have; Gateway availability
-  # is launch-critical. Removing this slot unblocks the chain. Follow-
-  # up PR will re-add at a later slot (e.g., 19a, AFTER bp-harbor 19)
-  # OR fix the pull path to bypass the registry pivot during bootstrap.
  - 18-seaweedfs.yaml
  - 19-harbor.yaml
-  # bp-sandbox (slot 19a) — sandbox-controller Wave 1 (PR #1622) + Wave 8
-  # pty-server / MCP / NEWAPI runtime wiring. Reconciles
-  # `sandbox.openova.io/v1.Sandbox` CRs into per-Sandbox manifests
-  # written into the per-Org `catalyst-tenant` Gitea repo.
-  #
-  # Wave 11 convergence fix (2026-05-18, caught on t16.omantel.biz):
-  # originally slot 61 — moved here after bp-harbor (slot 19) because the
-  # post-handover cutover (slot 06a, Step-06 phase-1) rewrites every
-  # HelmRepository URL `oci://ghcr.io/openova-io` →
-  # `oci://harbor.<sov-fqdn>/openova-io`, and the bp-sandbox chart pull
-  # then hits harbor.<sov-fqdn> BEFORE bp-harbor is Ready — chicken-and-
-  # egg. Same failure shape as Wave 7 #1610 with bp-hcloud-csi (REMOVED,
-  # see the slot-17a comment block above) but resolved here by
-  # sequencing rather than removal so the slot remains available for
-  # the Wave 11 Sandbox MVP without manual Day-2 add-app re-introduction.
-  # HR's dependsOn pins ordering to AFTER bp-harbor + bp-vcluster-
-  # helmrepo + bp-catalyst-platform. Wave 11 convergence fix (TBD-D11,
-  # 2026-05-18): now gated default-ON via ${SANDBOX_ENABLED:-true} on
-  # the bootstrap-kit Kustomization substitute so the controller
-  # materialises on every fresh prov (Wave 8 pty-server + MCP images
-  # are SHA-stamped in chart values.yaml). Operators may opt-OUT via
-  # SANDBOX_ENABLED=false on the per-Sovereign overlay's substitute
-  # map. The chart's own values.enabled default remains false (defence
-  # in depth — a stale per-Sovereign overlay that hand-installs the
-  # HR without our envsubst layer still default-OFFs gracefully).
-  - 19a-bp-sandbox.yaml
  # 06a — Post-handover Self-Sovereignty Cutover (issue #791). Filename
  # carries the 06a prefix to colocate cohorts visually, but the slot's
  # dependsOn pins actual install order to AFTER bp-gitea (slot 10) and
@ -99,63 +59,6 @@ resources:
  # installs proxy → gateway.
  - 51-bp-k8s-ws-proxy.yaml
  - 52-bp-guacamole.yaml
-  # qa-loop iter-12 Fix #53C — EPIC-5 leftovers (NetBird zero-trust mesh
-  # + DMZ vCluster isolation). Slots 53/54. Both default-OFF; flip on
-  # via NETBIRD_ENABLED=true / DMZ_VCLUSTER_ENABLED=true on the
-  # bootstrap-kit Kustomization substitute.
-  #
-  # Slot 54 (bp-dmz-vcluster) implements docs/SOVEREIGN-MULTI-REGION-
-  # DOD.md A4 ("each region runs a DMZ vCluster") + A2 ("inter-region
-  # link = DMZ WireGuard over PUBLIC IPs"). Default-ON because the DMZ
-  # vCluster is the public-fronted vCluster AND the inter-region WG
-  # hop — every region needs it for the topology to converge.
-  - 54-bp-dmz-vcluster.yaml
-  # qa-loop iter-12 Fix #54 Workstream 1 — bp-hcloud-ccm (slot 55).
-  # Hetzner Cloud Controller Manager. The CCM owns node providerID
-  # flips (k3s://… → hcloud://<server-id>) AND materialisation of
-  # Service-of-type-LoadBalancer as Hetzner Cloud LBs. Without this,
-  # every LB-typed Service stays Pending — the proximate root cause
-  # clustermesh-apiserver could not migrate from NodePort to LB on
-  # omantel multi-region (qa-loop iter-12 Fix #53D).
-  - 55-bp-hcloud-ccm.yaml
-  # OpenovaFlow observability cohort — slots 56/57. Three-agent split
-  # (Agent #1: TS @openova/flow-core + @openova/flow-canvas, Agent #2:
-  # Go server + flux adapter, Agent #3: bootstrap-kit + catalyst-api
-  # proxy integration). Slot 56 (server) installs on PRIMARY clusters
-  # only; per-Sovereign overlay disables on secondaries. Slot 57
-  # (emitter) is a DaemonSet — runs on every cluster (mother + every
-  # Sovereign + every secondary region) so each region's Flux events
-  # land in the same per-deployment flow.
-  - 56-bp-openova-flow-server.yaml
-  - 57-bp-openova-flow-emitter.yaml
-  # DoD A4 vCluster topology (2026-05-16) — slots 58 + 59 finish the
-  # primary-mgmt + secondary-rtz pair that goes alongside the slot 54
-  # DMZ vCluster (every region). Combined topology per region:
-  #   primary    region → MGMT (58) + DMZ (54) vCluster
-  #   secondary  region → DMZ  (54) + RTZ (59) vCluster
-  # Slot 58 default-OFF until the per-CP postBuild substitute follow-up
-  # PR adds MGMT_VCLUSTER_ENABLED only on primary. Slot 59 same shape
-  # for secondaries via RTZ_VCLUSTER_ENABLED. See each slot's header
-  # comment for the migration plan.
-  - 58-bp-mgmt-vcluster.yaml
-  - 59-bp-rtz-vcluster.yaml
-  # bp-vcluster-helmrepo (slot 60) — pre-stages the upstream loft-sh
-  # vcluster Helm chart source so the Organization controller
-  # (core/controllers/organization) can render per-tenant
-  # `helm.toolkit.fluxcd.io/v2 HelmRelease` CRs whose `chart.spec.
-  # sourceRef` points at `name=loft, namespace=vcluster-system`.
-  # Convergence blocker #2 (vCluster source install on Sovereign).
-  # Different layer from slots 54/58/59 (those bundle loft-sh/vcluster
-  # 0.20.0 as a subchart for the Sovereign-tier DMZ/MGMT/RTZ vClusters;
-  # this slot registers a live Flux source so per-TENANT vClusters can
-  # be spawned by the Organization controller at runtime). Default-ON.
-  - 60-bp-vcluster-helmrepo.yaml
-  # bp-sandbox MOVED 2026-05-18 (Wave 11 convergence fix) from slot 61
-  # to slot 19a (above, after bp-harbor) to break a chicken-and-egg
-  # cycle with harbor.<sov-fqdn> chart pulls during bootstrap. See the
-  # slot-19a comment block + 19a-bp-sandbox.yaml header for full
-  # diagnostic chain. No functional difference for operators — the
-  # SANDBOX_ENABLED knob still gates rendering identically.
  # bp-newapi (slot 80) — multi-tenant LLM marketplace gateway. Sequenced
  # after the W2.K1 dependency wave (cnpg/keycloak/openbao Ready) so
  # NewAPI's ExternalSecret + DSN dependencies resolve on first reconcile.
--- a/clusters/_template/sovereign-tls/cilium-envoy-tls-restart-job.yaml
+++ b/clusters/_template/sovereign-tls/cilium-envoy-tls-restart-job.yaml
@ -1,283 +0,0 @@
-# cilium-envoy SDS hot-reload trigger.
-#
-# Root cause (qa-loop bounded-cycle Provision #7, ad2532a8):
-#
-# cilium-envoy DaemonSet starts as part of the bp-cilium HelmRelease,
-# which lands ~20 min before the wildcard cert backing the
-# `sovereign-wildcard-tls` Secret is issued by cert-manager (the cert
-# resource itself is in this same Kustomization, but the DNS-01 challenge
-# round-trip against the central PowerDNS adds 60-90s on top of the
-# Kustomization apply). Envoy's xDS subscription for the SDS Secret
-# observed the Secret was missing at startup, hit its initial-fetch
-# timeout, and marked the Gateway listener unready. cilium-envoy does
-# NOT re-subscribe after the Secret materialises — once the SDS bind
-# is in `error` state the listener stays down until the envoy process
-# restarts. Symptom: `console.<sov>` returns curl rc=000 (TLS handshake
-# failure: "no listener on this port") indefinitely after a fresh
-# provision, even though every HelmRelease reaches Ready.
-#
-# Affected: every fresh Sovereign provision will hit this until the
-# upstream cilium-envoy ships SDS hot-reload. Each one previously
-# required a manual `kubectl rollout restart ds/cilium-envoy` for the
-# console to come up — exactly the kind of out-of-band step that
-# violates the zero-touch-provision rule (memory entry
-# feedback_zero_touch_provision_no_questions.md).
-#
-# Fix path B from the brief: ship a Job in this same Kustomization that
-# (a) waits for the `sovereign-wildcard-tls` Secret to materialise with
-# a non-empty `tls.crt` field, then (b) bumps the cilium-envoy DaemonSet
-# pod template via `kubectl rollout restart`. The fresh envoy pods
-# subscribe to the now-existing Secret and the listener comes up cleanly
-# within ~30s of the cert appearing.
-#
-# Why this Kustomization (and not the bp-cilium chart):
-#   - The cert lifecycle and the restart trigger live together. When the
-#     cert resource is removed (multi-zone migration via #831), removing
-#     this Job is a single-PR delete.
-#   - bp-cilium chart has wide blast radius; bumping its OCI artifact for
-#     a Sovereign-bootstrap-only behaviour would force every cluster
-#     (including SME, contabo) to take the change.
-#   - The SDS Secret name + namespace are the contract between this
-#     Kustomization (cert producer) and cilium-envoy (cert consumer);
-#     keeping the restart trigger here keeps both ends of the contract
-#     in one file tree.
-#
-# Re-fire semantics:
-#   - The Job has a deterministic name (no generateName) so re-applying
-#     this Kustomization with the same revision is a no-op once the Job
-#     reached Complete.
-#   - On a chart-bump or re-provision (different revision), Kustomize
-#     re-creates the Job with the new generation; it runs again, re-
-#     bumps the DaemonSet, and exits — idempotent.
-#   - ttlSecondsAfterFinished cleans up the Pod/Job after 1h so kubectl
-#     output stays clean.
-#
-# Idempotency at the envoy side:
-#   - `kubectl rollout restart` patches a `kubectl.kubernetes.io/restartedAt`
-#     annotation on the pod template. If the cert was already present at
-#     envoy startup (steady-state cluster), the restart still happens but
-#     incurs only a ~10s data-plane blip; the bootstrap window is short
-#     enough that this is the expected hot-path on every fresh provision
-#     and a no-op-cost on every subsequent revision.
-
---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: cilium-envoy-tls-restart
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-    catalyst.openova.io/component: cilium-envoy-tls-restart
---
-# Namespaced Role — only needs to (a) read the Secret to detect
-# materialisation and (b) patch the DaemonSet pod template to trigger
-# rollout. Both verbs scoped to kube-system, both resources scoped to
-# the exact resource names so this SA cannot be repurposed to mutate
-# other workloads.
-#
-# Note: `patch` (not `update`) on daemonsets is sufficient for
-# `kubectl rollout restart` because that command issues a strategic-
-# merge patch on `.spec.template.metadata.annotations`. Verified
-# against kubectl 1.31.4 source (pkg/cmd/rollout/rollout_restart.go).
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: cilium-envoy-tls-restart
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-    catalyst.openova.io/component: cilium-envoy-tls-restart
-rules:
-  - apiGroups: [""]
-    resources: ["secrets"]
-    # 2026-05-17 t143 dual-cert collision cleanup: the per-zone Secret
-    # the Cilium Gateway now references is named
-    # `sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}`
-    # (see clusters/_template/sovereign-tls/cilium-gateway.yaml:44 +
-    #  clusters/_template/sovereign-tls/cilium-gateway-cert.yaml). The
-    # legacy `sovereign-wildcard-tls` (no dashed suffix) is no longer
-    # produced anywhere — drop it from the resourceNames allowlist so
-    # this Role grants the minimum needed for the live Secret name.
-    resourceNames: ["sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}"]
-    verbs: ["get", "watch", "list"]
-  - apiGroups: ["apps"]
-    resources: ["daemonsets"]
-    resourceNames: ["cilium-envoy"]
-    verbs: ["get", "patch", "list", "watch"]
-  # ALSO patch the cilium-operator Deployment. Reason: on a fresh
-  # Sovereign, cilium-operator's first CEC reconciliation produces a
-  # CiliumEnvoyConfig WITHOUT the hostNetwork bind `additionalAddresses.
-  # socketAddress: 0.0.0.0:30443` — even though `gateway-api-hostnetwork-
-  # enabled=true` and `gateway-api-hostnetwork-nodelabelselector=kubernetes.io/os=linux`
-  # are correctly set in cilium-config. After an operator pod-restart
-  # the next CEC reconcile DOES populate the bind, and cilium-envoy
-  # binds host:30443 cleanly. Without this restart, Hetzner LB targets
-  # stay `unhealthy` on 30080/30443 forever and console.<sov-fqdn>
-  # never serves. Caught live on prov 492c81e2 (omantel.biz, 2026-05-15)
-  # plus every prior multi-region prov where operators were doing the
-  # restart manually. This rule lets the Job fix it without operator
-  # intervention.
-  - apiGroups: ["apps"]
-    resources: ["deployments"]
-    resourceNames: ["cilium-operator"]
-    verbs: ["get", "patch", "list", "watch"]
-  # Read rollout status so the Job can wait for new pods to come up
-  # before exiting. `kubectl rollout status` does NOT just GET — it
-  # uses client-go informerwatcher to LIST+WATCH the
-  # Deployment/DaemonSet resource. Without list+watch verbs the
-  # informer fails with "forbidden: cannot list resource ..." and the
-  # Job stalls at the rollout-status check until activeDeadlineSeconds.
-  # Caught on prov t110.omani.works (fe09897a1b6b3c1d, 2026-05-15):
-  # tls-restart Job stuck Running 10m+ on the cilium-operator rollout
-  # check, never restarted cilium-envoy, console.<fqdn> never served.
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: cilium-envoy-tls-restart
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-    catalyst.openova.io/component: cilium-envoy-tls-restart
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: Role
-  name: cilium-envoy-tls-restart
-subjects:
-  - kind: ServiceAccount
-    name: cilium-envoy-tls-restart
-    namespace: kube-system
---
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: cilium-envoy-tls-restart
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-    catalyst.openova.io/component: cilium-envoy-tls-restart
-spec:
-  # backoffLimit 6 with a default backoff of 10s..6m gives ≈10 min of total
-  # retry headroom. The cert provisioning DNS-01 round-trip is the only
-  # thing this Job waits on, and that completes ≤90s in steady-state. A
-  # higher limit absorbs the rare PowerDNS propagation flake.
-  backoffLimit: 6
-  # Clean up Job + Pod 1h after success so kubectl get jobs stays sane.
-  ttlSecondsAfterFinished: 3600
-  # 15 min hard cap. If the cert hasn't arrived in 15 min, something is
-  # broken upstream (cert-manager, PowerDNS webhook, ACME) and a Job
-  # restart loop won't fix it; surface the failure to Flux so the
-  # operator sees Kustomization NotReady.
-  activeDeadlineSeconds: 900
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: cilium-envoy-tls-restart
-        catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-        catalyst.openova.io/component: cilium-envoy-tls-restart
-    spec:
-      serviceAccountName: cilium-envoy-tls-restart
-      restartPolicy: OnFailure
-      # alpine/k8s:1.31.4 — canonical kubectl image used across the
-      # Catalyst fleet (self-sovereign-cutover, seaweedfs, harbor have
-      # all converged on this after Bitnami deprecated public Docker
-      # Hub in 2025). Ships kubectl + sh + standard busybox toolchain.
-      containers:
-        - name: wait-and-restart
-          image: alpine/k8s:1.31.4
-          imagePullPolicy: IfNotPresent
-          resources:
-            requests:
-              cpu: 10m
-              memory: 32Mi
-            limits:
-              cpu: 200m
-              memory: 128Mi
-          securityContext:
-            allowPrivilegeEscalation: false
-            readOnlyRootFilesystem: true
-            runAsNonRoot: true
-            runAsUser: 65532
-            capabilities:
-              drop: ["ALL"]
-          command:
-            - /bin/sh
-            - -c
-            # NOTE: Flux postBuild.substitute processes ${...} in this
-            # YAML BEFORE it lands as a Job. Shell variable references
-            # below MUST be escaped as $${...} so Flux emits a literal
-            # ${...} that the shell then evaluates at Job runtime. Without
-            # the escape, Flux replaces the bare reference (SECRET_NS
-            # etc.) with an empty string because those names aren't in
-            # substituteFrom, and the Job ends up running
-            # `kubectl get secret -n "" ""` forever (caught live on
-            # prov c9df5eed1c1ba6cf, t101.omani.works, 2026-05-15).
-            - |
-              set -eu
-
-              SECRET_NS=kube-system
-              # 2026-05-17 t143 dual-cert collision cleanup: the canonical
-              # SDS Secret the Cilium Gateway now references is the
-              # per-zone `sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}`.
-              # Cloud-init substitutes SOVEREIGN_FQDN_DASHED via Flux
-              # postBuild.substitute, so the literal cluster value lands
-              # here at apply time (verified in
-              # infra/hetzner/cloudinit-control-plane.tftpl §SOVEREIGN_FQDN_DASHED).
-              SECRET_NAME=sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}
-              DS_NS=kube-system
-              DS_NAME=cilium-envoy
-
-              echo "[tls-restart] waiting for $${SECRET_NS}/$${SECRET_NAME} with non-empty tls.crt"
-              # Poll up to ~14 min (84 * 10s); activeDeadlineSeconds=900
-              # is the outer hard limit. We treat "not yet" and "empty
-              # tls.crt" the same — both mean the cert hasn't been issued.
-              for i in $(seq 1 84); do
-                # `--ignore-not-found` so the early polls (Secret not
-                # created yet) don't error — kubectl returns empty
-                # output and the for-loop continues.
-                tls_crt=$(kubectl get secret -n "$${SECRET_NS}" "$${SECRET_NAME}" \
-                  --ignore-not-found \
-                  -o jsonpath='{.data.tls\.crt}' 2>/dev/null || true)
-                if [ -n "$${tls_crt}" ]; then
-                  echo "[tls-restart] $${SECRET_NAME} present with non-empty tls.crt (attempt $${i})"
-                  break
-                fi
-                if [ "$${i}" = "84" ]; then
-                  echo "[tls-restart] FATAL: $${SECRET_NAME} did not become non-empty after 14m" >&2
-                  exit 1
-                fi
-                sleep 10
-              done
-
-              # Step 1 — restart cilium-operator FIRST so it regenerates the
-              # CiliumEnvoyConfig with the hostNetwork bind
-              # `additionalAddresses.socketAddress: 0.0.0.0:30443`. On a
-              # fresh Sovereign, the FIRST CEC reconcile (driven by
-              # Gateway create) misses this bind even though the
-              # configmap keys gateway-api-hostnetwork-{enabled,
-              # nodelabelselector} are correct. An operator pod-restart
-              # forces a fresh CEC render that includes the bind. Without
-              # this, cilium-envoy is restarted in step 2 but binds only
-              # 127.0.0.1:* control sockets — host:30443 stays empty,
-              # Hetzner LB targets stay unhealthy, console returns 000.
-              echo "[tls-restart] bumping deploy/cilium-operator to regenerate CEC with hostNetwork bind"
-              kubectl rollout restart -n "$${DS_NS}" deploy/cilium-operator
-              echo "[tls-restart] waiting for deploy/cilium-operator rollout"
-              kubectl rollout status -n "$${DS_NS}" deploy/cilium-operator --timeout=3m
-
-              # Step 2 — restart cilium-envoy so it picks up (a) the
-              # freshly-regenerated CEC with the hostNetwork bind, AND
-              # (b) the now-existing sovereign-wildcard-tls SDS Secret.
-              echo "[tls-restart] bumping ds/$${DS_NAME} in $${DS_NS} to force envoy SDS re-subscribe + CEC reload"
-              kubectl rollout restart -n "$${DS_NS}" "ds/$${DS_NAME}"
-
-              # Block until the new pods are Ready. `kubectl rollout
-              # status` exits 0 on full rollout, non-zero on timeout.
-              # Bound to 5 min — a single-node k3s rolls 1 pod, a 3-node
-              # HA cluster rolls 3 in parallel; both finish ≤90s in
-              # practice.
-              echo "[tls-restart] waiting for ds/$${DS_NAME} rollout"
-              kubectl rollout status -n "$${DS_NS}" "ds/$${DS_NAME}" --timeout=5m
-
-              echo "[tls-restart] complete — cilium-operator regenerated hostNetwork CEC + cilium-envoy serves SDS Secret $${SECRET_NAME}"
--- a/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml
+++ b/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml
@ -1,87 +1,68 @@
-# Per-name TLS Certificates for the Cilium Gateway listeners.
+# Wildcard TLS Certificate for the Cilium Gateway listener.
 #
-# Architecture change (2026-05-15): switched from ONE wildcard cert
-# `*.<sovereignFQDN>` to N per-name certs (console, auth, gitea, harbor,
-# registry, api, bao, grafana, hubble, openova-flow, pdns, marketplace).
+# Split from clusters/_template/bootstrap-kit/01-cilium.yaml in
+# fix/cilium-cert-split-from-bootstrap-kit (Phase-8a bug #13). The
+# Cert lives in its OWN Flux Kustomization (`sovereign-tls`) which
+# depends on bootstrap-kit being Ready — i.e. cert-manager + the
+# powerdns-webhook are both installed and their CRDs registered.
 #
-# Why: Let's Encrypt enforces "5 New Certificates per Exact Set of
-# Identifiers per 168h". The wildcard pattern bundled ALL hostnames
-# under ONE identifier set `[*.<fqdn>, <fqdn>]` — every prov-cycle
-# burned 1 of 5 slots from that single bucket. Five iterations on the
-# same FQDN locked the apex for a week. Hit live three times:
-#   - omantel.biz exhausted 2026-05-13 (12 reprovs)
-#   - omani.works exhausted 2026-05-14 (5 reprovs in 90 min)
-#   - omantel.biz exhausted again 2026-05-15 (this PR's origin)
+# Without this split, Flux's server-side dry-run on the bootstrap-kit
+# Kustomization fails with `no matches for kind "Certificate" in
+# version "cert-manager.io/v1"` because the validation runs BEFORE any
+# HelmRelease has installed the cert-manager CRDs — and a single
+# dry-run failure aborts the entire Kustomization apply, leaving the
+# Sovereign with zero HRs reconciled.
 #
-# Per-name model gives each hostname its OWN 5/168h bucket:
-#   - console.<fqdn>  → 5 reprovs/week
-#   - auth.<fqdn>     → 5 reprovs/week
-#   - gitea.<fqdn>    → 5 reprovs/week
-#   ... × 12 hostnames = 60 effective reprov-slots/week
+# The Gateway resource stays in 01-cilium.yaml: Gateway.networking.k8s.io
+# CRDs ship with Cilium itself (gatewayAPI.enabled=true) and dry-run
+# against them only requires the Gateway API CRD bundle which Cilium
+# pre-installs at chart-time. The Certificate is the ONLY resource
+# whose CRD is provided by a HelmRelease in the same Kustomization
+# that needs to validate it.
 #
-# 2026-05-17 t143 dual-cert collision cleanup
-# -------------------------------------------
-# Previously this Certificate was named `sovereign-wildcard-tls` and
-# wrote a Secret of the same name. After PR O (2026-05-17) moved the
-# Cilium Gateway listener's certificateRefs to the per-zone Secret
-# `sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}` (see
-# clusters/_template/sovereign-tls/cilium-gateway.yaml:44), the legacy
-# Secret stopped being referenced by anything — but the Certificate
-# kept renewing, burning LE budget for no production value and showing
-# up in audits as an orphan TLS Secret on every Sovereign.
+# Issuer: `letsencrypt-dns01-prod-powerdns` is shipped by
+# bp-cert-manager-powerdns-webhook (bootstrap-kit slot 49). It writes
+# the ACME challenge TXT record to contabo's central PowerDNS at
+# https://pdns.openova.io (authoritative for omani.works) so Let's
+# Encrypt validation succeeds even before the Sovereign's own NS
+# delegation has propagated. Replaces the previous letsencrypt-dns01-prod
+# (dynadot-webhook-backed) — Dynadot is not the API-level authority for
+# omani.works subdomains. Caught live on otech43–46.
 #
-# Single-source-of-truth fix: this Certificate now writes to the SAME
-# dashed-suffix Secret the Gateway already references. One Cert, one
-# Secret, one LE issuance per renewal. No more dual-cert collision
-# and no extra LE budget consumed.
+# ──────────────────────────────────────────────────────────────────────────
+# Multi-zone Sovereign (issue #827, parent epic #825) coexistence note
+# ──────────────────────────────────────────────────────────────────────────
+# bp-catalyst-platform 1.4.0+ ships templates/sovereign-wildcard-certs.yaml
+# which renders one Certificate PER ENTRY in `.Values.parentZones`, each
+# named `sovereign-wildcard-tls-<sanitised-zone>` (e.g.
+# `sovereign-wildcard-tls-omani-trade`). Those resource names are DISTINCT
+# from this file's `sovereign-wildcard-tls` so the two paths never collide:
+#   - Single-zone Sovereigns (parentZones empty) — this file owns the only
+#     wildcard cert.
+#   - Multi-zone Sovereigns (parentZones populated) — this file STILL owns
+#     `sovereign-wildcard-tls` (covering the operator's primary parent
+#     zone) AND the chart adds N additional zone-specific certs. The
+#     Cilium Gateway listener is updated in the per-cluster overlay to
+#     reference the appropriate Secret per zone listener.
 #
-# This pattern is the standard production approach (see Cloudflare,
-# Vercel, Render). Wildcards are reserved for the limited cases where
-# customer-provided subdomains aren't known in advance.
+# Once issue #831 lands a multi-listener Gateway template in
+# bp-catalyst-platform itself, this file becomes redundant and is
+# deletable.

 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
-  # Match the Secret name the Gateway listener references
-  # (clusters/_template/sovereign-tls/cilium-gateway.yaml:44). Cloud-init
-  # substitutes SOVEREIGN_FQDN_DASHED = SOVEREIGN_FQDN with `.` → `-`
-  # (infra/hetzner/cloudinit-control-plane.tftpl §SOVEREIGN_FQDN_DASHED).
-  name: sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}
+  name: sovereign-wildcard-tls
  namespace: kube-system
  labels:
    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
    catalyst.openova.io/component: cilium-gateway
 spec:
-  secretName: sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}
+  secretName: sovereign-wildcard-tls
  issuerRef:
-    name: ${WILDCARD_CERT_ISSUER}
+    name: letsencrypt-dns01-prod-powerdns
    kind: ClusterIssuer
-  commonName: "console.${SOVEREIGN_FQDN}"
-  # SubjectAltNames — explicit list of Sovereign-facing surfaces. New
-  # services added to bootstrap-kit MUST be added here so the cert
-  # covers them at issuance time. Order is preserved in the cert; for
-  # cosmetic reasons the operator-facing names come first.
+  commonName: "*.${SOVEREIGN_FQDN}"
  dnsNames:
-    - "console.${SOVEREIGN_FQDN}"
-    - "auth.${SOVEREIGN_FQDN}"
-    - "gitea.${SOVEREIGN_FQDN}"
-    - "registry.${SOVEREIGN_FQDN}"
-    - "api.${SOVEREIGN_FQDN}"
-    - "bao.${SOVEREIGN_FQDN}"
-    - "grafana.${SOVEREIGN_FQDN}"
-    - "hubble.${SOVEREIGN_FQDN}"
-    - "pdns.${SOVEREIGN_FQDN}"
-    - "openova-flow.${SOVEREIGN_FQDN}"
-    - "guacamole.${SOVEREIGN_FQDN}"
-    - "marketplace.${SOVEREIGN_FQDN}"
-    # sandbox.<sov-fqdn> — public URL for the per-Sandbox pty-server (PR #1641).
-    # The sandbox-controller renders an HTTPRoute on sandbox.<sov-fqdn>/
-    # sessions/<owner-uid>/* attached to the cilium-gateway. The wildcard
-    # `*.<sov-fqdn>` Gateway listener already matches the hostname, but the
-    # per-name SAN cert here must include `sandbox.<sov-fqdn>` for
-    # cilium-envoy to serve the right cert (otherwise browsers see
-    # NET::ERR_CERT_COMMON_NAME_INVALID). Matches the entry in
-    # products/catalyst/bootstrap/api/internal/handler/sovereign_dns_records.go
-    # CanonicalSovereignSubdomains so the parent-zone A-record set + cert
-    # SAN list stay aligned.
-    - "sandbox.${SOVEREIGN_FQDN}"
+    - "*.${SOVEREIGN_FQDN}"
+    - "${SOVEREIGN_FQDN}"
--- a/clusters/_template/sovereign-tls/cilium-gateway.yaml
+++ b/clusters/_template/sovereign-tls/cilium-gateway.yaml
@ -4,79 +4,6 @@
 # whole Kustomization before applying any HR, so Gateway dry-run fails on
 # a fresh cluster. The sovereign-tls Kustomization dependsOn bootstrap-kit
 # Ready, so by the time Gateway is applied here, Cilium has installed.
-#
-# Multi-zone listeners (issue #831, parent epic #827)
-# ---------------------------------------------------
-# Before this change the Gateway declared a single listener pair (HTTPS +
-# HTTP) on `*.${SOVEREIGN_FQDN}`. That worked for legacy single-zone
-# Sovereigns but BROKE every tenant URL under a non-primary parent zone:
-#   - Primary zone: omani.works   → console.omani.works  ✅ TLS terminates
-#   - SME pool:    omani.homes    → wp-foo.omani.homes  ❌ TLS handshake
-#                                                       mismatch (cert
-#                                                       exists per chart's
-#                                                       sovereign-wildcard-
-#                                                       certs.yaml but no
-#                                                       Gateway listener
-#                                                       claims the
-#                                                       hostname).
-# Symptom: cilium-envoy serves the default fallback cert, browser shows
-# NET::ERR_CERT_COMMON_NAME_INVALID, marketplace WordPress tenants on
-# omani.homes are unreachable.
-#
-# Fix: render one listener pair per parent zone. The listener block is
-# materialised at Terraform plan time (infra/hetzner/main.tf
-# locals.parent_domains_listeners_yaml — jsonencode of the listener
-# objects), threaded through Flux postBuild.substitute as
-# ${PARENT_DOMAINS_LISTENERS_YAML}, and consumed BELOW as a YAML inline-
-# flow array value on `spec.listeners`. Each pair's certificateRefs
-# target the per-zone Secret rendered by products/catalyst/chart/
-# templates/sovereign-wildcard-certs.yaml (PR #827) so the Gateway
-# listener and the cert resource are always in lockstep.
-#
-# Why a scalar placeholder, not a multi-line block:
-#   - kustomize-build PARSES the YAML before Flux runs envsubst. A
-#     placeholder on its own line at column 0 is rejected by the YAML
-#     parser ("could not find expected ':'"), and kustomize fails. A
-#     scalar like `listeners: ${VAR}` parses cleanly — kustomize sees
-#     the value as the literal string `${VAR}` and emits it unchanged.
-#     Flux's envsubst step then swaps it for the JSON-flow array string
-#     `[{name: https-omani-works, ...}, ...]`, which the apiserver
-#     parses as the real listener list.
-#
-# Why not a Helm template here: the Cilium Gateway resource lives in the
-# Kustomize-managed sovereign-tls path (not the chart) because its
-# Kustomization dependsOn bootstrap-kit Ready — i.e. it lands BEFORE
-# bp-catalyst-platform reconciles. Moving it into the chart would invert
-# the ordering and produce a transient "no Gateway → no envoy listener →
-# console unreachable" gap during every Helm upgrade. envsubst-driven
-# pre-rendered YAML is the canonical pattern for this slot.
-#
-# Listener naming convention (t20 critical fix #3):
-#   - SINGLE parent zone (the common case) → bare names `https` /
-#     `http`. Every platform chart's HTTPRoute (harbor, keycloak,
-#     grafana, gitea, openbao, powerdns, stalwart-tenant) hardcodes
-#     `parentRefs[0].sectionName: https`. If we rename the listener to
-#     `https-<sanitised-zone>` for a single-zone Sovereign, every
-#     HTTPRoute reports `Accepted=False NoMatchingListener` and the
-#     Sovereign Console / Harbor / Keycloak etc. are unreachable at
-#     the Gateway. Keeping bare names for the single-zone case is the
-#     safer rollback. (Was broken between PR #1640 and the t20 fix.)
-#   - MULTIPLE parent zones → unique names `https-<sanitised-zone>` /
-#     `http-<sanitised-zone>` where sanitised-zone = zone name with
-#     '.' → '-' (e.g. omani-works, omani-homes). Distinct names per
-#     listener so the Gateway controller programs them all (duplicate
-#     `name: https` produces a Conflicting status condition and skips
-#     all but the first). For multi-zone Sovereigns whose HTTPRoutes
-#     must attach under a non-primary zone, override `sectionName` via
-#     values.yaml at the chart level.
-#   - The certificateRefs.name is ALWAYS the per-zone
-#     `sovereign-wildcard-tls-<sanitised-zone>` (see
-#     products/catalyst/chart/templates/sovereign-wildcard-certs.yaml)
-#     — independent of the listener-name choice above.
-#
-# The listener block is rendered by infra/hetzner/main.tf locals.
-# parent_domains_listeners_yaml using local.parent_domains_single_zone
-# to switch between the two naming schemes.

 apiVersion: gateway.networking.k8s.io/v1
 kind: Gateway
@ -105,4 +32,23 @@ spec:
  #
  # See infra/hetzner/main.tf hcloud_load_balancer_service.{http,https}
  # destination_port settings — they MUST match these listener ports.
-  listeners: ${PARENT_DOMAINS_LISTENERS_YAML}
+  listeners:
+    - name: https
+      port: 30443
+      protocol: HTTPS
+      hostname: "*.${SOVEREIGN_FQDN}"
+      tls:
+        mode: Terminate
+        certificateRefs:
+          - kind: Secret
+            name: sovereign-wildcard-tls
+      allowedRoutes:
+        namespaces:
+          from: All
+    - name: http
+      port: 30080
+      protocol: HTTP
+      hostname: "*.${SOVEREIGN_FQDN}"
+      allowedRoutes:
+        namespaces:
+          from: All
--- a/clusters/_template/sovereign-tls/kustomization.yaml
+++ b/clusters/_template/sovereign-tls/kustomization.yaml
@ -3,18 +3,3 @@ kind: Kustomization
 resources:
  - cilium-gateway-cert.yaml
  - cilium-gateway.yaml
-  # Watch+rollout-restart Job for cilium-envoy. cilium-envoy's xDS SDS
-  # subscription does NOT recover after the initial-fetch timeout, so a
-  # fresh Sovereign whose envoy started before the wildcard cert was
-  # issued serves no listener forever. This Job waits for the Secret
-  # then bumps the DaemonSet, restoring the listener within ≤90s of
-  # the cert appearing. See file header for full root cause + design
-  # rationale (qa-loop bounded-cycle Provision #7).
-  - cilium-envoy-tls-restart-job.yaml
-  # C7-007 (2026-05-17 t143) — one-shot cleanup of the pre-PR-O legacy
-  # `sovereign-wildcard-tls` Certificate + Secret pair. Idempotent
-  # (`--ignore-not-found`), runs once per Flux reconciliation
-  # generation. Fresh Sovereigns succeed as a no-op; pre-PR-O
-  # Sovereigns delete the orphan resources. Removable from the list
-  # once every live prov has reconciled past it.
-  - legacy-cert-cleanup-job.yaml
--- a/clusters/_template/sovereign-tls/legacy-cert-cleanup-job.yaml
+++ b/clusters/_template/sovereign-tls/legacy-cert-cleanup-job.yaml
@ -1,151 +0,0 @@
-# C7-007 (2026-05-17 t143) — one-shot cleanup Job for the legacy
-# `sovereign-wildcard-tls` Certificate + Secret pair.
-#
-# Background
-# ----------
-# Pre-PR-O Sovereigns rendered a Certificate named `sovereign-wildcard-tls`
-# (with a Secret of the same name) AND, after PR O moved the Cilium
-# Gateway listener to the per-zone `sovereign-wildcard-tls-${SOVEREIGN_FQDN_DASHED}`
-# Secret, the legacy Certificate kept renewing on cert-manager's
-# default schedule. Result: every audit on a pre-PR-O Sovereign showed
-# an orphan TLS Secret in kube-system, cert-manager wasted LE budget
-# renewing a Secret nothing consumed, and operators had to remember to
-# `kubectl delete` it after every Flux reconciliation re-asserted the
-# legacy resource (which it no longer does — PR O's `cilium-gateway-cert.yaml`
-# now produces ONLY the dashed-suffix shape).
-#
-# What this Job does
-# ------------------
-# Idempotent delete of:
-#   1. `kube-system/sovereign-wildcard-tls` Certificate (cert-manager.io/v1)
-#   2. `kube-system/sovereign-wildcard-tls` Secret (kubernetes.io/tls)
-#
-# Each delete is `--ignore-not-found` so a fresh Sovereign that never
-# carried the legacy shape reports "no-op" and Succeeds. The Job runs
-# ONCE per Flux reconciliation generation (the helm.sh/hook
-# annotations on the bp-self-sovereign-cutover chart aren't applicable
-# here because this lives in the per-Sovereign overlay, not a Helm
-# chart — Flux's Kustomization re-applies idempotently).
-#
-# Image
-# -----
-# Uses the canonical OpenOva-mirrored alpine/k8s image (mothership
-# Harbor proxy-cache for Docker Hub, per CLAUDE.md mirror rule).
-# Bitnami/kubectl was deprecated 2025-08; alpine/k8s is the standard
-# replacement (see platform/self-sovereign-cutover/chart/values.yaml:252
-# for the canonical reasoning, captured live on otech103 2026-05-04).
-#
-# Why a Job and not a Helm hook
-# -----------------------------
-# This file lives in `clusters/_template/sovereign-tls/` — a per-Sovereign
-# Kustomize overlay reconciled by Flux, NOT a Helm chart. Helm hooks
-# require a HelmRelease container; this is a single one-shot K8s Job.
-# Flux's Kustomization reconciliation drives idempotent re-apply.
-#
-# Removal plan
-# ------------
-# Once every live Sovereign has reconciled past this Job (verified via
-# `kubectl get jobs -n kube-system | grep legacy-cert-cleanup` showing
-# Complete on every prov), this file may be deleted from
-# clusters/_template/sovereign-tls/kustomization.yaml.
-
---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: legacy-cert-cleanup
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/component: legacy-cert-cleanup
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: legacy-cert-cleanup
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/component: legacy-cert-cleanup
-rules:
-  # Legacy Secret to delete. Only the specific name — RBAC stays
-  # least-privilege.
-  - apiGroups: [""]
-    resources: ["secrets"]
-    resourceNames: ["sovereign-wildcard-tls"]
-    verbs: ["get", "delete"]
-  # cert-manager Certificate to delete. Only the specific name.
-  - apiGroups: ["cert-manager.io"]
-    resources: ["certificates"]
-    resourceNames: ["sovereign-wildcard-tls"]
-    verbs: ["get", "delete"]
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: legacy-cert-cleanup
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/component: legacy-cert-cleanup
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: Role
-  name: legacy-cert-cleanup
-subjects:
-  - kind: ServiceAccount
-    name: legacy-cert-cleanup
-    namespace: kube-system
---
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: legacy-cert-cleanup
-  namespace: kube-system
-  labels:
-    catalyst.openova.io/component: legacy-cert-cleanup
-    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
-spec:
-  # Keep the Job around 5 minutes after completion so an operator can
-  # `kubectl logs job/legacy-cert-cleanup -n kube-system` to confirm
-  # what was (or wasn't) cleaned up. After TTL the GC reclaims.
-  ttlSecondsAfterFinished: 300
-  backoffLimit: 2
-  template:
-    metadata:
-      labels:
-        catalyst.openova.io/component: legacy-cert-cleanup
-    spec:
-      serviceAccountName: legacy-cert-cleanup
-      restartPolicy: OnFailure
-      containers:
-        - name: cleanup
-          # Pinned via Harbor proxy-cache. See CLAUDE.md mirror-everything
-          # rule + values.yaml:252 in self-sovereign-cutover for the
-          # Bitnami→alpine/k8s decision history.
-          image: harbor.openova.io/proxy-dockerhub/alpine/k8s:1.31.1
-          imagePullPolicy: IfNotPresent
-          command: ["/bin/sh", "-c"]
-          args:
-            - |
-              set -eu
-              echo "[legacy-cert-cleanup] starting on ${SOVEREIGN_FQDN}"
-              # The dashed-suffix Secret (the live one PR O introduced)
-              # MUST remain — only delete the bare-name legacy pair.
-              echo "[legacy-cert-cleanup] removing legacy Certificate sovereign-wildcard-tls"
-              kubectl -n kube-system delete certificate.cert-manager.io sovereign-wildcard-tls --ignore-not-found=true --wait=false
-              echo "[legacy-cert-cleanup] removing legacy Secret sovereign-wildcard-tls"
-              kubectl -n kube-system delete secret sovereign-wildcard-tls --ignore-not-found=true --wait=false
-              echo "[legacy-cert-cleanup] complete"
-          securityContext:
-            allowPrivilegeEscalation: false
-            readOnlyRootFilesystem: true
-            runAsNonRoot: true
-            runAsUser: 65532
-            capabilities:
-              drop: ["ALL"]
-          resources:
-            requests:
-              cpu: "10m"
-              memory: "32Mi"
-            limits:
-              cpu: "100m"
-              memory: "64Mi"
--- a/clusters/omantel.omani.works/bootstrap-kit/01-cilium.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/01-cilium.yaml
@ -36,12 +36,11 @@ spec:
  chart:
    spec:
      chart: bp-cilium
-      # 1.3.1 (qa-loop iter-12 Fix #54 Workstream 2): bpf.preallocateMaps=true
-      # + socketLB.hostNamespaceOnly=true defaults so fresh worker pods on
-      # catalyst-omantel-biz-w2/w3 can resolve DNS on first-join.
-      # 1.3.0 (qa-loop iter-12 Fix #53C): Hubble UI HTTPRoute overlay +
-      # Cilium ClusterMesh LoadBalancer-typed Service shape.
-      version: 1.3.2
+      # 1.3.0 (qa-loop iter-12 Fix #53C): adds the Hubble UI HTTPRoute
+      # overlay (slice H7 #1095) that the catalystOverlay.hubbleUI block
+      # depends on, plus the Cilium ClusterMesh values shape (a
+      # LoadBalancer-typed Service for cross-region peering, Fix #53D).
+      version: 1.3.0
      sourceRef:
        kind: HelmRepository
        name: bp-cilium
--- a/clusters/omantel.omani.works/bootstrap-kit/02-cert-manager.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/02-cert-manager.yaml
@ -38,7 +38,7 @@ spec:
  chart:
    spec:
      chart: bp-cert-manager
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-cert-manager
--- a/clusters/omantel.omani.works/bootstrap-kit/09-keycloak.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/09-keycloak.yaml
@ -38,7 +38,7 @@ spec:
  chart:
    spec:
      chart: bp-keycloak
-      version: 1.4.1
+      version: 1.3.0
      sourceRef:
        kind: HelmRepository
        name: bp-keycloak
@ -58,17 +58,10 @@ spec:
    timeout: 15m
    remediation:
      retries: 3
-  # Per-Sovereign overrides — issue #387 + #604 + qa-loop iter-12:
+  # Per-Sovereign overrides — issue #387 + #604:
  # Wire the per-Sovereign hostname into the HTTPRoute template and
  # sovereign realm ConfigMap (catalyst-ui redirect URIs).
-  # sovereignRealm.name: per `feedback_no_mvp_no_workarounds.md` target-state
-  # rule, each Sovereign owns its KC realm named after the tenant short-name.
-  # Matrix tests (TC-124, TC-125, TC-159, TC-160, TC-161, TC-176, TC-190,
-  # TC-285) assert paths like `/admin/realms/omantel/...`.
  values:
    sovereignFQDN: omantel.omani.works
-    sovereignRealm:
-      name: omantel
-      displayName: "Omantel Sovereign"
    gateway:
      host: auth.omantel.omani.works
--- a/clusters/omantel.omani.works/bootstrap-kit/18-seaweedfs.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/18-seaweedfs.yaml
@ -55,9 +55,7 @@ spec:
  chart:
    spec:
      chart: bp-seaweedfs
-      # 1.2.0 — qa-loop Wave 5 Fix #79 Gap B: chart-rendered
-      # `seaweedfs-storage` StorageClass.
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-seaweedfs
--- a/clusters/omantel.omani.works/bootstrap-kit/51-bp-k8s-ws-proxy.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/51-bp-k8s-ws-proxy.yaml
@ -38,23 +38,7 @@ spec:
      # 0.1.5: 0.1.4 (default imagePullSecrets) + CI auto-bumped
      # image.tag. imagePullSecrets default required for omantel pods
      # to pull from private GHCR.
-      # 0.1.6 (qa-loop bounded-cycle Wave 5 Fix #78, Gap E): adds
-      # pre-install hook-weight -10 Job that auto-generates the
-      # `k8s-ws-proxy-hmac` Secret from /dev/urandom. Without this,
-      # every fresh Sovereign provision left k8s-ws-proxy pods stuck
-      # ContainerCreating forever — the chart referenced a Secret
-      # that nothing ever created. Idempotent on upgrade (preserves
-      # the existing key — rotating it would invalidate every
-      # in-flight catalyst-api signature).
-      # 0.1.9 (qa-loop bounded-cycle Fix #95, regression of Fix #78):
-      # explicit hook-weight ordering for the hmac-bootstrap quartet
-      # (SA=-20, Role+RoleBinding=-15, Job=-10) so the SA lands BEFORE
-      # the Job that references it. Pre-this, prov #8 failed with
-      # `serviceaccount "k8s-ws-proxy-hmac-bootstrap" not found`
-      # because the Job (weight -10, lower=earlier in Helm) was
-      # applied before its SA (weight 0). CI promote auto-bumps from
-      # Chart.yaml 0.1.8 to 0.1.9 with the new image SHA on merge.
-      version: 0.1.9
+      version: 0.1.5
      sourceRef:
        kind: HelmRepository
        name: bp-k8s-ws-proxy
--- a/clusters/omantel.omani.works/bootstrap-kit/kustomization.yaml
+++ b/clusters/omantel.omani.works/bootstrap-kit/kustomization.yaml
@ -51,14 +51,3 @@ resources:
  # via the catalystOverlay block in 01-cilium.yaml (no separate slot).
  - 53-bp-netbird.yaml
  - 54-bp-dmz-vcluster.yaml
-  # qa-loop iter-12 Fix #54 Workstream 1 — bp-hcloud-ccm (slot 55).
-  # The chroot omantel reconciles from clusters/_template/bootstrap-kit/
-  # which carries the actual 55-bp-hcloud-ccm.yaml file. The line below
-  # is intentionally omitted from this per-Sovereign overlay until Phase-2
-  # cutover separates the chroot reconcile from the per-Sovereign one
-  # (per the same pattern as 26-langfuse.yaml — present in this overlay
-  # but NOT in _template, vs slot 55 — present in _template only). When
-  # the per-Sovereign overlay becomes the canonical reconcile target,
-  # copy clusters/_template/bootstrap-kit/55-bp-hcloud-ccm.yaml here AND
-  # uncomment the resource list entry below.
-  # - 55-bp-hcloud-ccm.yaml
--- a/clusters/otech.omani.works/bootstrap-kit/02-cert-manager.yaml
+++ b/clusters/otech.omani.works/bootstrap-kit/02-cert-manager.yaml
@ -38,7 +38,7 @@ spec:
  chart:
    spec:
      chart: bp-cert-manager
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-cert-manager
--- a/clusters/otech.omani.works/bootstrap-kit/18-seaweedfs.yaml
+++ b/clusters/otech.omani.works/bootstrap-kit/18-seaweedfs.yaml
@ -55,9 +55,7 @@ spec:
  chart:
    spec:
      chart: bp-seaweedfs
-      # 1.2.0 — qa-loop Wave 5 Fix #79 Gap B: chart-rendered
-      # `seaweedfs-storage` StorageClass.
-      version: 1.2.0
+      version: 1.1.1
      sourceRef:
        kind: HelmRepository
        name: bp-seaweedfs
--- a/core/controllers/go.mod
+++ b/core/controllers/go.mod
@ -7,8 +7,6 @@ go 1.23

 require (
 	github.com/go-logr/logr v1.4.2
-	github.com/nats-io/nats.go v1.37.0
-	github.com/prometheus/client_golang v1.19.1
 	github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
 	github.com/stretchr/testify v1.9.0
 	k8s.io/api v0.31.1
@ -45,10 +43,12 @@ require (
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/nats-io/nats.go v1.37.0 // indirect
 	github.com/nats-io/nkeys v0.4.7 // indirect
 	github.com/nats-io/nuid v1.0.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	github.com/prometheus/client_golang v1.19.1 // indirect
 	github.com/prometheus/client_model v0.6.1 // indirect
 	github.com/prometheus/common v0.55.0 // indirect
 	github.com/prometheus/procfs v0.15.1 // indirect
--- a/core/controllers/organization/cmd/main.go
+++ b/core/controllers/organization/cmd/main.go
@ -11,7 +11,6 @@
 package main

 import (
-	"context"
 	"flag"
 	"fmt"
 	"os"
@ -23,13 +22,11 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

+	"github.com/openova-io/openova/core/controllers/pkg/gitea"
 	"github.com/openova-io/openova/core/controllers/organization/internal/controller"
 	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
-	"github.com/openova-io/openova/core/controllers/pkg/gitea"
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
 )

 var scheme = runtime.NewScheme()
@ -120,57 +117,6 @@ func main() {
 		os.Exit(1)
 	}

-	// D35 consume-leg — subscribe to the two canonical Catalyst NATS
-	// subjects so a `tenant.created` / `order.placed` envelope nudges
-	// the matching Organization CR into a fresh Reconcile within ~50ms
-	// of the publish. Best-effort wiring: when NATS_URL is unset (e.g.
-	// Catalyst-Zero contabo path where NATS is not deployed) we log
-	// "NATS not wired" and continue — the existing 30s informer
-	// requeue fallback inside r.Reconcile keeps the controller correct.
-	natsURL := strings.TrimSpace(os.Getenv("NATS_URL"))
-	if natsURL != "" {
-		if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
-			sub, err := natsbus.Connect(natsURL)
-			if err != nil {
-				log.Error(err, "natsbus: connect failed — D35 consume-leg disabled",
-					"nats_url", natsURL)
-				return nil // non-fatal — informer requeue is the canonical fallback
-			}
-			bridge := &controller.NATSBridge{
-				Client: mgr.GetClient(),
-				Log:    log.WithName("natsbridge"),
-			}
-			if err := sub.Subscribe(ctx,
-				natsbus.SubjectTenantCreated,
-				"organization-controller-tenant-created",
-				bridge.HandleTenantCreated,
-				natsbus.SubscribeOptions{},
-			); err != nil {
-				log.Error(err, "natsbus: subscribe tenant.created failed")
-			}
-			if err := sub.Subscribe(ctx,
-				natsbus.SubjectOrderPlaced,
-				"organization-controller-order-placed",
-				bridge.HandleOrderPlaced,
-				natsbus.SubscribeOptions{},
-			); err != nil {
-				log.Error(err, "natsbus: subscribe order.placed failed")
-			}
-			<-ctx.Done()
-			sub.Close()
-			return nil
-		})); err != nil {
-			log.Error(err, "natsbus: add runnable failed")
-			os.Exit(1)
-		}
-		log.Info("natsbus: D35 consume-leg wired",
-			"nats_url", natsURL,
-			"subjects", []string{natsbus.SubjectTenantCreated, natsbus.SubjectOrderPlaced},
-		)
-	} else {
-		log.Info("natsbus: NATS_URL unset — D35 consume-leg disabled (informer-requeue fallback only)")
-	}
-
 	log.Info("starting manager",
 		"host_cluster", hostCluster,
 		"keycloak_addr", kcAddr,
--- a/core/controllers/organization/internal/controller/nats_bridge.go
+++ b/core/controllers/organization/internal/controller/nats_bridge.go
@ -1,202 +0,0 @@
-// nats_bridge wires the canonical Catalyst NATS subjects (D35 consume
-// leg) into the organization-controller's reconcile loop.
-//
-// PR #1626 closed the publish-side of D35 — tenant + billing services
-// now emit `catalyst.tenant.created` + `catalyst.billing.order.placed`
-// on the NATS JetStream `CATALYST_SME` stream per ADR-0001 §6. The
-// consume-side was missing: no in-cluster controller subscribed, so the
-// envelopes accumulated on the broker and the only path that
-// reconciled an Organization CR was the 30s informer requeue plus
-// whatever wrote the CR in the first place. D35 (gate: "NATS broker
-// round-trips end-to-end") therefore stayed yellow even though the
-// publish leg shipped.
-//
-// This bridge subscribes to both subjects and, on each envelope:
-//
-//  1. Decodes the Event body into the tenant_id / slug fields the
-//     publishers stamp (see core/services/tenant/handlers/handlers.go +
-//     core/services/billing/handlers/handlers.go dispatchOrderPlaced).
-//  2. Looks up the Organization CR whose `spec.slug` matches the event's
-//     slug. The CR may not exist yet (e.g. tenant.created arrives
-//     before the operator wrote the CR) — that's a soft miss, we log
-//     and Ack so JetStream advances.
-//  3. Stamps `openova.io/last-event-observed-at` (RFC3339) +
-//     `openova.io/last-event-subject` on the CR via a patch. The
-//     annotation patch is treated as a generation-2 mutation by
-//     controller-runtime, which enqueues a fresh Reconcile within
-//     ~50ms — far faster than the 30s informer requeue fallback. The
-//     30s requeue is RETAINED inside Reconcile so a missed NATS message
-//     never strands a CR; subscription is an accelerator, not the only
-//     path.
-//
-// The bridge is intentionally idempotent — JetStream guarantees
-// at-least-once delivery, so the same envelope may arrive twice on a
-// broker rebalance. Stamping an annotation with the broker-side
-// Event.Timestamp keeps the patch byte-stable on duplicate delivery,
-// so controller-runtime does NOT enqueue a redundant Reconcile.
-//
-// Per HARD CONSTRAINT: no credential write-paths. The bridge reads
-// only the Event envelope + the matching CR; it never touches Secrets
-// or Keycloak service-account creds.
-
-package controller
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"strings"
-	"time"
-
-	"github.com/go-logr/logr"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/types"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
-)
-
-// Annotation keys stamped on the matching Organization CR when a
-// canonical NATS envelope is observed. Stable across pod restarts so
-// duplicate JetStream delivery does NOT trigger a redundant
-// Reconcile (Event.Timestamp is a stable per-event value).
-const (
-	AnnotationLastNATSObservedAt = "openova.io/last-event-observed-at"
-	AnnotationLastNATSSubject    = "openova.io/last-event-subject"
-)
-
-// NATSBridge is the consume-leg adapter for the organization-controller.
-// One bridge instance per canonical subject; the bridge handles all
-// envelopes that match its handler shape.
-type NATSBridge struct {
-	Client client.Client
-	Log    logr.Logger
-}
-
-// HandleTenantCreated reacts to a `catalyst.tenant.created` envelope.
-// The publish-side (PR #1626) ships the tenant doc as the Data payload
-// — we read `slug` (canonical Org slug) and `id` (tenant id, used for
-// the audit log). When the matching Organization CR exists, we stamp
-// the observation annotation so controller-runtime enqueues a fresh
-// Reconcile. When it does not exist, we log + Ack — the operator (or a
-// future provisioning controller) is responsible for creating the CR.
-func (b *NATSBridge) HandleTenantCreated(ctx context.Context, ev *natsbus.Event) error {
-	if ev == nil {
-		return nil
-	}
-	var payload struct {
-		ID    string `json:"id"`
-		Slug  string `json:"slug"`
-		TenID string `json:"tenant_id"`
-	}
-	if err := json.Unmarshal(ev.Data, &payload); err != nil {
-		// Malformed inside the envelope — log and Ack via the
-		// natsbus dispatcher (returning nil acks). Don't Nak; the
-		// next delivery would fail identically.
-		b.Log.Error(err, "tenant.created: malformed Data payload — ack to skip",
-			"event_id", ev.ID)
-		return nil
-	}
-	slug := strings.TrimSpace(payload.Slug)
-	if slug == "" {
-		// PR #1626 stamps `slug` on the tenant doc; if it's missing
-		// the publish side regressed. Log loudly so the operator
-		// notices but Ack so the subscriber doesn't hot-loop.
-		b.Log.Error(fmt.Errorf("missing slug"), "tenant.created: payload has no slug — ack to skip",
-			"event_id", ev.ID, "tenant_id", payload.TenID)
-		return nil
-	}
-	return b.stampObservation(ctx, slug, natsbus.SubjectTenantCreated, ev)
-}
-
-// HandleOrderPlaced reacts to a `catalyst.billing.order.placed`
-// envelope. The publish-side (PR #1626 dispatchOrderPlaced) ships a
-// payload enriched with the tenant's subdomain — we read `subdomain`
-// (matches Org slug on the Sovereign-side wildcard tenancy model) and
-// `tenant_id` for the audit trail.
-func (b *NATSBridge) HandleOrderPlaced(ctx context.Context, ev *natsbus.Event) error {
-	if ev == nil {
-		return nil
-	}
-	var payload struct {
-		TenantID  string `json:"tenant_id"`
-		Subdomain string `json:"subdomain"`
-		OrgSlug   string `json:"org_slug"`
-	}
-	if err := json.Unmarshal(ev.Data, &payload); err != nil {
-		b.Log.Error(err, "order.placed: malformed Data payload — ack to skip",
-			"event_id", ev.ID)
-		return nil
-	}
-	// Prefer the explicit org_slug field when present (forward-compat);
-	// fall back to subdomain which dispatchOrderPlaced currently stamps.
-	slug := strings.TrimSpace(payload.OrgSlug)
-	if slug == "" {
-		slug = strings.TrimSpace(payload.Subdomain)
-	}
-	if slug == "" {
-		b.Log.Error(fmt.Errorf("missing slug"), "order.placed: payload has neither org_slug nor subdomain — ack to skip",
-			"event_id", ev.ID, "tenant_id", payload.TenantID)
-		return nil
-	}
-	return b.stampObservation(ctx, slug, natsbus.SubjectOrderPlaced, ev)
-}
-
-// stampObservation looks up the Organization CR by slug and patches in
-// the two observation annotations. The patch is byte-stable on
-// duplicate delivery (Event.Timestamp is the broker-side timestamp,
-// which is fixed per envelope), so controller-runtime does NOT enqueue
-// a redundant Reconcile.
-//
-// Missing CR is not an error — log + return nil so the natsbus
-// dispatcher Acks. A Nak on a soft miss would hot-loop the subscriber
-// against a permanently-absent CR.
-func (b *NATSBridge) stampObservation(ctx context.Context, slug, subject string, ev *natsbus.Event) error {
-	var org orgapi.Organization
-	// Organization is cluster-scoped (see orgapi/types.go), name == slug.
-	if err := b.Client.Get(ctx, types.NamespacedName{Name: slug}, &org); err != nil {
-		if apierrors.IsNotFound(err) {
-			b.Log.Info("nats observation: no matching Organization CR — ack and skip",
-				"subject", subject, "slug", slug, "event_id", ev.ID)
-			return nil
-		}
-		// Transient API-server error — return so the dispatcher Naks
-		// and JetStream redelivers after backoff.
-		return fmt.Errorf("get organization %s: %w", slug, err)
-	}
-
-	observedAt := ev.Timestamp.UTC().Format(time.RFC3339Nano)
-	if observedAt == "" || ev.Timestamp.IsZero() {
-		observedAt = time.Now().UTC().Format(time.RFC3339Nano)
-	}
-
-	// Skip the patch when the annotations already match — JetStream's
-	// at-least-once delivery means we will see the same envelope on
-	// broker rebalance, and a redundant patch would churn the informer.
-	cur := org.GetAnnotations()
-	if cur != nil &&
-		cur[AnnotationLastNATSObservedAt] == observedAt &&
-		cur[AnnotationLastNATSSubject] == subject {
-		b.Log.V(1).Info("nats observation: duplicate envelope — skip patch",
-			"subject", subject, "slug", slug, "event_id", ev.ID)
-		return nil
-	}
-
-	desired := &orgapi.Organization{}
-	org.DeepCopyInto(desired)
-	anns := desired.GetAnnotations()
-	if anns == nil {
-		anns = map[string]string{}
-	}
-	anns[AnnotationLastNATSObservedAt] = observedAt
-	anns[AnnotationLastNATSSubject] = subject
-	desired.SetAnnotations(anns)
-
-	if err := b.Client.Patch(ctx, desired, client.MergeFrom(&org)); err != nil {
-		return fmt.Errorf("patch organization %s: %w", slug, err)
-	}
-	b.Log.Info("nats observation stamped — reconcile enqueued",
-		"subject", subject, "slug", slug, "event_id", ev.ID, "observed_at", observedAt)
-	return nil
-}
--- a/core/controllers/organization/internal/controller/nats_bridge_test.go
+++ b/core/controllers/organization/internal/controller/nats_bridge_test.go
@ -1,261 +0,0 @@
-// Unit tests for the NATS consume-leg bridge (D35).
-//
-// The handler is wired through a fake controller-runtime client so we
-// can assert:
-//
-//   - tenant.created envelope with a matching CR → annotations stamped.
-//   - order.placed envelope with the legacy subdomain field → CR found
-//     and annotated (back-compat with PR #1626 publish-side).
-//   - envelope with no matching CR → handler returns nil (Ack-to-skip),
-//     no patch attempted (assert via list count).
-//   - duplicate envelope (same timestamp) → no redundant patch.
-//   - malformed Data payload → handler returns nil so dispatcher Acks
-//     instead of hot-looping.
-//
-// The bridge is decoupled from JetStream by construction — the
-// natsbus.Handler signature is `func(ctx, *Event) error`, so these
-// tests exercise the same surface the live subscriber drives.
-
-package controller
-
-import (
-	"context"
-	"encoding/json"
-	"testing"
-	"time"
-
-	"github.com/go-logr/logr/testr"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/types"
-	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-
-	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
-)
-
-func newBridgeFixture(t *testing.T, objs ...runtime.Object) *NATSBridge {
-	t.Helper()
-	scheme := runtime.NewScheme()
-	if err := clientgoscheme.AddToScheme(scheme); err != nil {
-		t.Fatalf("clientgo addtoscheme: %v", err)
-	}
-	if err := orgapi.AddToScheme(scheme); err != nil {
-		t.Fatalf("orgapi addtoscheme: %v", err)
-	}
-	cb := fake.NewClientBuilder().WithScheme(scheme)
-	if len(objs) > 0 {
-		cb = cb.WithRuntimeObjects(objs...)
-	}
-	return &NATSBridge{
-		Client: cb.Build(),
-		Log:    testr.New(t),
-	}
-}
-
-// TestNATSBridge_TenantCreated_HappyPath pins: an envelope on
-// catalyst.tenant.created with a matching Organization CR results in
-// both observation annotations being patched. This is the D35 happy
-// path proof.
-func TestNATSBridge_TenantCreated_HappyPath(t *testing.T) {
-	org := &orgapi.Organization{
-		ObjectMeta: metav1.ObjectMeta{Name: "acme"},
-		Spec:       orgapi.OrganizationSpec{Slug: "acme"},
-	}
-	bridge := newBridgeFixture(t, org)
-
-	ts := time.Date(2026, 5, 18, 12, 34, 56, 789012345, time.UTC)
-	body, _ := json.Marshal(map[string]any{
-		"id":   "tnt-1",
-		"slug": "acme",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-tc-1",
-		Type:      "tenant.created",
-		Source:    "tenant-service",
-		Timestamp: ts,
-		TenantID:  "tnt-1",
-		Data:      body,
-	}
-	if err := bridge.HandleTenantCreated(context.Background(), ev); err != nil {
-		t.Fatalf("HandleTenantCreated: %v", err)
-	}
-
-	var got orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "acme"}, &got); err != nil {
-		t.Fatalf("get organization: %v", err)
-	}
-	anns := got.GetAnnotations()
-	if anns[AnnotationLastNATSSubject] != natsbus.SubjectTenantCreated {
-		t.Errorf("subject annotation: got %q want %q",
-			anns[AnnotationLastNATSSubject], natsbus.SubjectTenantCreated)
-	}
-	wantObservedAt := ts.Format(time.RFC3339Nano)
-	if anns[AnnotationLastNATSObservedAt] != wantObservedAt {
-		t.Errorf("observed-at annotation: got %q want %q",
-			anns[AnnotationLastNATSObservedAt], wantObservedAt)
-	}
-}
-
-// TestNATSBridge_OrderPlaced_BackCompatSubdomain pins the back-compat
-// path: when org_slug is absent, the bridge falls back to the
-// `subdomain` field PR #1626's dispatchOrderPlaced enriches in.
-// Bodyguard against the publish-side renaming the field without the
-// consume-side noticing.
-func TestNATSBridge_OrderPlaced_BackCompatSubdomain(t *testing.T) {
-	org := &orgapi.Organization{
-		ObjectMeta: metav1.ObjectMeta{Name: "globex"},
-		Spec:       orgapi.OrganizationSpec{Slug: "globex"},
-	}
-	bridge := newBridgeFixture(t, org)
-
-	body, _ := json.Marshal(map[string]any{
-		"tenant_id": "tnt-2",
-		"subdomain": "globex",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-op-1",
-		Type:      "order.placed",
-		Source:    "billing-service",
-		Timestamp: time.Date(2026, 5, 18, 13, 0, 0, 0, time.UTC),
-		TenantID:  "tnt-2",
-		Data:      body,
-	}
-	if err := bridge.HandleOrderPlaced(context.Background(), ev); err != nil {
-		t.Fatalf("HandleOrderPlaced: %v", err)
-	}
-
-	var got orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "globex"}, &got); err != nil {
-		t.Fatalf("get organization: %v", err)
-	}
-	if got.GetAnnotations()[AnnotationLastNATSSubject] != natsbus.SubjectOrderPlaced {
-		t.Errorf("subject annotation: got %q want %q",
-			got.GetAnnotations()[AnnotationLastNATSSubject], natsbus.SubjectOrderPlaced)
-	}
-}
-
-// TestNATSBridge_NoMatchingCR pins: an envelope referencing a slug
-// that doesn't exist returns nil (Ack-to-skip) and does NOT churn the
-// API server. Critical for cold-start ordering — tenant.created may
-// arrive before the operator's Organization CR write.
-func TestNATSBridge_NoMatchingCR(t *testing.T) {
-	bridge := newBridgeFixture(t) // empty fake client
-
-	body, _ := json.Marshal(map[string]any{"slug": "nonexistent"})
-	ev := &natsbus.Event{
-		ID:        "evt-miss",
-		Type:      "tenant.created",
-		Timestamp: time.Now().UTC(),
-		Data:      body,
-	}
-	if err := bridge.HandleTenantCreated(context.Background(), ev); err != nil {
-		t.Fatalf("HandleTenantCreated on missing CR returned error (should soft-miss): %v", err)
-	}
-}
-
-// TestNATSBridge_DuplicateEnvelope_NoChurn pins: replaying the same
-// envelope (same Timestamp) does not mutate the CR a second time. The
-// gen-2 controller-runtime informer enqueues on annotation drift; a
-// byte-stable patch keeps the reconcile queue clean.
-func TestNATSBridge_DuplicateEnvelope_NoChurn(t *testing.T) {
-	org := &orgapi.Organization{
-		ObjectMeta: metav1.ObjectMeta{Name: "dup", ResourceVersion: "1"},
-		Spec:       orgapi.OrganizationSpec{Slug: "dup"},
-	}
-	bridge := newBridgeFixture(t, org)
-	ts := time.Date(2026, 5, 18, 14, 0, 0, 0, time.UTC)
-	body, _ := json.Marshal(map[string]any{"slug": "dup"})
-	ev := &natsbus.Event{
-		ID:        "evt-dup",
-		Type:      "tenant.created",
-		Timestamp: ts,
-		Data:      body,
-	}
-
-	// First delivery — patches.
-	if err := bridge.HandleTenantCreated(context.Background(), ev); err != nil {
-		t.Fatalf("first delivery: %v", err)
-	}
-	var afterFirst orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "dup"}, &afterFirst); err != nil {
-		t.Fatalf("get after first: %v", err)
-	}
-	rvAfterFirst := afterFirst.GetResourceVersion()
-
-	// Second delivery (same envelope, same timestamp) — skip path.
-	if err := bridge.HandleTenantCreated(context.Background(), ev); err != nil {
-		t.Fatalf("second delivery: %v", err)
-	}
-	var afterSecond orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "dup"}, &afterSecond); err != nil {
-		t.Fatalf("get after second: %v", err)
-	}
-	if afterSecond.GetResourceVersion() != rvAfterFirst {
-		t.Errorf("duplicate envelope mutated CR — rv went %q → %q",
-			rvAfterFirst, afterSecond.GetResourceVersion())
-	}
-}
-
-// TestNATSBridge_MalformedData pins: a Data blob that fails
-// json.Unmarshal returns nil so the natsbus dispatcher Acks-to-skip.
-// A Nak would hot-loop the consumer against a poison pill.
-func TestNATSBridge_MalformedData(t *testing.T) {
-	bridge := newBridgeFixture(t)
-	ev := &natsbus.Event{
-		ID:        "evt-bad",
-		Type:      "tenant.created",
-		Timestamp: time.Now().UTC(),
-		Data:      []byte("{not-json"),
-	}
-	if err := bridge.HandleTenantCreated(context.Background(), ev); err != nil {
-		t.Errorf("malformed Data should NOT return error (would Nak + hot-loop), got: %v", err)
-	}
-}
-
-// TestNATSBridge_OrderPlaced_PreferOrgSlug pins: when both org_slug and
-// subdomain are present, org_slug wins. Forward-compat with the
-// publish-side normalizing to the explicit field.
-func TestNATSBridge_OrderPlaced_PreferOrgSlug(t *testing.T) {
-	org := &orgapi.Organization{
-		ObjectMeta: metav1.ObjectMeta{Name: "winner"},
-		Spec:       orgapi.OrganizationSpec{Slug: "winner"},
-	}
-	loser := &orgapi.Organization{
-		ObjectMeta: metav1.ObjectMeta{Name: "loser"},
-		Spec:       orgapi.OrganizationSpec{Slug: "loser"},
-	}
-	bridge := newBridgeFixture(t, org, loser)
-
-	body, _ := json.Marshal(map[string]any{
-		"tenant_id": "tnt-99",
-		"org_slug":  "winner",
-		"subdomain": "loser",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-pref",
-		Type:      "order.placed",
-		Timestamp: time.Now().UTC(),
-		Data:      body,
-	}
-	if err := bridge.HandleOrderPlaced(context.Background(), ev); err != nil {
-		t.Fatalf("HandleOrderPlaced: %v", err)
-	}
-
-	var winner orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "winner"}, &winner); err != nil {
-		t.Fatalf("get winner: %v", err)
-	}
-	if _, ok := winner.GetAnnotations()[AnnotationLastNATSSubject]; !ok {
-		t.Error("winner Organization was not annotated despite org_slug match")
-	}
-
-	var loserGot orgapi.Organization
-	if err := bridge.Client.Get(context.Background(), types.NamespacedName{Name: "loser"}, &loserGot); err != nil {
-		t.Fatalf("get loser: %v", err)
-	}
-	if _, ok := loserGot.GetAnnotations()[AnnotationLastNATSSubject]; ok {
-		t.Error("loser Organization was unexpectedly annotated; org_slug should outrank subdomain")
-	}
-}
--- a/core/controllers/organization/internal/controller/organization_controller.go
+++ b/core/controllers/organization/internal/controller/organization_controller.go
@ -38,9 +38,9 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"

+	"github.com/openova-io/openova/core/controllers/pkg/gitea"
 	"github.com/openova-io/openova/core/controllers/organization/internal/gitops"
 	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
-	"github.com/openova-io/openova/core/controllers/pkg/gitea"
 )

 // userAccessGVR is the namespace-scoped UserAccess CR group/version/kind
@ -243,19 +243,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
 		return r.fail(ctx, &org, idpCond.Reason, fedErr.Error())
 	}

-	// 5b. Per-tenant public-hostname HTTPRoute (issue #1629 follow-up).
-	// When `spec.tenantPublic.parentDomain` is set, render a Gateway-API
-	// HTTPRoute attaching `<subdomain>.<parentDomain>` to the supplied
-	// backend Service on the canonical cilium-gateway. No-op when the
-	// field is empty — Orgs that don't yet have a public hostname keep
-	// working via the Sovereign-wide `*.<sovFQDN>` tenant-wildcard
-	// route. Failure is non-fatal for the Org's other reconciliation
-	// outputs (Keycloak group + Gitea Org + vCluster manifests already
-	// landed) so we requeue instead of marking the whole Org Failed.
-	if _, err := r.reconcileTenantRoute(ctx, &org); err != nil {
-		return r.fail(ctx, &org, "TenantRouteFailed", err.Error())
-	}
-
 	// 6. Status update — Ready=True plus the per-step federation
 	// conditions (always present so the access-matrix UI can render
 	// the federation column without conditional logic).
--- a/core/controllers/organization/internal/controller/organization_controller_test.go
+++ b/core/controllers/organization/internal/controller/organization_controller_test.go
@ -36,8 +36,8 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"

-	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
 	"github.com/openova-io/openova/core/controllers/pkg/gitea"
+	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"

 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@ -53,10 +53,10 @@ type fakeKeycloak struct {
 	groupPath string

 	// Federation surface (F2).
-	idps              map[string]KCIdentityProvider
-	mappers           map[string][]KCIdentityProviderMapper // key = alias
-	idpEnsureCalls    int
-	idpDeleteCalls    int
+	idps           map[string]KCIdentityProvider
+	mappers        map[string][]KCIdentityProviderMapper // key = alias
+	idpEnsureCalls int
+	idpDeleteCalls int
 	mapperEnsureCalls int
 }

@ -661,10 +661,10 @@ func TestReconcile_Missing_NoError(t *testing.T) {
 // no Pod was ever scheduled.
 //
 // This test asserts:
-//  1. Upsert writes the UserAccess CR into the configured
-//     r.UserAccessNamespace (default `catalyst-system`).
-//  2. The CR carries metadata.namespace == that namespace (NOT empty).
-//  3. The owner-per-CR mapping holds (1 owner = 1 CR).
+//   1. Upsert writes the UserAccess CR into the configured
+//      r.UserAccessNamespace (default `catalyst-system`).
+//   2. The CR carries metadata.namespace == that namespace (NOT empty).
+//   3. The owner-per-CR mapping holds (1 owner = 1 CR).
 func TestUpsertUserAccess_NamespaceScoped(t *testing.T) {
 	t.Parallel()
 	org := sampleOrg()
@ -752,114 +752,3 @@ func TestUpsertUserAccess_DefaultsToCatalystSystem(t *testing.T) {
 	}
 }

-// TestReconcile_TenantPublic_RendersHTTPRoute covers the issue #1629
-// follow-up: when spec.tenantPublic.parentDomain is set, the reconciler
-// MUST render an HTTPRoute in the Org's namespace pointing at the
-// supplied backend Service. Without this, PowerDNS-resolved tenant
-// hostnames (e.g. `acme.omani.homes`) fall through to the marketplace
-// `tenant-wildcard` route and 404 instead of hitting the tenant's
-// installed WordPress.
-func TestReconcile_TenantPublic_RendersHTTPRoute(t *testing.T) {
-	t.Parallel()
-	org := sampleOrg()
-	org.Spec.TenantPublic = orgapi.OrganizationTenantPublic{
-		ParentDomain:   "omani.homes",
-		BackendService: "wordpress-x-acme-x-vcluster",
-		BackendPort:    80,
-		Product:        "wordpress",
-	}
-
-	// Register HTTPRoute (Gateway API) with the fake client's scheme so
-	// it can serialise the unstructured object the reconciler writes.
-	r, _, _ := makeReconciler(t, org)
-	scheme := r.Scheme()
-	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRoute",
-	}, &unstructured.Unstructured{})
-	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRouteList",
-	}, &unstructured.UnstructuredList{})
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: "acme"},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	hr := unstructured.Unstructured{}
-	hr.SetGroupVersionKind(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRoute",
-	})
-	if err := r.Get(context.Background(), client.ObjectKey{Namespace: "acme", Name: "acme"}, &hr); err != nil {
-		t.Fatalf("get HTTPRoute acme/acme: %v", err)
-	}
-	hostnames, _, _ := unstructured.NestedSlice(hr.Object, "spec", "hostnames")
-	if len(hostnames) != 1 || hostnames[0] != "acme.omani.homes" {
-		t.Errorf("hostnames: got %v, want [acme.omani.homes]", hostnames)
-	}
-	parents, _, _ := unstructured.NestedSlice(hr.Object, "spec", "parentRefs")
-	if len(parents) != 1 {
-		t.Fatalf("parentRefs: got %d, want 1", len(parents))
-	}
-	pr := parents[0].(map[string]any)
-	if pr["name"] != "cilium-gateway" || pr["namespace"] != "kube-system" {
-		t.Errorf("parentRef: got %+v, want cilium-gateway/kube-system", pr)
-	}
-	rules, _, _ := unstructured.NestedSlice(hr.Object, "spec", "rules")
-	if len(rules) != 1 {
-		t.Fatalf("rules: got %d, want 1", len(rules))
-	}
-	brs, _, _ := unstructured.NestedSlice(rules[0].(map[string]any), "backendRefs")
-	if len(brs) != 1 {
-		t.Fatalf("backendRefs: got %d, want 1", len(brs))
-	}
-	br := brs[0].(map[string]any)
-	if br["name"] != "wordpress-x-acme-x-vcluster" {
-		t.Errorf("backendRef name: got %v, want wordpress-x-acme-x-vcluster", br["name"])
-	}
-	labels := hr.GetLabels()
-	if labels["catalyst.openova.io/tenant-product"] != "wordpress" {
-		t.Errorf("expected tenant-product=wordpress label, got %q",
-			labels["catalyst.openova.io/tenant-product"])
-	}
-	if labels["catalyst.openova.io/parent-zone"] != "omani.homes" {
-		t.Errorf("expected parent-zone=omani.homes label, got %q",
-			labels["catalyst.openova.io/parent-zone"])
-	}
-}
-
-// TestReconcile_TenantPublic_DisabledByDefault covers the no-op path:
-// when spec.tenantPublic.parentDomain is empty (the default for every
-// existing Org CR), NO HTTPRoute MUST be written. Without this guard
-// every legacy Org would suddenly try to render an HTTPRoute and the
-// reconciler would surface TenantRouteFailed because BackendService is
-// empty.
-func TestReconcile_TenantPublic_DisabledByDefault(t *testing.T) {
-	t.Parallel()
-	org := sampleOrg() // no TenantPublic set
-	r, _, _ := makeReconciler(t, org)
-	scheme := r.Scheme()
-	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRoute",
-	}, &unstructured.Unstructured{})
-	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRouteList",
-	}, &unstructured.UnstructuredList{})
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: "acme"},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	hrList := unstructured.UnstructuredList{}
-	hrList.SetGroupVersionKind(schema.GroupVersionKind{
-		Group: "gateway.networking.k8s.io", Version: "v1", Kind: "HTTPRouteList",
-	})
-	if err := r.List(context.Background(), &hrList); err != nil {
-		t.Fatalf("list HTTPRoute: %v", err)
-	}
-	if len(hrList.Items) != 0 {
-		t.Errorf("expected 0 HTTPRoutes when tenantPublic is unset, got %d", len(hrList.Items))
-	}
-}
--- a/core/controllers/organization/internal/controller/tenant_route.go
+++ b/core/controllers/organization/internal/controller/tenant_route.go
@ -1,190 +0,0 @@
-// tenant_route.go — per-Organization HTTPRoute reconciler.
-//
-// Issue #1629 follow-up. PowerDNS now resolves `<slug>.<parentDomain>`
-// (e.g. `acme.omani.homes`) for every Org whose Sovereign has a
-// parent_domains entry with role=sme-pool, but no HTTPRoute attaches
-// that hostname to the Org's installed product Service. Result: the
-// Cilium Gateway happily terminates TLS on the wildcard cert, then
-// returns the storefront landing page (the only HTTPRoute attached
-// to `*.<sovFQDN>` is the `tenant-wildcard` route → marketplace
-// console Service) instead of the tenant's WordPress / Nextcloud /
-// GitLab install.
-//
-// The fix is reconciler-side: when `spec.tenantPublic.parentDomain`
-// is set on an Organization, the controller renders a per-tenant
-// HTTPRoute in the Org's namespace (= spec.slug) pointing at the
-// supplied BackendService. The route attaches to the canonical
-// `cilium-gateway/kube-system` parent — the same parent the
-// marketplace, back-office, and tenant-wildcard routes already attach
-// to — and surfaces `<subdomain>.<parentDomain>` as its hostname so
-// the Cilium Gateway hostname matcher picks the per-tenant route
-// over the wildcard for any request matching the exact host.
-//
-// Design notes:
-//
-//   - HTTPRoute is created/updated via the controller-runtime client
-//     with an Unstructured object (same pattern continuum/switchover
-//     uses for HTTPRoute weight drains). This avoids pulling in the
-//     gateway-api Go types for a single resource.
-//   - BackendService is treated as a Service in the Org's own
-//     namespace — no ReferenceGrant required. Operators that point
-//     at a cross-namespace Service (rare) can ship the
-//     ReferenceGrant alongside the Org.
-//   - The HTTPRoute name is the Org slug (deterministic, idempotent).
-//     OwnerReferences are intentionally NOT set: Organizations are
-//     cluster-scoped while the HTTPRoute is namespaced, and K8s rejects
-//     namespaced→cluster OwnerReferences. Deletion is handled by the
-//     Org's namespace teardown (when the Org's vCluster ns is
-//     removed, every HTTPRoute under it goes with it).
-//   - Skipped silently when ParentDomain is empty (the zero-value
-//     case for Orgs that don't yet have a public hostname).
-//
-// Per docs/INVIOLABLE-PRINCIPLES.md #4 every operationally-meaningful
-// value flows through the CR — no hardcoded gateway name, parent
-// namespace, or port number in the renderer.
-
-package controller
-
-import (
-	"context"
-	"fmt"
-	"strings"
-
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	orgapi "github.com/openova-io/openova/core/controllers/organization/internal/orgapi"
-)
-
-// httpRouteGVK identifies the Gateway API HTTPRoute v1 resource the
-// reconciler writes. Matches the GVK referenced by the existing
-// marketplace-routes.yaml, httproute.yaml, and continuum/switchover
-// drainers — every Cilium Gateway-API path on a Sovereign goes through
-// gateway.networking.k8s.io/v1.HTTPRoute.
-var httpRouteGVK = schema.GroupVersionKind{
-	Group:   "gateway.networking.k8s.io",
-	Version: "v1",
-	Kind:    "HTTPRoute",
-}
-
-// tenantRouteParentDefaults are the defaults the reconciler applies
-// when the Organization spec doesn't override them. They match the
-// canonical Cilium Gateway placement on every Sovereign
-// (clusters/_template/sovereign-tls/cilium-gateway.yaml installs the
-// Gateway as `cilium-gateway` in `kube-system`).
-const (
-	tenantRouteDefaultGatewayName      = "cilium-gateway"
-	tenantRouteDefaultGatewayNamespace = "kube-system"
-	tenantRouteDefaultBackendPort      = int32(80)
-)
-
-// reconcileTenantRoute creates or updates the per-Organization
-// HTTPRoute when `spec.tenantPublic.parentDomain` is set. Returns
-// (rendered=true, nil) when the route was written, (false, nil) when
-// the feature is disabled (empty parentDomain), or (false, err) on a
-// transient write failure (the parent reconciler requeues).
-func (r *Reconciler) reconcileTenantRoute(ctx context.Context, org *orgapi.Organization) (bool, error) {
-	tp := org.Spec.TenantPublic
-	parentDomain := strings.TrimSpace(tp.ParentDomain)
-	if parentDomain == "" {
-		// Feature disabled — Orgs that don't yet have a public
-		// hostname are accessed via the Sovereign-wide
-		// `*.<sovFQDN>` tenant-wildcard route. No-op + no condition
-		// surfacing (matches the existing reconciler's quiet-mode
-		// for unset optional fields).
-		return false, nil
-	}
-
-	subdomain := strings.TrimSpace(tp.Subdomain)
-	if subdomain == "" {
-		subdomain = org.Spec.Slug
-	}
-	backend := strings.TrimSpace(tp.BackendService)
-	if backend == "" {
-		return false, fmt.Errorf("tenantPublic.backendService is required when parentDomain is set")
-	}
-	port := tp.BackendPort
-	if port == 0 {
-		port = tenantRouteDefaultBackendPort
-	}
-
-	hostname := fmt.Sprintf("%s.%s", subdomain, parentDomain)
-	ns := org.Spec.Slug
-	name := org.Spec.Slug
-
-	labels := map[string]string{
-		"openova.io/organization":         org.Spec.Slug,
-		"openova.io/sovereign":            org.Spec.SovereignRef,
-		"openova.io/managed-by":           "organization-controller",
-		"app.kubernetes.io/managed-by":    "catalyst",
-		"catalyst.openova.io/component":   "tenant-public-route",
-		"catalyst.openova.io/parent-zone": parentDomain,
-	}
-	if p := strings.TrimSpace(tp.Product); p != "" {
-		labels["catalyst.openova.io/tenant-product"] = p
-	}
-
-	desiredSpec := map[string]any{
-		"parentRefs": []any{
-			map[string]any{
-				"name":      tenantRouteDefaultGatewayName,
-				"namespace": tenantRouteDefaultGatewayNamespace,
-			},
-		},
-		"hostnames": []any{hostname},
-		"rules": []any{
-			map[string]any{
-				"matches": []any{
-					map[string]any{
-						"path": map[string]any{
-							"type":  "PathPrefix",
-							"value": "/",
-						},
-					},
-				},
-				"backendRefs": []any{
-					map[string]any{
-						"name": backend,
-						"port": int64(port),
-					},
-				},
-			},
-		},
-	}
-
-	desired := unstructured.Unstructured{}
-	desired.SetGroupVersionKind(httpRouteGVK)
-	desired.SetName(name)
-	desired.SetNamespace(ns)
-	desired.SetLabels(labels)
-	desired.Object["spec"] = desiredSpec
-
-	current := unstructured.Unstructured{}
-	current.SetGroupVersionKind(httpRouteGVK)
-	err := r.Get(ctx, client.ObjectKey{Namespace: ns, Name: name}, &current)
-	if err != nil {
-		if !apierrors.IsNotFound(err) {
-			return false, fmt.Errorf("get HTTPRoute %s/%s: %w", ns, name, err)
-		}
-		if err := r.Create(ctx, &desired); err != nil {
-			if apierrors.IsAlreadyExists(err) {
-				// Race: another reconcile created it between Get
-				// and Create. Re-Get + Update on next pass.
-				return true, nil
-			}
-			return false, fmt.Errorf("create HTTPRoute %s/%s: %w", ns, name, err)
-		}
-		return true, nil
-	}
-
-	// Update: copy desired spec + labels onto current (preserves
-	// resourceVersion + any operator-added annotations).
-	current.Object["spec"] = desiredSpec
-	current.SetLabels(labels)
-	if err := r.Update(ctx, &current); err != nil {
-		return false, fmt.Errorf("update HTTPRoute %s/%s: %w", ns, name, err)
-	}
-	return true, nil
-}
--- a/core/controllers/organization/internal/orgapi/types.go
+++ b/core/controllers/organization/internal/orgapi/types.go
@ -84,66 +84,6 @@ type OrganizationSpec struct {
 	// Identity holds optional federation config — empty means use the
 	// Sovereign's own Keycloak realm.
 	Identity OrganizationIdentity `json:"identity,omitempty"`
-
-	// TenantPublic optionally exposes the Org's installed product on a
-	// per-tenant public hostname. When set the organization-controller
-	// renders a Gateway-API HTTPRoute in the Org's namespace pointing at
-	// the supplied backend Service. When empty (the zero value) the
-	// controller skips the HTTPRoute step — Orgs that don't yet have a
-	// product installed (or that are accessed only via the per-Sovereign
-	// console wildcard `*.<sovFQDN>`) keep working unchanged.
-	//
-	// The motivating use case (issue #1629 follow-up) is the
-	// `<slug>.omani.homes` family of tenant hostnames: PowerDNS now
-	// resolves them via the sme-pool parent zone reconciler, but no
-	// HTTPRoute was attaching them to the tenant's WordPress install.
-	// Without this struct that traffic 404s at the Cilium Gateway.
-	TenantPublic OrganizationTenantPublic `json:"tenantPublic,omitempty"`
-}
-
-// OrganizationTenantPublic is the per-tenant public-hostname binding
-// the organization-controller renders into an HTTPRoute on Ready Orgs.
-//
-// All fields are optional at the CRD level — the controller treats an
-// empty ParentDomain as "do not render". Defaulting rules:
-//
-//   - Subdomain defaults to spec.slug.
-//   - BackendPort defaults to 80 (the conventional HTTP port WordPress,
-//     Nextcloud, GitLab, BookStack, and Ghost all listen on inside the
-//     vCluster).
-//
-// Per docs/INVIOLABLE-PRINCIPLES.md #4 no value is hardcoded inside the
-// renderer — every knob flows through the CR.
-type OrganizationTenantPublic struct {
-	// ParentDomain is the apex zone the per-tenant hostname lives
-	// under (e.g. "omani.homes"). Sovereign-wide parentZones lists the
-	// pool of valid candidates; this field picks one specific apex per
-	// Organization. Required to render the HTTPRoute — empty disables
-	// the whole TenantPublic feature for this Org.
-	ParentDomain string `json:"parentDomain,omitempty"`
-
-	// Subdomain is the leftmost label of the per-tenant hostname.
-	// Defaults to spec.slug when empty so the canonical
-	// `<slug>.<parentDomain>` hostname renders without extra config.
-	Subdomain string `json:"subdomain,omitempty"`
-
-	// BackendService is the Service name the HTTPRoute routes "/" to —
-	// e.g. `wordpress` for an in-cluster WordPress install, or the
-	// vCluster-synced `wordpress-x-<slug>-x-vcluster` name when the
-	// product lives inside a vCluster. The Service MUST resolve in the
-	// Org's host namespace (= spec.slug) so the HTTPRoute backendRefs
-	// don't need cross-namespace ReferenceGrants.
-	BackendService string `json:"backendService,omitempty"`
-
-	// BackendPort is the Service port number to route to. Defaults to
-	// 80 when zero.
-	BackendPort int32 `json:"backendPort,omitempty"`
-
-	// Product is an operator-meaningful tag carried on the rendered
-	// HTTPRoute's labels (e.g. "wordpress", "nextcloud", "gitlab").
-	// Surfaced on the access-matrix UI so operators can filter routes
-	// by installed product. Optional — empty just omits the label.
-	Product string `json:"product,omitempty"`
 }

 // OrganizationOwner is an entry in spec.owners.
--- a/core/controllers/pkg/gitea/issues.go
+++ b/core/controllers/pkg/gitea/issues.go
@ -1,223 +0,0 @@
-// Issue CRUD on top of the canonical pkg/gitea Client.
-//
-// pulls.go covers the PR read surface (Wave 8) + merge (Wave 11). This
-// file covers the Issue read+write surface needed by Wave 11's
-// openova-sandbox-mcp tools (`gitea.issue.list / get / create /
-// comment`). Same client envelope; same error mapping (ErrRepoNotFound
-// on 404; *HTTPError otherwise).
-//
-// New endpoints (Gitea REST API, repo scope):
-//
-//	GET    /api/v1/repos/{owner}/{repo}/issues?state=...&page=...&limit=50
-//	GET    /api/v1/repos/{owner}/{repo}/issues/{index}
-//	POST   /api/v1/repos/{owner}/{repo}/issues
-//	POST   /api/v1/repos/{owner}/{repo}/issues/{index}/comments
-//
-// Gitea conflates issues + PRs on the same /issues collection (PRs have
-// `pull_request != nil`); the MCP `gitea.pr.*` family uses the dedicated
-// /pulls endpoint, so callers wanting true issues only should either
-// pass `type=issues` (Gitea ≥1.20) or filter on Issue.IsPullRequest().
-package gitea
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"net/http"
-	"net/url"
-	"time"
-)
-
-// IssueState constrains the `state` query param on ListIssues. Gitea
-// accepts "open" | "closed" | "all"; empty defaults server-side to
-// "open" but we make the wire shape explicit.
-type IssueState string
-
-const (
-	// IssueStateOpen lists only open issues (Gitea default).
-	IssueStateOpen IssueState = "open"
-	// IssueStateClosed lists only closed issues.
-	IssueStateClosed IssueState = "closed"
-	// IssueStateAll lists every issue regardless of state.
-	IssueStateAll IssueState = "all"
-)
-
-// Issue is the slice of Gitea Issue fields the MCP tools surface.
-//
-// Gitea's `pull_request` field is non-nil when the row is actually a PR
-// — callers wanting true issues should `.IsPullRequest()` filter, or
-// pass `type=issues` to ListIssues for server-side scoping.
-type Issue struct {
-	ID          int64      `json:"id,omitempty"`
-	Number      int64      `json:"number,omitempty"`
-	URL         string     `json:"html_url,omitempty"`
-	State       string     `json:"state,omitempty"`
-	Title       string     `json:"title,omitempty"`
-	Body        string     `json:"body,omitempty"`
-	CreatedAt   *time.Time `json:"created_at,omitempty"`
-	UpdatedAt   *time.Time `json:"updated_at,omitempty"`
-	ClosedAt    *time.Time `json:"closed_at,omitempty"`
-	PullRequest *struct {
-		// Only set when the row is a PR. Non-nil → IsPullRequest()==true.
-		Merged bool `json:"merged,omitempty"`
-	} `json:"pull_request,omitempty"`
-}
-
-// IsPullRequest is true when the PullRequest field is populated, which
-// marks a row from Gitea's shared /issues collection as a Pull Request
-// rather than a plain issue. Filter with `!i.IsPullRequest()` to keep
-// true issues only.
-func (i Issue) IsPullRequest() bool {
-	return i.PullRequest != nil
-}
-
-// IssueComment is the slice of Gitea Issue-comment fields the MCP
-// `gitea.issue.comment` tool surfaces back to the agent.
-type IssueComment struct {
-	ID        int64      `json:"id,omitempty"`
-	URL       string     `json:"html_url,omitempty"`
-	Body      string     `json:"body,omitempty"`
-	CreatedAt *time.Time `json:"created_at,omitempty"`
-	UpdatedAt *time.Time `json:"updated_at,omitempty"`
-}
-
-// ListIssuesOpts threads optional filters through ListIssues without
-// growing the positional signature.
-type ListIssuesOpts struct {
-	// State filters by open/closed/all. Empty → server default ("open").
-	State IssueState
-	// Type filters by "issues" | "pulls" | "" (both). Empty → both, with
-	// PRs distinguishable via Issue.IsPullRequest().
-	Type string
-}
-
-// issueCreate is the body of POST /repos/{owner}/{repo}/issues.
-type issueCreate struct {
-	Title string `json:"title"`
-	Body  string `json:"body,omitempty"`
-}
-
-// issueCommentCreate is the body of POST /issues/{index}/comments.
-type issueCommentCreate struct {
-	Body string `json:"body"`
-}
-
-// ListIssues collects all issues on org/repo that match opts. It requests
-// pages 1, 2, ... with limit=50 and stops at the first page that comes
-// back with fewer than 50 rows; rows are returned in the order Gitea
-// sent them.
-//
-// opts.State and opts.Type are forwarded as the `state` and `type` query
-// parameters only when non-empty.
-//
-// Errors: empty org or repo is rejected before any request is made; a
-// 404 on the first page becomes ErrRepoNotFound; every other error from
-// c.do (including a 404 on a later page) is returned unchanged.
-//
-// NOTE(review): the short-page stop condition assumes the server really
-// serves 50 rows per page — confirm against the instance's page cap.
-//
-// Added Wave 11 for openova-sandbox-mcp `gitea.issue.list`.
-func (c *Client) ListIssues(ctx context.Context, org, repo string, opts ListIssuesOpts) ([]Issue, error) {
-	if org == "" || repo == "" {
-		return nil, errors.New("gitea: ListIssues requires non-empty org, repo")
-	}
-	const pageSize = 50
-	// Only `page` varies between requests; the rest is set up once.
-	params := url.Values{}
-	params.Set("limit", fmt.Sprintf("%d", pageSize))
-	if opts.State != "" {
-		params.Set("state", string(opts.State))
-	}
-	if opts.Type != "" {
-		params.Set("type", opts.Type)
-	}
-	listPath := "/repos/" + url.PathEscape(org) + "/" + url.PathEscape(repo) + "/issues?"
-
-	issues := make([]Issue, 0, pageSize)
-	page := 1
-	for {
-		params.Set("page", fmt.Sprintf("%d", page))
-		var rows []Issue
-		status, _, err := c.do(ctx, http.MethodGet, listPath+params.Encode(), nil, &rows)
-		switch {
-		case err != nil && page == 1 && status == http.StatusNotFound:
-			return nil, ErrRepoNotFound
-		case err != nil:
-			return nil, err
-		}
-		issues = append(issues, rows...)
-		if len(rows) < pageSize {
-			return issues, nil
-		}
-		page++
-	}
-}
-
-// GetIssue loads issue #number from org/repo.
-//
-// org and repo must be non-empty and number must be positive; otherwise
-// an error is returned without contacting the server. Every 404 is
-// reported as ErrRepoNotFound, so a missing repo and a missing issue
-// number look the same to the caller. Any other error from c.do is
-// passed through as-is.
-//
-// Added Wave 11 for openova-sandbox-mcp `gitea.issue.get`.
-func (c *Client) GetIssue(ctx context.Context, org, repo string, number int64) (Issue, error) {
-	switch {
-	case org == "" || repo == "":
-		return Issue{}, errors.New("gitea: GetIssue requires non-empty org, repo")
-	case number <= 0:
-		return Issue{}, errors.New("gitea: GetIssue requires positive issue number")
-	}
-	issuePath := fmt.Sprintf("/repos/%s/%s/issues/%d", url.PathEscape(org), url.PathEscape(repo), number)
-	var issue Issue
-	status, _, err := c.do(ctx, http.MethodGet, issuePath, nil, &issue)
-	if err == nil {
-		return issue, nil
-	}
-	if status == http.StatusNotFound {
-		return Issue{}, ErrRepoNotFound
-	}
-	return Issue{}, err
-}
-
-// CreateIssue posts a new issue with the given title and body to
-// org/repo and returns the issue the server sends back.
-//
-// org, repo and title must be non-empty; a violation is reported as an
-// error before any request is made. A 404 becomes ErrRepoNotFound; any
-// other error from c.do is returned unchanged.
-//
-// The call is not idempotent: Gitea does not de-duplicate by title, so
-// two calls open two issues. Callers that need find-or-create behaviour
-// should ListIssues first.
-//
-// Added Wave 11 for openova-sandbox-mcp `gitea.issue.create`.
-func (c *Client) CreateIssue(ctx context.Context, org, repo, title, body string) (Issue, error) {
-	switch {
-	case org == "" || repo == "":
-		return Issue{}, errors.New("gitea: CreateIssue requires non-empty org, repo")
-	case title == "":
-		return Issue{}, errors.New("gitea: CreateIssue requires non-empty title")
-	}
-	issuesPath := fmt.Sprintf("/repos/%s/%s/issues", url.PathEscape(org), url.PathEscape(repo))
-	payload := issueCreate{Title: title, Body: body}
-	var created Issue
-	status, _, err := c.do(ctx, http.MethodPost, issuesPath, payload, &created)
-	if err == nil {
-		return created, nil
-	}
-	if status == http.StatusNotFound {
-		return Issue{}, ErrRepoNotFound
-	}
-	return Issue{}, err
-}
-
-// CommentOnIssue adds a comment with the given body to issue #number on
-// org/repo and returns the comment the server sends back. The same
-// endpoint serves PR rows, since Gitea keeps issues and PRs in one
-// collection.
-//
-// org, repo and body must be non-empty and number must be positive; a
-// violation is reported as an error before any request is made. A 404
-// becomes ErrRepoNotFound; any other error from c.do is returned
-// unchanged.
-//
-// Added Wave 11 for openova-sandbox-mcp `gitea.issue.comment`.
-func (c *Client) CommentOnIssue(ctx context.Context, org, repo string, number int64, body string) (IssueComment, error) {
-	switch {
-	case org == "" || repo == "":
-		return IssueComment{}, errors.New("gitea: CommentOnIssue requires non-empty org, repo")
-	case number <= 0:
-		return IssueComment{}, errors.New("gitea: CommentOnIssue requires positive issue number")
-	case body == "":
-		return IssueComment{}, errors.New("gitea: CommentOnIssue requires non-empty body")
-	}
-	commentsPath := fmt.Sprintf("/repos/%s/%s/issues/%d/comments", url.PathEscape(org), url.PathEscape(repo), number)
-	var comment IssueComment
-	status, _, err := c.do(ctx, http.MethodPost, commentsPath, issueCommentCreate{Body: body}, &comment)
-	if err == nil {
-		return comment, nil
-	}
-	if status == http.StatusNotFound {
-		return IssueComment{}, ErrRepoNotFound
-	}
-	return IssueComment{}, err
-}
--- a/core/controllers/pkg/gitea/merge_issues_test.go
+++ b/core/controllers/pkg/gitea/merge_issues_test.go
@ -1,346 +0,0 @@
-// Wave-11 tests for MergePullRequest + Issue CRUD (issues.go).
-//
-// We don't extend the existing pullsFake / fakeGitea handlers — they were
-// frozen to lock the surface they cover. A focused per-feature fake
-// keeps the assertion radius small and avoids regressing the existing
-// list/get coverage when this file's expectations evolve.
-package gitea
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"net/http"
-	"net/http/httptest"
-	"strconv"
-	"strings"
-	"sync"
-	"testing"
-)
-
-// mergeIssuesFake handles ONLY the endpoints touched by MergePullRequest
-// + the four Issue methods. Anything else 404s so a test catching a typo
-// gets a clear "unhandled" failure instead of a silent pass.
-type mergeIssuesFake struct {
-	mu sync.Mutex
-
-	// merged is keyed by "<org>/<repo>/<number>" → the Do style the
-	// client posted. Lets a test assert "we passed style=squash".
-	merged map[string]string
-
-	// issuesByRepo is keyed by "<org>/<repo>" → ordered issue list.
-	// Mutation: CreateIssue appends a new entry with an
-	// auto-incrementing Number; GET /issues/{idx} reads it back.
-	issuesByRepo map[string][]Issue
-
-	// comments is keyed by "<org>/<repo>/<number>" → comment count.
-	// We don't store the comment bodies — the test asserts the
-	// returned IssueComment shape end-to-end.
-	comments map[string]int
-}
-
-// newMergeIssuesFake returns a fake whose three maps are allocated and
-// empty, so the handler can write to them straight away.
-func newMergeIssuesFake() *mergeIssuesFake {
-	f := new(mergeIssuesFake)
-	f.merged = make(map[string]string)
-	f.issuesByRepo = make(map[string][]Issue)
-	f.comments = make(map[string]int)
-	return f
-}
-
-// handler returns the http.Handler that plays the Gitea server for
-// these tests. A request without an Authorization header gets 401.
-// Routes are tried top to bottom, matched on HTTP method plus a path
-// suffix or substring, and the first match wins — so the order of the
-// branches below is significant. A request that matches no branch gets
-// a 404 whose body starts with "unhandled".
-func (f *mergeIssuesFake) handler() http.Handler {
-	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.Header.Get("Authorization") == "" {
-			http.Error(w, "no auth", http.StatusUnauthorized)
-			return
-		}
-		p := r.URL.Path
-		// POST /api/v1/repos/{owner}/{repo}/pulls/{number}/merge
-		if r.Method == http.MethodPost && strings.HasSuffix(p, "/merge") {
-			rest := strings.TrimPrefix(p, "/api/v1/repos/")
-			rest = strings.TrimSuffix(rest, "/merge")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 4 || parts[2] != "pulls" {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			var body mergePullRequestPayload
-			// Decode errors are discarded: an unreadable body simply
-			// records an empty style for this PR.
-			_ = json.NewDecoder(r.Body).Decode(&body)
-			key := parts[0] + "/" + parts[1] + "/" + parts[3]
-			f.mu.Lock()
-			f.merged[key] = body.Do
-			f.mu.Unlock()
-			w.WriteHeader(http.StatusOK)
-			return
-		}
-		// GET /api/v1/repos/{owner}/{repo}/issues/{index}
-		if r.Method == http.MethodGet && strings.Contains(p, "/issues/") {
-			rest := strings.TrimPrefix(p, "/api/v1/repos/")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 4 || parts[2] != "issues" {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			// A non-numeric index parses as 0 (the Atoi error is dropped).
-			idx, _ := strconv.Atoi(parts[3])
-			f.mu.Lock()
-			defer f.mu.Unlock()
-			for _, i := range f.issuesByRepo[repoKey] {
-				if i.Number == int64(idx) {
-					writeJSON(w, http.StatusOK, i)
-					return
-				}
-			}
-			// No repo-existence check here: an unknown repo and an
-			// unknown issue number both end up on this 404.
-			http.Error(w, "no issue", http.StatusNotFound)
-			return
-		}
-		// POST /api/v1/repos/{owner}/{repo}/issues/{index}/comments
-		if r.Method == http.MethodPost && strings.HasSuffix(p, "/comments") {
-			rest := strings.TrimPrefix(p, "/api/v1/repos/")
-			rest = strings.TrimSuffix(rest, "/comments")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 4 || parts[2] != "issues" {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			idx, _ := strconv.Atoi(parts[3])
-			var body issueCommentCreate
-			_ = json.NewDecoder(r.Body).Decode(&body)
-			// Bump the per-issue comment counter; the issue itself does
-			// not have to exist in issuesByRepo.
-			f.mu.Lock()
-			f.comments[repoKey+"/"+parts[3]] = f.comments[repoKey+"/"+parts[3]] + 1
-			cid := int64(f.comments[repoKey+"/"+parts[3]])
-			f.mu.Unlock()
-			// The returned ID combines the counter with index*100.
-			writeJSON(w, http.StatusCreated, IssueComment{
-				ID:   cid + int64(idx)*100,
-				Body: body.Body,
-				URL:  "http://gitea/x",
-			})
-			return
-		}
-		// GET /api/v1/repos/{owner}/{repo}/issues?state=...&type=...
-		if r.Method == http.MethodGet && strings.HasSuffix(p, "/issues") {
-			rest := strings.TrimSuffix(strings.TrimPrefix(p, "/api/v1/repos/"), "/issues")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 2 {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			f.mu.Lock()
-			defer f.mu.Unlock()
-			// A repo "exists" only once it has an entry in issuesByRepo
-			// (an empty slice counts).
-			issues, ok := f.issuesByRepo[repoKey]
-			if !ok {
-				http.Error(w, "no repo", http.StatusNotFound)
-				return
-			}
-			// Missing or unparsable page/limit fall back to 1 and 50.
-			// The state and type query parameters are not applied.
-			page, _ := strconv.Atoi(r.URL.Query().Get("page"))
-			if page == 0 {
-				page = 1
-			}
-			limit, _ := strconv.Atoi(r.URL.Query().Get("limit"))
-			if limit == 0 {
-				limit = 50
-			}
-			// Clamp both ends to len(issues) so a page past the end
-			// yields an empty slice.
-			start := (page - 1) * limit
-			end := start + limit
-			if start > len(issues) {
-				start = len(issues)
-			}
-			if end > len(issues) {
-				end = len(issues)
-			}
-			writeJSON(w, http.StatusOK, issues[start:end])
-			return
-		}
-		// POST /api/v1/repos/{owner}/{repo}/issues
-		if r.Method == http.MethodPost && strings.HasSuffix(p, "/issues") {
-			rest := strings.TrimSuffix(strings.TrimPrefix(p, "/api/v1/repos/"), "/issues")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 2 {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			var body issueCreate
-			_ = json.NewDecoder(r.Body).Decode(&body)
-			f.mu.Lock()
-			defer f.mu.Unlock()
-			// Number is the 1-based position in the repo's list; ID is
-			// Number+1000. Creating an issue also creates the repo entry.
-			n := int64(len(f.issuesByRepo[repoKey]) + 1)
-			issue := Issue{
-				ID:     n + 1000,
-				Number: n,
-				Title:  body.Title,
-				Body:   body.Body,
-				State:  "open",
-				URL:    "http://gitea/x/" + strconv.Itoa(int(n)),
-			}
-			f.issuesByRepo[repoKey] = append(f.issuesByRepo[repoKey], issue)
-			writeJSON(w, http.StatusCreated, issue)
-			return
-		}
-		http.Error(w, "unhandled "+r.Method+" "+p, http.StatusNotFound)
-	})
-}
-
-// newMergeIssuesClient starts an httptest server backed by f (closed
-// automatically when the test finishes) and returns a Client that talks
-// to it with the token "test-token".
-func newMergeIssuesClient(t *testing.T, f *mergeIssuesFake) *Client {
-	t.Helper()
-	server := httptest.NewServer(f.handler())
-	t.Cleanup(server.Close)
-	client := New(server.URL, "test-token")
-	client.HTTP = server.Client()
-	return client
-}
-
-// TestMergePullRequest_DefaultStyle checks that empty MergePROpts makes
-// the client post Do="merge" for the targeted PR.
-func TestMergePullRequest_DefaultStyle(t *testing.T) {
-	t.Parallel()
-	fake := newMergeIssuesFake()
-	client := newMergeIssuesClient(t, fake)
-
-	err := client.MergePullRequest(context.Background(), "acme", "blueprints", 42, MergePROpts{})
-	if err != nil {
-		t.Fatalf("MergePullRequest: %v", err)
-	}
-	got := fake.merged["acme/blueprints/42"]
-	if got != "merge" {
-		t.Errorf("default style: got %q, want merge", got)
-	}
-}
-
-// TestMergePullRequest_ExplicitStyle checks that an explicit
-// Style="squash" reaches the server unchanged in the Do field.
-func TestMergePullRequest_ExplicitStyle(t *testing.T) {
-	t.Parallel()
-	fake := newMergeIssuesFake()
-	client := newMergeIssuesClient(t, fake)
-
-	opts := MergePROpts{Style: "squash"}
-	err := client.MergePullRequest(context.Background(), "acme", "blueprints", 7, opts)
-	if err != nil {
-		t.Fatalf("MergePullRequest: %v", err)
-	}
-	got := fake.merged["acme/blueprints/7"]
-	if got != "squash" {
-		t.Errorf("got %q, want squash", got)
-	}
-}
-
-// TestMergePullRequest_InvalidStyleRejected checks that an unknown merge
-// style produces an error mentioning "invalid style".
-func TestMergePullRequest_InvalidStyleRejected(t *testing.T) {
-	t.Parallel()
-	client := New("http://x", "tok")
-	err := client.MergePullRequest(context.Background(), "acme", "r", 1, MergePROpts{Style: "wat"})
-	rejected := err != nil && strings.Contains(err.Error(), "invalid style")
-	if !rejected {
-		t.Errorf("err = %v, want invalid style", err)
-	}
-}
-
-// TestMergePullRequest_RejectsEmptyArgs checks that an empty org and a
-// zero PR number are each rejected with an error.
-func TestMergePullRequest_RejectsEmptyArgs(t *testing.T) {
-	t.Parallel()
-	ctx := context.Background()
-	client := New("http://x", "tok")
-	if client.MergePullRequest(ctx, "", "r", 1, MergePROpts{}) == nil {
-		t.Error("want error for empty org")
-	}
-	if client.MergePullRequest(ctx, "o", "r", 0, MergePROpts{}) == nil {
-		t.Error("want error for non-positive number")
-	}
-}
-
-// TestCreateIssue_HappyPath checks that the first issue created on a
-// repo comes back with the posted title and body and Number 1.
-func TestCreateIssue_HappyPath(t *testing.T) {
-	t.Parallel()
-	client := newMergeIssuesClient(t, newMergeIssuesFake())
-
-	created, err := client.CreateIssue(context.Background(), "acme", "blueprints", "hello", "world")
-	if err != nil {
-		t.Fatalf("CreateIssue: %v", err)
-	}
-	matches := created.Title == "hello" && created.Body == "world" && created.Number == 1
-	if !matches {
-		t.Errorf("unexpected issue: %+v", created)
-	}
-}
-
-// TestCreateIssue_RejectsEmptyTitle checks that an empty title is
-// rejected with an error.
-func TestCreateIssue_RejectsEmptyTitle(t *testing.T) {
-	t.Parallel()
-	client := New("http://x", "tok")
-	_, err := client.CreateIssue(context.Background(), "o", "r", "", "body")
-	if err == nil {
-		t.Error("want error for empty title")
-	}
-}
-
-// TestListIssues_AfterCreate creates three issues and checks that
-// ListIssues returns exactly those three, in creation order, none of
-// them flagged as a pull request.
-func TestListIssues_AfterCreate(t *testing.T) {
-	t.Parallel()
-	f := newMergeIssuesFake()
-	c := newMergeIssuesClient(t, f)
-
-	titles := []string{"a", "b", "c"}
-	for _, ti := range titles {
-		if _, err := c.CreateIssue(context.Background(), "acme", "blueprints", ti, ""); err != nil {
-			t.Fatalf("CreateIssue %q: %v", ti, err)
-		}
-	}
-	out, err := c.ListIssues(context.Background(), "acme", "blueprints", ListIssuesOpts{})
-	if err != nil {
-		t.Fatalf("ListIssues: %v", err)
-	}
-	// Fatalf, not Errorf: the loop below indexes out[i], so carrying on
-	// with a short result would panic instead of reporting the mismatch.
-	if len(out) != len(titles) {
-		t.Fatalf("want 3 issues, got %d", len(out))
-	}
-	for i, want := range titles {
-		if out[i].Title != want {
-			t.Errorf("issues[%d].Title = %q, want %q", i, out[i].Title, want)
-		}
-		if out[i].IsPullRequest() {
-			t.Errorf("issues[%d] reported IsPullRequest", i)
-		}
-	}
-}
-
-// TestListIssues_RepoNotFound checks that listing issues on a repo the
-// fake has never seen yields ErrRepoNotFound.
-func TestListIssues_RepoNotFound(t *testing.T) {
-	t.Parallel()
-	client := newMergeIssuesClient(t, newMergeIssuesFake())
-	_, err := client.ListIssues(context.Background(), "ghost", "missing", ListIssuesOpts{})
-	if errors.Is(err, ErrRepoNotFound) {
-		return
-	}
-	t.Errorf("err = %v, want ErrRepoNotFound", err)
-}
-
-// TestGetIssue_AfterCreate creates one issue and checks that GetIssue
-// reads it back by the number the create call returned.
-func TestGetIssue_AfterCreate(t *testing.T) {
-	t.Parallel()
-	f := newMergeIssuesFake()
-	c := newMergeIssuesClient(t, f)
-	// Fail at the setup step if creation breaks; otherwise issue.Number
-	// would be 0 and the failure would surface as a GetIssue error.
-	issue, err := c.CreateIssue(context.Background(), "acme", "blueprints", "first", "body")
-	if err != nil {
-		t.Fatalf("CreateIssue: %v", err)
-	}
-
-	got, err := c.GetIssue(context.Background(), "acme", "blueprints", issue.Number)
-	if err != nil {
-		t.Fatalf("GetIssue: %v", err)
-	}
-	if got.Title != "first" {
-		t.Errorf("got Title=%q, want first", got.Title)
-	}
-}
-
-// TestGetIssue_NotFound checks that asking for an issue number that was
-// never created yields an error for which IsNotFound reports true.
-func TestGetIssue_NotFound(t *testing.T) {
-	t.Parallel()
-	fake := newMergeIssuesFake()
-	// Register the repo with no issues in it, so number 999 is absent.
-	fake.issuesByRepo["acme/blueprints"] = []Issue{}
-	client := newMergeIssuesClient(t, fake)
-	_, err := client.GetIssue(context.Background(), "acme", "blueprints", 999)
-	if IsNotFound(err) {
-		return
-	}
-	t.Errorf("err = %v, want IsNotFound==true", err)
-}
-
-// TestCommentOnIssue_HappyPath checks that the comment returned by the
-// server carries the body that was posted.
-func TestCommentOnIssue_HappyPath(t *testing.T) {
-	t.Parallel()
-	client := newMergeIssuesClient(t, newMergeIssuesFake())
-	comment, err := client.CommentOnIssue(context.Background(), "acme", "blueprints", 7, "looks good")
-	if err != nil {
-		t.Fatalf("CommentOnIssue: %v", err)
-	}
-	if got := comment.Body; got != "looks good" {
-		t.Errorf("Body = %q, want looks good", got)
-	}
-}
-
-// TestCommentOnIssue_RejectsEmptyBody checks that an empty comment body
-// is rejected with an error.
-func TestCommentOnIssue_RejectsEmptyBody(t *testing.T) {
-	t.Parallel()
-	client := New("http://x", "tok")
-	_, err := client.CommentOnIssue(context.Background(), "o", "r", 1, "")
-	if err == nil {
-		t.Error("want error for empty body")
-	}
-}
--- a/core/controllers/pkg/gitea/pulls.go
+++ b/core/controllers/pkg/gitea/pulls.go
@ -1,213 +0,0 @@
-// Read-side PR operations on top of the canonical pkg/gitea Client.
-//
-// client.go already carries the write-side (CreatePullRequest + the
-// `findOpenPR` race fallback) but had no public read surface for the
-// MCP server's `gitea.pr.list` + `gitea.pr.get` tools (Wave 8). The two
-// helpers here add exactly that: a paginated list with state + filter
-// passthrough, and a single-PR fetch by number. Both reuse the existing
-// `Client.do` envelope so HTTP error mapping (ErrRepoNotFound) is
-// shared with the rest of the surface.
-//
-// New endpoints (Gitea REST API, repo scope):
-//
-//	GET /api/v1/repos/{owner}/{repo}/pulls?state=...&page=...&limit=50
-//	GET /api/v1/repos/{owner}/{repo}/pulls/{number}
-//
-// Why a separate file: client.go is already 800+ LOC. Wave 8 review
-// scope is the two new methods; isolating them keeps the diff scoped
-// and the canonical surface auditable from one place.
-package gitea
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"net/http"
-	"net/url"
-)
-
-// PullRequestState constrains the `state` query param on ListPullRequests.
-// Gitea accepts "open" | "closed" | "all"; empty defaults server-side to
-// "open" but we make the wire shape explicit here so callers don't drift.
-type PullRequestState string
-
-const (
-	// PRStateOpen lists only open PRs (Gitea default).
-	PRStateOpen PullRequestState = "open"
-	// PRStateClosed lists only closed/merged PRs.
-	PRStateClosed PullRequestState = "closed"
-	// PRStateAll lists every PR regardless of state.
-	PRStateAll PullRequestState = "all"
-)
-
-// ListPRsOpts threads optional filters through ListPullRequests without
-// expanding the positional signature.
-type ListPRsOpts struct {
-	// State filters by open/closed/all. Empty → server default ("open").
-	State PullRequestState
-	// Head filters by source branch (matches Gitea's `head=org:branch`).
-	// Empty → no head filter.
-	Head string
-	// Base filters by target branch. Empty → no base filter.
-	Base string
-}
-
-// ListPullRequests collects all PRs on org/repo that match opts. It
-// requests pages 1, 2, ... with limit=50 and stops at the first page
-// holding fewer than 50 rows; rows keep the order Gitea sent them in.
-//
-// Filters are forwarded only when set: opts.State as `state`, opts.Base
-// as `base`, and opts.Head as `head`. A Head value without a colon is
-// sent as `<org>:<Head>`; one that already contains a colon goes
-// through verbatim.
-//
-// Errors: empty org or repo is rejected before any request is made; a
-// 404 on the first page becomes ErrRepoNotFound; every other error from
-// c.do (including a 404 on a later page) is returned unchanged.
-//
-// NOTE(review): the short-page stop condition assumes the server really
-// serves 50 rows per page — confirm against the instance's page cap.
-//
-// Added Wave 8 for openova-sandbox-mcp `gitea.pr.list`.
-func (c *Client) ListPullRequests(ctx context.Context, org, repo string, opts ListPRsOpts) ([]PullRequest, error) {
-	if org == "" || repo == "" {
-		return nil, errors.New("gitea: ListPullRequests requires non-empty org, repo")
-	}
-	const pageSize = 50
-	// Only `page` varies between requests; the rest is set up once.
-	params := url.Values{}
-	params.Set("limit", fmt.Sprintf("%d", pageSize))
-	if opts.State != "" {
-		params.Set("state", string(opts.State))
-	}
-	if head := opts.Head; head != "" {
-		// Qualify a bare branch name with the org; leave an already
-		// qualified `<org>:<branch>` value alone.
-		if !containsRune(head, ':') {
-			head = org + ":" + head
-		}
-		params.Set("head", head)
-	}
-	if opts.Base != "" {
-		params.Set("base", opts.Base)
-	}
-	listPath := "/repos/" + url.PathEscape(org) + "/" + url.PathEscape(repo) + "/pulls?"
-
-	prs := make([]PullRequest, 0, pageSize)
-	page := 1
-	for {
-		params.Set("page", fmt.Sprintf("%d", page))
-		var rows []PullRequest
-		status, _, err := c.do(ctx, http.MethodGet, listPath+params.Encode(), nil, &rows)
-		switch {
-		case err != nil && page == 1 && status == http.StatusNotFound:
-			return nil, ErrRepoNotFound
-		case err != nil:
-			return nil, err
-		}
-		prs = append(prs, rows...)
-		if len(rows) < pageSize {
-			return prs, nil
-		}
-		page++
-	}
-}
-
-// MergePROpts threads the MERGE payload fields the MCP tool exposes
-// without growing the positional signature.
-type MergePROpts struct {
-	// Style is one of "merge" | "rebase" | "rebase-merge" | "squash".
-	// Empty defaults to "merge" (the conservative, non-rewriting option
-	// — keeps the head SHA + parent chain stable so CI doesn't re-run
-	// on a synthetic merge commit).
-	Style string
-	// Title overrides the default merge-commit title (squash + merge
-	// styles only). Empty → Gitea picks the PR title.
-	Title string
-	// Message overrides the default merge-commit body. Empty → Gitea
-	// picks the PR body.
-	Message string
-}
-
-// mergePullRequestPayload — POST body for /pulls/{number}/merge.
-// Gitea's `MergePullRequestOption` uses CamelCase JSON field names
-// (`Do`, `MergeTitleField`, `MergeMessageField`) which is exactly the
-// shape this struct emits.
-type mergePullRequestPayload struct {
-	Do                string `json:"Do"`
-	MergeTitleField   string `json:"MergeTitleField,omitempty"`
-	MergeMessageField string `json:"MergeMessageField,omitempty"`
-}
-
-// MergePullRequest asks Gitea to merge PR #number on org/repo.
-//
-// opts.Style selects the merge method and must be one of "merge",
-// "rebase", "rebase-merge" or "squash"; an empty Style is sent as
-// "merge". opts.Title and opts.Message travel as MergeTitleField and
-// MergeMessageField and are omitted from the payload when empty.
-//
-// Validation errors (empty org/repo, non-positive number, unknown
-// style) are returned before any request is made. A 404 from the server
-// becomes ErrRepoNotFound, whether the repo or the PR is missing. Any
-// other error from c.do — for example the 405 (not mergeable) and 409
-// (head changed) responses Gitea uses — is returned unchanged so the
-// caller can inspect it.
-//
-// Added Wave 11 for openova-sandbox-mcp `gitea.pr.merge`.
-func (c *Client) MergePullRequest(ctx context.Context, org, repo string, number int64, opts MergePROpts) error {
-	switch {
-	case org == "" || repo == "":
-		return errors.New("gitea: MergePullRequest requires non-empty org, repo")
-	case number <= 0:
-		return errors.New("gitea: MergePullRequest requires positive PR number")
-	}
-	payload := mergePullRequestPayload{
-		Do:                opts.Style,
-		MergeTitleField:   opts.Title,
-		MergeMessageField: opts.Message,
-	}
-	if payload.Do == "" {
-		payload.Do = "merge"
-	}
-	if s := payload.Do; s != "merge" && s != "rebase" && s != "rebase-merge" && s != "squash" {
-		return fmt.Errorf("gitea: MergePullRequest: invalid style %q (want merge|rebase|rebase-merge|squash)", s)
-	}
-	mergePath := fmt.Sprintf("/repos/%s/%s/pulls/%d/merge", url.PathEscape(org), url.PathEscape(repo), number)
-	status, _, err := c.do(ctx, http.MethodPost, mergePath, payload, nil)
-	if err == nil {
-		return nil
-	}
-	if status == http.StatusNotFound {
-		return ErrRepoNotFound
-	}
-	return err
-}
-
-// GetPullRequest fetches a single PR by number.
-//
-// Every 404 from the server is mapped to ErrRepoNotFound. The response
-// body is not inspected, so "repo gone" and "PR number gone" are not
-// distinguished here. Any other failure from c.do is returned
-// unchanged.
-//
-// Returns an error without making a request when org or repo is empty
-// or number is not positive.
-//
-// Added Wave 8 for openova-sandbox-mcp `gitea.pr.get`.
-func (c *Client) GetPullRequest(ctx context.Context, org, repo string, number int64) (PullRequest, error) {
-	if org == "" || repo == "" {
-		return PullRequest{}, errors.New("gitea: GetPullRequest requires non-empty org, repo")
-	}
-	if number <= 0 {
-		return PullRequest{}, errors.New("gitea: GetPullRequest requires positive PR number")
-	}
-	endpoint := fmt.Sprintf("/repos/%s/%s/pulls/%d",
-		url.PathEscape(org), url.PathEscape(repo), number)
-	var out PullRequest
-	status, _, err := c.do(ctx, http.MethodGet, endpoint, nil, &out)
-	if err != nil {
-		if status == http.StatusNotFound {
-			return PullRequest{}, ErrRepoNotFound
-		}
-		return PullRequest{}, err
-	}
-	return out, nil
-}
-
-// containsRune reports whether r occurs among the runes of s. It stands
-// in for strings.ContainsRune so that pulls.go does not have to import
-// "strings" for this one call.
-func containsRune(s string, r rune) bool {
-	found := false
-	for _, ch := range s {
-		if ch == r {
-			found = true
-			break
-		}
-	}
-	return found
-}
--- a/core/controllers/pkg/gitea/pulls_test.go
+++ b/core/controllers/pkg/gitea/pulls_test.go
@ -1,249 +0,0 @@
-package gitea
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"strconv"
-	"strings"
-	"testing"
-)
-
-// pullsFake is a tiny httptest stand-in scoped to the two new endpoints
-// added in pulls.go: paginated list + get-by-number. We don't reuse the
-// big fakeGitea handler from client_test.go because that one's GET /pulls
-// branch is filter-by-head only (it was written before list-with-state
-// existed) and overriding it would risk regressing CreatePullRequest's
-// 409 path. A scoped fake keeps the new tests independent.
-type pullsFake struct {
-	// repos that exist (key = "owner/repo").
-	repos map[string]bool
-	// prs is keyed by "owner/repo/number".
-	prs map[string]PullRequest
-}
-
-// newPullsFake returns a fake with both maps allocated and empty, so
-// tests can register repos and PRs straight away.
-func newPullsFake() *pullsFake {
-	f := new(pullsFake)
-	f.repos = make(map[string]bool)
-	f.prs = make(map[string]PullRequest)
-	return f
-}
-
-// handler returns the http.Handler that plays the Gitea server for the
-// pulls tests. A request without an Authorization header gets 401. The
-// get-by-number route is tried before the list route; a request that
-// matches neither gets a 404 whose body starts with "unhandled". Both
-// routes answer 404 "no repo" unless the repo is registered in f.repos.
-func (f *pullsFake) handler() http.Handler {
-	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.Header.Get("Authorization") == "" {
-			http.Error(w, "no auth", http.StatusUnauthorized)
-			return
-		}
-		p := r.URL.Path
-
-		// GET /api/v1/repos/{owner}/{repo}/pulls/{number}
-		if r.Method == http.MethodGet &&
-			strings.HasPrefix(p, "/api/v1/repos/") &&
-			strings.Contains(p, "/pulls/") {
-			rest := strings.TrimPrefix(p, "/api/v1/repos/")
-			// rest = "owner/repo/pulls/123"
-			parts := strings.Split(rest, "/")
-			if len(parts) != 4 || parts[2] != "pulls" {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			if !f.repos[repoKey] {
-				http.Error(w, "no repo", http.StatusNotFound)
-				return
-			}
-			// The number segment is used as a raw string key; it is not
-			// parsed as an integer.
-			pr, ok := f.prs[repoKey+"/"+parts[3]]
-			if !ok {
-				http.Error(w, "no pr", http.StatusNotFound)
-				return
-			}
-			writeJSON(w, http.StatusOK, pr)
-			return
-		}
-
-		// GET /api/v1/repos/{owner}/{repo}/pulls?state=...
-		if r.Method == http.MethodGet &&
-			strings.HasPrefix(p, "/api/v1/repos/") &&
-			strings.HasSuffix(p, "/pulls") {
-			rest := strings.TrimSuffix(strings.TrimPrefix(p, "/api/v1/repos/"), "/pulls")
-			parts := strings.Split(rest, "/")
-			if len(parts) != 2 {
-				http.Error(w, "bad path", http.StatusBadRequest)
-				return
-			}
-			repoKey := parts[0] + "/" + parts[1]
-			if !f.repos[repoKey] {
-				http.Error(w, "no repo", http.StatusNotFound)
-				return
-			}
-			// Missing or unparsable page/limit fall back to 1 and 50.
-			// Only `state` is applied as a filter; `head` and `base` are
-			// not looked at.
-			stateWanted := r.URL.Query().Get("state")
-			page, _ := strconv.Atoi(r.URL.Query().Get("page"))
-			if page == 0 {
-				page = 1
-			}
-			limit, _ := strconv.Atoi(r.URL.Query().Get("limit"))
-			if limit == 0 {
-				limit = 50
-			}
-			// An empty state or "all" keeps every PR of the repo.
-			out := []PullRequest{}
-			for k, pr := range f.prs {
-				if !strings.HasPrefix(k, repoKey+"/") {
-					continue
-				}
-				if stateWanted != "" && stateWanted != "all" && pr.State != stateWanted {
-					continue
-				}
-				out = append(out, pr)
-			}
-			// Stable order by Number ascending so test assertions can index.
-			// (Map iteration order is random, so the insertion sort below
-			// is what makes the response deterministic.)
-			for i := 1; i < len(out); i++ {
-				for j := i; j > 0 && out[j-1].Number > out[j].Number; j-- {
-					out[j-1], out[j] = out[j], out[j-1]
-				}
-			}
-			// Apply pagination window.
-			// Both ends are clamped to len(out), so a page past the end
-			// yields an empty slice.
-			start := (page - 1) * limit
-			end := start + limit
-			if start > len(out) {
-				start = len(out)
-			}
-			if end > len(out) {
-				end = len(out)
-			}
-			writeJSON(w, http.StatusOK, out[start:end])
-			return
-		}
-
-		http.Error(w, "unhandled "+r.Method+" "+p, http.StatusNotFound)
-	})
-}
-
-// newPullsClient starts an httptest server backed by f (closed
-// automatically when the test finishes) and returns a Client that talks
-// to it with the token "test-token".
-func newPullsClient(t *testing.T, f *pullsFake) *Client {
-	t.Helper()
-	server := httptest.NewServer(f.handler())
-	t.Cleanup(server.Close)
-	client := New(server.URL, "test-token")
-	client.HTTP = server.Client()
-	return client
-}
-
-// TestListPullRequests_StateFilter seeds three open and two closed PRs
-// and checks that the open, closed and all state filters return 3, 2
-// and 5 PRs respectively, with only open PRs in the open list.
-func TestListPullRequests_StateFilter(t *testing.T) {
-	t.Parallel()
-	fake := newPullsFake()
-	fake.repos["acme/blueprints"] = true
-	// seed registers PRs numbered from..to in the given state, with head
-	// branches named branchPrefix+number and base branch "main".
-	seed := func(from, to int64, state, branchPrefix string) {
-		for n := from; n <= to; n++ {
-			pr := PullRequest{ID: n, Number: n, State: state, Title: fmt.Sprintf("%s #%d", state, n)}
-			pr.Head.Ref = branchPrefix + fmt.Sprint(n)
-			pr.Base.Ref = "main"
-			fake.prs[fmt.Sprintf("acme/blueprints/%d", n)] = pr
-		}
-	}
-	seed(1, 3, "open", "feature/")
-	seed(10, 11, "closed", "old/")
-	client := newPullsClient(t, fake)
-	ctx := context.Background()
-
-	openPRs, err := client.ListPullRequests(ctx, "acme", "blueprints", ListPRsOpts{State: PRStateOpen})
-	if err != nil {
-		t.Fatalf("ListPullRequests open: %v", err)
-	}
-	if len(openPRs) != 3 {
-		t.Fatalf("want 3 open PRs, got %d (%v)", len(openPRs), openPRs)
-	}
-	for i := range openPRs {
-		if state := openPRs[i].State; state != "open" {
-			t.Errorf("unexpected state %q on open list", state)
-		}
-	}
-
-	closedPRs, err := client.ListPullRequests(ctx, "acme", "blueprints", ListPRsOpts{State: PRStateClosed})
-	if err != nil {
-		t.Fatalf("ListPullRequests closed: %v", err)
-	}
-	if len(closedPRs) != 2 {
-		t.Fatalf("want 2 closed PRs, got %d", len(closedPRs))
-	}
-
-	allPRs, err := client.ListPullRequests(ctx, "acme", "blueprints", ListPRsOpts{State: PRStateAll})
-	if err != nil {
-		t.Fatalf("ListPullRequests all: %v", err)
-	}
-	if len(allPRs) != 5 {
-		t.Fatalf("want 5 PRs (all), got %d", len(allPRs))
-	}
-}
-
-// TestListPullRequests_RepoNotFound checks that listing PRs on a repo
-// the fake does not know yields ErrRepoNotFound.
-func TestListPullRequests_RepoNotFound(t *testing.T) {
-	t.Parallel()
-	client := newPullsClient(t, newPullsFake())
-
-	_, err := client.ListPullRequests(context.Background(), "ghost", "missing", ListPRsOpts{})
-	if errors.Is(err, ErrRepoNotFound) {
-		return
-	}
-	t.Errorf("err = %v, want ErrRepoNotFound", err)
-}
-
-// TestListPullRequests_RejectsEmptyArgs checks that an empty org and an
-// empty repo are each rejected with an error.
-func TestListPullRequests_RejectsEmptyArgs(t *testing.T) {
-	t.Parallel()
-	ctx := context.Background()
-	client := New("http://x", "tok")
-	_, err := client.ListPullRequests(ctx, "", "r", ListPRsOpts{})
-	if err == nil {
-		t.Error("want error for empty org")
-	}
-	_, err = client.ListPullRequests(ctx, "o", "", ListPRsOpts{})
-	if err == nil {
-		t.Error("want error for empty repo")
-	}
-}
-
-// TestGetPullRequest_HappyPath seeds PR #42 and checks that
-// GetPullRequest returns its number, title and head ref.
-func TestGetPullRequest_HappyPath(t *testing.T) {
-	t.Parallel()
-	fake := newPullsFake()
-	fake.repos["acme/blueprints"] = true
-	seeded := PullRequest{ID: 42, Number: 42, State: "open", Title: "hello"}
-	seeded.Head.Ref = "feature/x"
-	seeded.Base.Ref = "main"
-	fake.prs["acme/blueprints/42"] = seeded
-	client := newPullsClient(t, fake)
-
-	got, err := client.GetPullRequest(context.Background(), "acme", "blueprints", 42)
-	if err != nil {
-		t.Fatalf("GetPullRequest: %v", err)
-	}
-	matches := got.Number == 42 && got.Title == "hello" && got.Head.Ref == "feature/x"
-	if !matches {
-		t.Errorf("unexpected PR: %+v", got)
-	}
-}
-
-// TestGetPullRequest_NotFound checks that asking for a PR number that
-// does not exist on a known repo yields an error for which IsNotFound
-// reports true.
-func TestGetPullRequest_NotFound(t *testing.T) {
-	t.Parallel()
-	fake := newPullsFake()
-	fake.repos["acme/blueprints"] = true
-	client := newPullsClient(t, fake)
-
-	_, err := client.GetPullRequest(context.Background(), "acme", "blueprints", 999)
-	if IsNotFound(err) {
-		return
-	}
-	t.Errorf("err = %v, want IsNotFound==true", err)
-}
-
-// TestGetPullRequest_RejectsBadArgs checks that an empty org and a zero
-// PR number are each rejected with an error.
-func TestGetPullRequest_RejectsBadArgs(t *testing.T) {
-	t.Parallel()
-	ctx := context.Background()
-	client := New("http://x", "tok")
-	_, err := client.GetPullRequest(ctx, "", "r", 1)
-	if err == nil {
-		t.Error("want error for empty org")
-	}
-	_, err = client.GetPullRequest(ctx, "o", "r", 0)
-	if err == nil {
-		t.Error("want error for non-positive number")
-	}
-}
-
-// Blank reference to json.Unmarshal. Its only effect is to keep the
-// "encoding/json" import in use, since nothing else in this file calls
-// into that package. It does not check PullRequest's JSON shape at
-// compile time.
-var _ = json.Unmarshal
--- a/core/controllers/pkg/natsbus/subscriber.go
+++ b/core/controllers/pkg/natsbus/subscriber.go
@ -1,300 +0,0 @@
-// Package natsbus is a minimal NATS JetStream subscriber for the
-// in-cluster Group-C controllers (organization-controller,
-// sandbox-controller). It exists to close the consume-leg of DoD D35
-// (`catalyst.tenant.created` / `catalyst.order.placed` /
-// `catalyst.tenant.sandbox_requested` round-trip end-to-end) when the
-// publish-leg PR #1626 wired into core/services/shared/events landed.
-//
-// The package is deliberately self-contained inside core/controllers/
-// (separate Go module from core/services/shared/events) — copying the
-// ~80 LOC of JetStream connect + durable-consumer attach is cheaper
-// than dragging the entire core/services/shared/events package
-// (BrokerPublisher / MultiSubscriber / Kafka transport) into a
-// controller binary that only needs to subscribe.
-//
-// What the controllers do with each envelope is up to the caller —
-// the package surface is intentionally narrow:
-//
-//   - Connect(url) → (*Subscriber, error), Close() teardown.
-//   - Subscriber.Subscribe(ctx, subject, durable, handler, opts) starts
-//     a durable JetStream consumer and dispatches every envelope to
-//     handler. Handler returning nil → Ack; non-nil → Nak (5s default
-//     backoff so transient downstream blips don't hot-loop).
-//   - Event mirrors core/services/shared/events.Event so the JSON
-//     wire format is identical (id / type / source / timestamp /
-//     tenant_id / data / metadata).
-//
-// The expected operational pattern is:
-//
-//	r.Reconcile is the canonical path for steady-state convergence.
-//	NATS subscribers in main.go observe domain events as they fire
-//	and enqueue the corresponding CR for reconcile (so the controller
-//	responds within ~50ms instead of waiting up to 30s for the next
-//	requeue). The 30s RequeueAfter inside r.Reconcile remains the
-//	fallback — NATS message loss never strands a CR; the next
-//	informer-driven Reconcile picks it up.
-//
-// Connection options mirror core/cmd/projector/internal/nats:
-// MaxReconnects=-1 (retry forever — JetStream rollouts shouldn't
-// crash-loop the controller), 2s ReconnectWait, 20s PingInterval.
-//
-// Per Inviolable Principle #4 every knob is env-driven; this package
-// reads nothing from os.Getenv directly — main.go owns the env
-// surface and passes URL + StreamName + DurableName into Connect /
-// Subscribe.
-package natsbus
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"log/slog"
-	"time"
-
-	"github.com/nats-io/nats.go"
-	"github.com/nats-io/nats.go/jetstream"
-)
-
-// StreamCatalystSME is the canonical JetStream Stream backing every
-// SME convergence event (catalyst.tenant.*, catalyst.billing.*,
-// catalyst.domain.*, catalyst.provision.*). Mirrors
-// core/services/shared/events.StreamCatalystSME — kept in sync by the
-// constant lifting up to the chart / per-Sovereign overlay.
-const StreamCatalystSME = "CATALYST_SME"
-
-// Canonical subjects the Group-C controllers consume. Each constant
-// matches the publish-side subject derived by
-// core/services/shared/events.CanonicalSubject — kept in sync by the
-// D35 test plan.
-const (
-	// SubjectTenantCreated fires when tenant-service finalises a new
-	// tenant (publish-side PR #1626). organization-controller subscribes
-	// so the corresponding Organization CR converges within ~50ms of
-	// the marketplace checkout instead of waiting on the 30s fallback.
-	SubjectTenantCreated = "catalyst.tenant.created"
-
-	// SubjectOrderPlaced fires when billing-service records a paid
-	// order (publish-side PR #1626). organization-controller subscribes
-	// to observe the round-trip and trigger reconcile of the per-tenant
-	// Organization CR (so day-1 app installs in the basket land on the
-	// per-Org Gitea repo without the operator polling catalyst-api).
-	SubjectOrderPlaced = "catalyst.billing.order.placed"
-
-	// SubjectTenantSandboxRequested fires when the marketplace cart
-	// contained the sandbox product (publish-side tenant-service in
-	// PR #1633). sandbox-controller subscribes so the per-Sandbox
-	// reconcile loop runs immediately on cart completion.
-	SubjectTenantSandboxRequested = "catalyst.tenant.sandbox_requested"
-)
-
-// Event is the JSON envelope on every catalyst.* subject. The shape
-// mirrors core/services/shared/events.Event exactly — fields are
-// duplicated here (rather than imported) to keep the controllers
-// module free of a dependency on core/services/shared, which pulls
-// in franz-go + Kafka transports the controllers never touch.
-type Event struct {
-	ID        string            `json:"id"`
-	Type      string            `json:"type"`
-	Source    string            `json:"source"`
-	Timestamp time.Time         `json:"timestamp"`
-	TenantID  string            `json:"tenant_id"`
-	Data      json.RawMessage   `json:"data"`
-	Metadata  map[string]string `json:"metadata,omitempty"`
-}
-
-// Subscriber holds an open NATS+JetStream connection. Construct via
-// Connect; close via Close. Subscribe may be called from multiple
-// goroutines — each call attaches an independent durable consumer.
-type Subscriber struct {
-	nc *nats.Conn
-	js jetstream.JetStream
-}
-
-// Connect opens a NATS connection at url and binds a JetStream client
-// on top. Empty url falls back to nats.DefaultURL so unit tests can
-// exercise the package against a local nats-server without env wiring.
-//
-// Returns an error if the broker is unreachable; the caller (main.go)
-// is expected to either bail out (NATS is canonical on Sovereigns) or
-// log and continue (Catalyst-Zero / contabo, where REDPANDA_BROKERS
-// is the authoritative bus).
-func Connect(url string) (*Subscriber, error) {
-	if url == "" {
-		url = nats.DefaultURL
-	}
-	nc, err := nats.Connect(url,
-		nats.Name("catalyst-controllers"),
-		nats.MaxReconnects(-1),
-		nats.ReconnectWait(2*time.Second),
-		nats.PingInterval(20*time.Second),
-		nats.MaxPingsOutstanding(3),
-	)
-	if err != nil {
-		return nil, fmt.Errorf("natsbus: connect %s: %w", url, err)
-	}
-	js, err := jetstream.New(nc)
-	if err != nil {
-		nc.Close()
-		return nil, fmt.Errorf("natsbus: jetstream init: %w", err)
-	}
-	return &Subscriber{nc: nc, js: js}, nil
-}
-
-// Handler is the per-message callback. Returning a nil error Acks the
-// message (JetStream advances the consumer cursor); returning a
-// non-nil error Naks with the configured NakBackoff (5s by default) so
-// transient handler failures redeliver instead of stranding the envelope.
-//
-// Implementations SHOULD be idempotent — JetStream guarantees
-// at-least-once delivery, so the same Event.ID may arrive twice on a
-// broker rebalance.
-type Handler func(ctx context.Context, ev *Event) error
-
-// SubscribeOptions tunes Subscribe behaviour. Zero values yield sane
-// production defaults (Stream=CATALYST_SME, AckWait=30s, no MaxDeliver
-// cap so a permanently-failing handler does NOT silently drop events
-// — operator-visible nak loops are the right failure mode).
-type SubscribeOptions struct {
-	// Stream is the JetStream Stream the FilterSubject lives on.
-	// Defaults to StreamCatalystSME.
-	Stream string
-	// AckWait bounds how long JetStream waits for Ack before redeliver.
-	// Defaults to 30 seconds.
-	AckWait time.Duration
-	// NakBackoff is the backoff inserted before redelivery when the
-	// Handler returns a non-nil error. Defaults to 5 seconds.
-	NakBackoff time.Duration
-}
-
-// Subscribe attaches a durable consumer to subject on options.Stream
-// (default StreamCatalystSME) under the supplied durable name, and
-// dispatches every envelope to handler.
-//
-// Subscribe is non-blocking: it returns once the consumer has been
-// created AND the underlying Consume loop has been started. The
-// loop runs until ctx is cancelled. To stop, cancel ctx — the
-// underlying JetStream ConsumeContext is then stopped automatically.
-//
-// Durable names are stable across pod restarts so JetStream resumes
-// from the committed sequence after a controller-manager rollout.
-// MaxDeliver=-1 (retry forever) matches the
-// core/services/shared/events.MultiSubscriber convention: operator-
-// visible nak loops are the right failure mode for unrecoverable
-// handler errors, not silent drops.
-func (s *Subscriber) Subscribe(
-	ctx context.Context,
-	subject, durable string,
-	handler Handler,
-	opts SubscribeOptions,
-) error {
-	if s == nil || s.js == nil {
-		return errors.New("natsbus: subscriber not initialised")
-	}
-	if subject == "" {
-		return errors.New("natsbus: Subscribe requires subject")
-	}
-	if durable == "" {
-		return errors.New("natsbus: Subscribe requires durable name")
-	}
-	if handler == nil {
-		return errors.New("natsbus: Subscribe requires handler")
-	}
-	stream := opts.Stream
-	if stream == "" {
-		stream = StreamCatalystSME
-	}
-	ackWait := opts.AckWait
-	if ackWait <= 0 {
-		ackWait = 30 * time.Second
-	}
-	nakBackoff := opts.NakBackoff
-	if nakBackoff <= 0 {
-		nakBackoff = 5 * time.Second
-	}
-
-	cons, err := s.js.CreateOrUpdateConsumer(ctx, stream, jetstream.ConsumerConfig{
-		Durable:       durable,
-		Description:   fmt.Sprintf("controllers natsbus %s on %s", durable, subject),
-		AckPolicy:     jetstream.AckExplicitPolicy,
-		AckWait:       ackWait,
-		FilterSubject: subject,
-		DeliverPolicy: jetstream.DeliverAllPolicy,
-		MaxDeliver:    -1,
-	})
-	if err != nil {
-		return fmt.Errorf("natsbus: create consumer %s on %s/%s: %w", durable, stream, subject, err)
-	}
-
-	cc, err := cons.Consume(func(msg jetstream.Msg) {
-		dispatchOne(ctx, msg, handler, subject, durable, nakBackoff)
-	})
-	if err != nil {
-		return fmt.Errorf("natsbus: start consume %s: %w", durable, err)
-	}
-
-	slog.Info("natsbus: subscribed",
-		"subject", subject, "durable", durable, "stream", stream)
-
-	// Stop the JetStream consume context when ctx is cancelled. We do
-	// this in a goroutine so Subscribe returns immediately — the caller
-	// (main.go) wires the same ctx to manager.Start so SIGTERM unwinds
-	// every Subscribe at once.
-	go func() {
-		<-ctx.Done()
-		cc.Stop()
-		slog.Info("natsbus: stopped", "subject", subject, "durable", durable)
-	}()
-	return nil
-}
-
-// dispatchOne parses one JetStream message into an Event, invokes the
-// handler with a per-message timeout, and Acks / Naks per the result.
-// Malformed JSON is Ack'd-skipped (with an error log) so a poison-pill
-// envelope cannot hot-loop the consumer.
-func dispatchOne(
-	parent context.Context,
-	msg jetstream.Msg,
-	handler Handler,
-	subject, durable string,
-	nakBackoff time.Duration,
-) {
-	var ev Event
-	if err := json.Unmarshal(msg.Data(), &ev); err != nil {
-		slog.Error("natsbus: malformed envelope — ack to skip",
-			"subject", subject, "durable", durable,
-			"err", err, "body_size", len(msg.Data()))
-		_ = msg.Ack()
-		return
-	}
-
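-	// Per-message timeout — 25s leaves 5s of slack inside the 30s
-	// default AckWait so the broker does not redeliver while the handler
-	// is still running. The 25s is fixed: it does not track a custom
-	// SubscribeOptions.AckWait, so a caller that lowers AckWait below
-	// 25s loses that slack.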
-	hctx, cancel := context.WithTimeout(parent, 25*time.Second)
-	defer cancel()
-
-	if err := handler(hctx, &ev); err != nil {
-		slog.Warn("natsbus: handler error — nak for retry",
-			"subject", subject, "durable", durable,
-			"event_id", ev.ID, "event_type", ev.Type, "err", err)
-		if nakErr := msg.NakWithDelay(nakBackoff); nakErr != nil {
-			slog.Error("natsbus: nak failed",
-				"subject", subject, "durable", durable, "err", nakErr)
-		}
-		return
-	}
-	if ackErr := msg.Ack(); ackErr != nil {
-		slog.Error("natsbus: ack failed",
-			"subject", subject, "durable", durable,
-			"event_id", ev.ID, "err", ackErr)
-	}
-}
-
-// Close drains the underlying NATS connection. Idempotent.
-func (s *Subscriber) Close() {
-	if s == nil || s.nc == nil {
-		return
-	}
-	_ = s.nc.Drain()
-}
--- a/core/controllers/pkg/natsbus/subscriber_test.go
+++ b/core/controllers/pkg/natsbus/subscriber_test.go
@ -1,223 +0,0 @@
-// Unit tests for natsbus. Verifies the per-message dispatch contract
-// (Ack on handler success, Nak on handler error, Ack-to-skip on
-// malformed JSON) without spinning up an embedded NATS server. Live-
-// broker integration is covered by the D35 fresh-prov verifier.
-package natsbus
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"sync"
-	"testing"
-	"time"
-
-	"github.com/nats-io/nats.go"
-	"github.com/nats-io/nats.go/jetstream"
-)
-
-// fakeMsg implements just enough of jetstream.Msg for dispatchOne to
-// drive Ack / Nak / NakWithDelay outcomes. Method set mirrors the
-// interface so a typecheck against jetstream.Msg keeps the fake honest.
-type fakeMsg struct {
-	data []byte
-
-	mu          sync.Mutex
-	ackCount    int
-	nakCount    int
-	termCount   int
-	nakDelay    time.Duration
-	inProgress  int
-	headers     map[string][]string
-	subject     string
-	reply       string
-}
-
-var _ jetstream.Msg = (*fakeMsg)(nil)
-
-func (f *fakeMsg) Data() []byte { return f.data }
-func (f *fakeMsg) Headers() nats.Header {
-	out := nats.Header{}
-	for k, vs := range f.headers {
-		out[k] = vs
-	}
-	return out
-}
-func (f *fakeMsg) Subject() string { return f.subject }
-func (f *fakeMsg) Reply() string   { return f.reply }
-func (f *fakeMsg) Ack() error {
-	f.mu.Lock()
-	defer f.mu.Unlock()
-	f.ackCount++
-	return nil
-}
-func (f *fakeMsg) DoubleAck(context.Context) error {
-	return f.Ack()
-}
-func (f *fakeMsg) Nak() error {
-	f.mu.Lock()
-	defer f.mu.Unlock()
-	f.nakCount++
-	return nil
-}
-func (f *fakeMsg) NakWithDelay(d time.Duration) error {
-	f.mu.Lock()
-	defer f.mu.Unlock()
-	f.nakCount++
-	f.nakDelay = d
-	return nil
-}
-func (f *fakeMsg) InProgress() error {
-	f.mu.Lock()
-	defer f.mu.Unlock()
-	f.inProgress++
-	return nil
-}
-func (f *fakeMsg) Term() error {
-	f.mu.Lock()
-	defer f.mu.Unlock()
-	f.termCount++
-	return nil
-}
-func (f *fakeMsg) TermWithReason(string) error { return f.Term() }
-func (f *fakeMsg) Metadata() (*jetstream.MsgMetadata, error) {
-	return &jetstream.MsgMetadata{}, nil
-}
-
-// TestDispatchOne_HandlerSuccess pins: a handler returning nil Acks
-// the message exactly once and never Naks. Bodyguard for the D35
-// happy path — every successful round-trip moves the consumer cursor.
-func TestDispatchOne_HandlerSuccess(t *testing.T) {
-	payload := Event{
-		ID:        "evt-1",
-		Type:      "tenant.created",
-		Source:    "tenant-service",
-		Timestamp: time.Now().UTC(),
-		TenantID:  "tnt-abc",
-		Data:      json.RawMessage(`{"id":"tnt-abc","slug":"acme"}`),
-	}
-	body, err := json.Marshal(payload)
-	if err != nil {
-		t.Fatalf("marshal payload: %v", err)
-	}
-	msg := &fakeMsg{data: body, subject: SubjectTenantCreated}
-
-	var seen *Event
-	handler := Handler(func(_ context.Context, ev *Event) error {
-		seen = ev
-		return nil
-	})
-
-	dispatchOne(context.Background(), msg, handler, SubjectTenantCreated, "test-d", 5*time.Second)
-
-	if msg.ackCount != 1 {
-		t.Errorf("want Ack count 1, got %d", msg.ackCount)
-	}
-	if msg.nakCount != 0 {
-		t.Errorf("want Nak count 0, got %d", msg.nakCount)
-	}
-	if seen == nil || seen.ID != "evt-1" || seen.Type != "tenant.created" {
-		t.Errorf("handler did not receive the parsed envelope: %+v", seen)
-	}
-	if seen.TenantID != "tnt-abc" {
-		t.Errorf("envelope tenant_id mismatch: got %q want %q", seen.TenantID, "tnt-abc")
-	}
-}
-
-// TestDispatchOne_HandlerError pins: a handler returning a non-nil
-// error Naks (with the configured backoff) and does NOT Ack. Bodyguard
-// for transient downstream failures — JetStream must redeliver.
-func TestDispatchOne_HandlerError(t *testing.T) {
-	body, _ := json.Marshal(Event{ID: "evt-err", Type: "order.placed"})
-	msg := &fakeMsg{data: body, subject: SubjectOrderPlaced}
-
-	handler := Handler(func(context.Context, *Event) error {
-		return errors.New("downstream API unreachable")
-	})
-
-	const backoff = 7 * time.Second
-	dispatchOne(context.Background(), msg, handler, SubjectOrderPlaced, "test-d", backoff)
-
-	if msg.ackCount != 0 {
-		t.Errorf("want Ack count 0 on handler error, got %d", msg.ackCount)
-	}
-	if msg.nakCount != 1 {
-		t.Errorf("want Nak count 1 on handler error, got %d", msg.nakCount)
-	}
-	if msg.nakDelay != backoff {
-		t.Errorf("want Nak delay %v, got %v", backoff, msg.nakDelay)
-	}
-}
-
-// TestDispatchOne_MalformedJSON pins: a payload that fails json.Unmarshal
-// is Ack'd-skipped (the consumer cursor advances) so a poison pill cannot
-// hot-loop the subscriber. Caught by the operator log line, not the
-// transport.
-func TestDispatchOne_MalformedJSON(t *testing.T) {
-	msg := &fakeMsg{data: []byte("not-json{"), subject: SubjectTenantSandboxRequested}
-
-	called := false
-	handler := Handler(func(context.Context, *Event) error {
-		called = true
-		return nil
-	})
-
-	dispatchOne(context.Background(), msg, handler, SubjectTenantSandboxRequested, "test-d", 5*time.Second)
-
-	if called {
-		t.Error("handler should NOT be invoked on malformed JSON")
-	}
-	if msg.ackCount != 1 {
-		t.Errorf("want Ack count 1 to skip poison pill, got %d", msg.ackCount)
-	}
-	if msg.nakCount != 0 {
-		t.Errorf("want Nak count 0 on poison pill (Term/Ack only), got %d", msg.nakCount)
-	}
-}
-
-// TestDispatchOne_SandboxRequestedPayload pins the exact wire format
-// the publish-side (PR #1633 tenant.handlers.CreateOrg) emits — the
-// downstream sandbox-controller handler reads tenant_id, org_slug,
-// owner_id, owner_email, agent_catalogue out of the Data blob. If
-// the publish-side renames a field this test goes red so the consumer
-// stays in lockstep.
-func TestDispatchOne_SandboxRequestedPayload(t *testing.T) {
-	payload := Event{
-		ID:        "evt-sb",
-		Type:      "tenant.sandbox_requested",
-		Source:    "tenant-service",
-		Timestamp: time.Now().UTC(),
-		TenantID:  "tnt-sb",
-		Data: json.RawMessage(`{
-			"tenant_id":"tnt-sb",
-			"org_slug":"acme",
-			"owner_id":"u-1",
-			"owner_email":"ceo@acme.com",
-			"agent_catalogue":["qwen","claude"]
-		}`),
-	}
-	body, _ := json.Marshal(payload)
-	msg := &fakeMsg{data: body, subject: SubjectTenantSandboxRequested}
-
-	var got struct {
-		TenantID       string   `json:"tenant_id"`
-		OrgSlug        string   `json:"org_slug"`
-		OwnerEmail     string   `json:"owner_email"`
-		AgentCatalogue []string `json:"agent_catalogue"`
-	}
-	handler := Handler(func(_ context.Context, ev *Event) error {
-		return json.Unmarshal(ev.Data, &got)
-	})
-
-	dispatchOne(context.Background(), msg, handler, SubjectTenantSandboxRequested, "test-d", 5*time.Second)
-
-	if msg.ackCount != 1 {
-		t.Fatalf("expected Ack=1 on success, got Ack=%d Nak=%d", msg.ackCount, msg.nakCount)
-	}
-	if got.OrgSlug != "acme" || got.OwnerEmail != "ceo@acme.com" {
-		t.Errorf("payload fields not surfaced: %+v", got)
-	}
-	if len(got.AgentCatalogue) != 2 || got.AgentCatalogue[0] != "qwen" {
-		t.Errorf("agent_catalogue not surfaced: %+v", got.AgentCatalogue)
-	}
-}
--- a/core/controllers/sandbox/Dockerfile
+++ b/core/controllers/sandbox/Dockerfile
@ -1,53 +0,0 @@
-# sandbox-controller — Wave 1 of the Sandbox product.
-#
-# A Catalyst-built Go binary that reconciles Sandbox.sandbox.openova.io/v1
-# CRs into per-Sandbox namespace + RBAC + PVCs + placeholder Secret
-# manifests written to the per-Org `catalyst-tenant` Gitea repo. Flux on
-# the host cluster picks up the manifests and reconciles them into the
-# Org vcluster (sister of organization-controller — same patterns).
-#
-# Build context: invoked with the repository ROOT as the build context.
-# Mirrors core/controllers/organization/Containerfile (slice CC1 layout:
-# shared go.mod at core/controllers/, shared pkg at core/controllers/pkg).
-#
-# Two stages:
-#   build  — golang:1.23-alpine
-#   final  — alpine:3.20 minimal runtime (CA certs + the binary)
-
-FROM docker.io/library/golang:1.23-alpine AS build
-WORKDIR /workspace
-
-# Stage 1: cache module downloads — go.mod/go.sum at the shared root.
-COPY core/controllers/go.mod core/controllers/go.sum core/controllers/
-WORKDIR /workspace/core/controllers
-RUN go mod download
-
-# Stage 2: copy source + build. Same layout the organization-controller
-# Containerfile uses (Fix #42 follow-up — shared internal + pkg dirs
-# MUST be copied before the per-controller dir, else `go build` fails
-# resolving the github.com/openova-io/openova/core/controllers/pkg/gitea
-# import).
-WORKDIR /workspace
-COPY core/controllers/internal /workspace/core/controllers/internal
-COPY core/controllers/pkg /workspace/core/controllers/pkg
-COPY core/controllers/sandbox /workspace/core/controllers/sandbox
-
-WORKDIR /workspace/core/controllers/sandbox
-RUN CGO_ENABLED=0 GOOS=linux go build \
-    -ldflags="-s -w" \
-    -o /sandbox-controller ./cmd/sandbox-controller
-
-# Stage 3: minimal runtime.
-FROM docker.io/library/alpine:3.20
-RUN apk add --no-cache ca-certificates tzdata
-
-COPY --from=build /sandbox-controller /sandbox-controller
-
-# Alpine 3.20 already ships UID 65534 as `nobody`. The numeric form
-# satisfies runAsNonRoot=true + runAsUser=65534 in the chart's
-# Deployment.
-USER 65534:65534
-
-EXPOSE 8080 8081
-
-ENTRYPOINT ["/sandbox-controller"]
--- a/core/controllers/sandbox/cmd/sandbox-controller/main.go
+++ b/core/controllers/sandbox/cmd/sandbox-controller/main.go
@ -1,294 +0,0 @@
-// sandbox-controller — Wave 1 + Wave 8 + Wave 9 of the Sandbox product
-// (products/sandbox/docs/architecture.md §7).
-//
-// Production entry point. Reads configuration from environment vars,
-// constructs the controller-runtime manager, and starts the Sandbox
-// reconciler with leader election.
-package main
-
-import (
-	"context"
-	"flag"
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-
-	"k8s.io/apimachinery/pkg/runtime"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/healthz"
-	"sigs.k8s.io/controller-runtime/pkg/log/zap"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
-	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
-
-	"github.com/openova-io/openova/core/controllers/pkg/gitea"
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/controller"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/idlescaler"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/newapi"
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-)
-
-var scheme = runtime.NewScheme()
-
-func init() {
-	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
-	utilruntime.Must(sandboxapi.AddToScheme(scheme))
-}
-
-func main() {
-	var (
-		metricsAddr          string
-		probeAddr            string
-		enableLeaderElection bool
-	)
-	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
-	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
-	flag.BoolVar(&enableLeaderElection, "leader-elect", true,
-		"Enable leader election for controller manager. Defaults to true so HA replicas don't double-write.")
-
-	opts := zap.Options{Development: false}
-	opts.BindFlags(flag.CommandLine)
-	flag.Parse()
-
-	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
-	log := ctrl.Log.WithName("sandbox-controller")
-
-	giteaURL := mustEnv("CATALYST_GITEA_URL", log)
-	giteaToken := mustEnv("CATALYST_GITEA_TOKEN", log)
-	hostCluster := mustEnv("CATALYST_HOST_CLUSTER", log)
-	sovereignFQDN := mustEnv("CATALYST_SOVEREIGN_FQDN", log)
-
-	branch := envOr("CATALYST_GITEA_BRANCH", "main")
-	tenantRepo := envOr("CATALYST_TENANT_REPO_NAME", "catalyst-tenant")
-
-	// Wave 8 runtime env — per-Sandbox pty-server / MCP / NEWAPI for
-	// the rendered Pod manifests.
-	ptyServerImage := mustEnv("SANDBOX_PTY_SERVER_IMAGE", log)
-	mcpImage := mustEnv("SANDBOX_MCP_IMAGE", log)
-	sandboxNewapiURL := mustEnv("SANDBOX_NEWAPI_URL", log)
-	llmGatewayTokenSecret := envOr("SANDBOX_LLM_GATEWAY_TOKEN_SECRET", "sandbox-tokens")
-	byosSecretPrefix := envOr("SANDBOX_BYOS_SECRET_PREFIX", "sandbox-byos-claude-code")
-	idleTimeoutMinutes := envOrInt("SANDBOX_IDLE_TIMEOUT_MINUTES", 30)
-
-	// Wave 9 — NewAPI bridge wiring. Two env vars carry the bridge URL +
-	// admin bearer used by the controller to call POST
-	// /admin/tokens/sandbox (catalyst-api bridge handler, PR #1638).
-	// Both are REQUIRED in production — a sandbox-controller without
-	// the bridge wired silently ships Sandboxes without an LLM
-	// connection. Permit unset for compatibility with smoke tests
-	// that exercise only the gitops path (env both unset ⇒ controller
-	// runs without the token-mint path; log line announces it).
-	newapiBaseURL := strings.TrimSpace(os.Getenv("NEWAPI_BASE_URL"))
-	newapiAdmin := strings.TrimSpace(os.Getenv("NEWAPI_ADMIN_SECRET"))
-	defaultChannels := splitAndTrim(envOr("NEWAPI_DEFAULT_CHANNELS", ""), ",")
-
-	// D31 active-hot-standby — Sovereign-level toggle + region pair the
-	// controller threads into every per-Sandbox MCP Pod. The MCP
-	// server's sandbox.db.provision handler reads these at call time
-	// and, when valid, materialises a primary + replica
-	// Cluster.postgresql.cnpg.io pair instead of a single Cluster (DoD D31).
-	// Default-empty keeps every existing Sandbox on single-Cluster
-	// CNPG (zero regression). Bootstrap-kit slot 61 wires these from
-	// the per-Sovereign overlay's envsubst placeholders into the
-	// bp-sandbox HelmRelease values.
-	enableHotStandby := envOr("SOVEREIGN_ENABLE_HOT_STANDBY", "")
-	primaryRegion := envOr("SOVEREIGN_PRIMARY_REGION", "")
-	replicaRegion := envOr("SOVEREIGN_REPLICA_REGION", "")
-
-	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
-		Scheme:                 scheme,
-		Metrics:                metricsserver.Options{BindAddress: metricsAddr},
-		HealthProbeBindAddress: probeAddr,
-		LeaderElection:         enableLeaderElection,
-		LeaderElectionID:       "sandbox-controller.sandbox.openova.io",
-	})
-	if err != nil {
-		log.Error(err, "manager init")
-		os.Exit(1)
-	}
-
-	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
-		log.Error(err, "healthz")
-		os.Exit(1)
-	}
-	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
-		log.Error(err, "readyz")
-		os.Exit(1)
-	}
-
-	var newapiClient newapi.Client
-	if newapiBaseURL != "" && newapiAdmin != "" {
-		c, err := newapi.New(newapiBaseURL, newapiAdmin, nil)
-		if err != nil {
-			log.Error(err, "newapi client init")
-			os.Exit(1)
-		}
-		newapiClient = c
-	} else {
-		log.Info("newapi bridge not wired — sandbox-controller running in gitops-only mode",
-			"newapi_base_url_set", newapiBaseURL != "",
-			"newapi_admin_secret_set", newapiAdmin != "",
-		)
-	}
-
-	r := &controller.Reconciler{
-		Client:                mgr.GetClient(),
-		Log:                   log.WithName("reconciler"),
-		GiteaClient:           gitea.New(giteaURL, giteaToken),
-		HostCluster:           hostCluster,
-		SovereignFQDN:         sovereignFQDN,
-		Branch:                branch,
-		TenantRepoName:        tenantRepo,
-		PtyServerImage:        ptyServerImage,
-		MCPImage:              mcpImage,
-		NewapiURL:             sandboxNewapiURL,
-		LLMGatewayTokenSecret: llmGatewayTokenSecret,
-		BYOSSecretPrefix:      byosSecretPrefix,
-		IdleTimeoutMinutes:    idleTimeoutMinutes,
-		NewAPIClient:          newapiClient,
-		DefaultChannels:       defaultChannels,
-		EnableHotStandby:      enableHotStandby,
-		PrimaryRegion:         primaryRegion,
-		ReplicaRegion:         replicaRegion,
-	}
-	if err := r.SetupWithManager(mgr); err != nil {
-		log.Error(err, "setup reconciler")
-		os.Exit(1)
-	}
-
-	// Wave 10 (PR #1641 follow-up) — IdleScaler reads the
-	// `openova.io/sandbox-idle-timeout-minutes` annotation the
-	// renderer writes on every pty-server StatefulSet, polls each
-	// pty-server Service for live activity, and scales replicas to 0
-	// once the idle window has elapsed. Leader-elected so HA
-	// controller replicas don't race.
-	scaler := idlescaler.New(mgr.GetClient(),
-		log.WithName("idle-scaler"),
-		idlescaler.Options{
-			DefaultIdleTimeoutMinutes: idleTimeoutMinutes,
-		})
-	if err := mgr.Add(scaler); err != nil {
-		log.Error(err, "add idle-scaler to manager")
-		os.Exit(1)
-	}
-
-	// D35 consume-leg — subscribe to `catalyst.tenant.sandbox_requested`
-	// so the publish from tenant-service nudges the matching Sandbox CR
-	// into a fresh Reconcile within ~50ms. Same wiring shape as the
-	// organization-controller's NATS bridge. Best-effort: NATS_URL
-	// unset → log + continue (informer requeue fallback intact).
-	natsURL := strings.TrimSpace(os.Getenv("NATS_URL"))
-	sandboxNs := envOr("SANDBOX_NAMESPACE", "catalyst-system")
-	if natsURL != "" {
-		if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
-			sub, err := natsbus.Connect(natsURL)
-			if err != nil {
-				log.Error(err, "natsbus: connect failed — D35 consume-leg disabled",
-					"nats_url", natsURL)
-				return nil
-			}
-			bridge := &controller.NATSBridge{
-				Client:    mgr.GetClient(),
-				Log:       log.WithName("natsbridge"),
-				Namespace: sandboxNs,
-			}
-			if err := sub.Subscribe(ctx,
-				natsbus.SubjectTenantSandboxRequested,
-				"sandbox-controller-sandbox-requested",
-				bridge.HandleSandboxRequested,
-				natsbus.SubscribeOptions{},
-			); err != nil {
-				log.Error(err, "natsbus: subscribe tenant.sandbox_requested failed")
-			}
-			<-ctx.Done()
-			sub.Close()
-			return nil
-		})); err != nil {
-			log.Error(err, "natsbus: add runnable failed")
-			os.Exit(1)
-		}
-		log.Info("natsbus: D35 consume-leg wired",
-			"nats_url", natsURL,
-			"subjects", []string{natsbus.SubjectTenantSandboxRequested},
-			"sandbox_namespace", sandboxNs,
-		)
-	} else {
-		log.Info("natsbus: NATS_URL unset — D35 consume-leg disabled (informer-requeue fallback only)")
-	}
-
-	log.Info("starting manager",
-		"host_cluster", hostCluster,
-		"sovereign_fqdn", sovereignFQDN,
-		"gitea_url", giteaURL,
-		"tenant_repo", tenantRepo,
-		"pty_server_image", ptyServerImage,
-		"mcp_image", mcpImage,
-		"newapi_url", sandboxNewapiURL,
-		"llm_gateway_token_secret", llmGatewayTokenSecret,
-		"byos_secret_prefix", byosSecretPrefix,
-		"idle_timeout_minutes", idleTimeoutMinutes,
-		"newapi_wired", newapiClient != nil,
-		"default_channels", defaultChannels,
-	)
-	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
-		log.Error(err, "manager start")
-		os.Exit(1)
-	}
-}
-
-func mustEnv(key string, log interface {
-	Error(err error, msg string, kvs ...any)
-},
-) string {
-	v := strings.TrimSpace(os.Getenv(key))
-	if v == "" {
-		log.Error(fmt.Errorf("missing env"), "required env var unset", "key", key)
-		os.Exit(2)
-	}
-	return v
-}
-
-func envOr(key, fallback string) string {
-	v := strings.TrimSpace(os.Getenv(key))
-	if v == "" {
-		return fallback
-	}
-	return v
-}
-
-// envOrInt parses an integer env var; an empty, non-integer, or
-// non-positive value returns the fallback. Used for
-// SANDBOX_IDLE_TIMEOUT_MINUTES — operator drift (mistyped value)
-// shouldn't crash the controller.
-func envOrInt(key string, fallback int) int {
-	v := strings.TrimSpace(os.Getenv(key))
-	if v == "" {
-		return fallback
-	}
-	n, err := strconv.Atoi(v)
-	if err != nil || n <= 0 {
-		return fallback
-	}
-	return n
-}
-
-// splitAndTrim splits s on sep and returns the non-empty trimmed
-// pieces. "qwen,vllm , " → ["qwen","vllm"]. Empty s returns nil so
-// the caller's len()==0 check is unambiguous.
-func splitAndTrim(s, sep string) []string {
-	if strings.TrimSpace(s) == "" {
-		return nil
-	}
-	parts := strings.Split(s, sep)
-	out := make([]string, 0, len(parts))
-	for _, p := range parts {
-		p = strings.TrimSpace(p)
-		if p == "" {
-			continue
-		}
-		out = append(out, p)
-	}
-	return out
-}
--- a/core/controllers/sandbox/internal/controller/nats_bridge.go
+++ b/core/controllers/sandbox/internal/controller/nats_bridge.go
@ -1,223 +0,0 @@
-// nats_bridge wires the canonical Catalyst NATS subject
-// `catalyst.tenant.sandbox_requested` (D35 consume leg, sandbox-controller side)
-// into the sandbox-controller's reconcile loop.
-//
-// Why this lives in sandbox-controller, not just in tenant-service:
-// the tenant-service SandboxOrchestrator (PR #1633) already consumes
-// `catalyst.tenant.sandbox_requested` and creates the Sandbox CR. The
-// missing leg was an in-cluster controller that, after the CR
-// materialised, OBSERVES the same envelope on its broker side and
-// triggers a fresh Reconcile within ~50ms instead of waiting for the
-// 30s informer requeue. That tightens the cart-completion → CR-Ready
-// loop end-to-end and closes D35: NATS round-trips end-to-end with
-// the controllers as the consume-side leg.
-//
-// The bridge looks up the matching Sandbox CR by the same name
-// derivation tenant-service uses (sanitised owner email/UID inside the
-// sandbox namespace) and stamps two annotations:
-//
-//   - openova.io/last-event-observed-at: RFC3339 timestamp from the
-//     broker envelope. Stable across duplicate JetStream delivery so
-//     the annotation patch is byte-equal on replay.
-//   - openova.io/last-event-subject: the canonical subject string.
-//
-// Patching either annotation triggers an informer event →
-// controller-runtime enqueues the CR's NamespacedName → Reconcile
-// runs within ~50ms.
-//
-// The 30s RequeueAfter in r.Reconcile remains untouched — this bridge
-// is an accelerator, not the only path. NATS message loss never
-// strands a CR.
-//
-// Per HARD CONSTRAINT: no credential write-paths. The bridge reads
-// only the Event envelope + the matching CR; it never touches Secrets
-// or NewAPI bearer tokens.
-
-package controller
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"strings"
-	"time"
-
-	"github.com/go-logr/logr"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/types"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-)
-
-// Annotation keys stamped on the matching Sandbox CR when a canonical
-// NATS envelope is observed. Identical to the organization-controller
-// keys for operator-visible symmetry across the two Group-C
-// controllers — `kubectl get sandboxes,organizations -o jsonpath`
-// surfaces the same field across both kinds.
-const (
-	AnnotationLastNATSObservedAt = "openova.io/last-event-observed-at"
-	AnnotationLastNATSSubject    = "openova.io/last-event-subject"
-)
-
-// DefaultSandboxNamespace is the namespace tenant-service writes
-// Sandbox CRs into when SANDBOX_NAMESPACE is unset. Mirrors the
-// publish-side default in core/services/tenant/handlers/sandbox_consumer.go.
-const DefaultSandboxNamespace = "catalyst-system"
-
-// NATSBridge is the consume-leg adapter for the sandbox-controller.
-type NATSBridge struct {
-	Client client.Client
-	Log    logr.Logger
-
-	// Namespace is the sandbox-namespace tenant-service writes Sandbox
-	// CRs into. Defaults to DefaultSandboxNamespace when empty.
-	Namespace string
-}
-
-// HandleSandboxRequested reacts to a `catalyst.tenant.sandbox_requested`
-// envelope. The publish-side (tenant.handlers.CreateOrg in PR #1633)
-// stamps owner_email + owner_id + org_slug + agents on the Data
-// payload. We derive the deterministic Sandbox CR name using the same
-// rules tenant-service applies (sanitised email leaf, "sandbox-"
-// prefix, RFC1123-bounded) and patch the observation annotations.
-func (b *NATSBridge) HandleSandboxRequested(ctx context.Context, ev *natsbus.Event) error {
-	if ev == nil {
-		return nil
-	}
-	var payload struct {
-		TenantID   string `json:"tenant_id"`
-		OrgSlug    string `json:"org_slug"`
-		OwnerID    string `json:"owner_id"`
-		OwnerEmail string `json:"owner_email"`
-	}
-	if err := json.Unmarshal(ev.Data, &payload); err != nil {
-		b.Log.Error(err, "sandbox_requested: malformed Data payload — ack to skip",
-			"event_id", ev.ID)
-		return nil
-	}
-
-	name := sandboxCRNameFromEvent(payload.OwnerEmail, payload.OwnerID)
-	if name == "" {
-		b.Log.Error(fmt.Errorf("empty derived name"),
-			"sandbox_requested: payload has neither owner_email nor owner_id — ack to skip",
-			"event_id", ev.ID, "tenant_id", payload.TenantID)
-		return nil
-	}
-
-	ns := b.Namespace
-	if ns == "" {
-		ns = DefaultSandboxNamespace
-	}
-
-	var sb sandboxapi.Sandbox
-	if err := b.Client.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, &sb); err != nil {
-		if apierrors.IsNotFound(err) {
-			// Cold-start ordering: the broker delivered our copy of
-			// the envelope before the tenant-service orchestrator
-			// finished writing the CR. Soft-miss; tenant-service's
-			// Sandbox CR Create will fire an informer event of its
-			// own when it lands, so we don't need to retry.
-			b.Log.Info("nats observation: no matching Sandbox CR — ack and skip",
-				"subject", natsbus.SubjectTenantSandboxRequested,
-				"namespace", ns, "name", name, "event_id", ev.ID)
-			return nil
-		}
-		return fmt.Errorf("get sandbox %s/%s: %w", ns, name, err)
-	}
-
-	observedAt := ev.Timestamp.UTC().Format(time.RFC3339Nano)
-	if observedAt == "" || ev.Timestamp.IsZero() {
-		observedAt = time.Now().UTC().Format(time.RFC3339Nano)
-	}
-
-	// Byte-stable patch on duplicate JetStream delivery: skip when
-	// the annotations already match.
-	cur := sb.GetAnnotations()
-	if cur != nil &&
-		cur[AnnotationLastNATSObservedAt] == observedAt &&
-		cur[AnnotationLastNATSSubject] == natsbus.SubjectTenantSandboxRequested {
-		b.Log.V(1).Info("nats observation: duplicate envelope — skip patch",
-			"subject", natsbus.SubjectTenantSandboxRequested,
-			"namespace", ns, "name", name, "event_id", ev.ID)
-		return nil
-	}
-
-	desired := &sandboxapi.Sandbox{}
-	sb.DeepCopyInto(desired)
-	anns := desired.GetAnnotations()
-	if anns == nil {
-		anns = map[string]string{}
-	}
-	anns[AnnotationLastNATSObservedAt] = observedAt
-	anns[AnnotationLastNATSSubject] = natsbus.SubjectTenantSandboxRequested
-	desired.SetAnnotations(anns)
-
-	if err := b.Client.Patch(ctx, desired, client.MergeFrom(&sb)); err != nil {
-		return fmt.Errorf("patch sandbox %s/%s: %w", ns, name, err)
-	}
-	b.Log.Info("nats observation stamped — reconcile enqueued",
-		"subject", natsbus.SubjectTenantSandboxRequested,
-		"namespace", ns, "name", name,
-		"event_id", ev.ID, "observed_at", observedAt)
-	return nil
-}
-
-// sandboxCRNameFromEvent mirrors core/services/tenant/handlers/sandbox_consumer.go
-// `sandboxCRName(email, ownerID)`. The two functions MUST stay in
-// sync — tenant-service writes the CR under this name, and
-// sandbox-controller's NATSBridge looks it up by the same name.
-//
-// Rules (verbatim from the publish-side):
-//
-//  1. Prefer the email; fall back to ownerID when email is empty.
-//  2. Sanitise to a DNS-1123 leaf via sanitizeSandboxLeaf.
-//  3. Empty post-sanitise → literal "user" so the consumer never
-//     returns an empty-name lookup.
-//  4. Final name = "sandbox-" + leaf, truncated to 63 chars and
-//     trailing-hyphen-stripped.
-func sandboxCRNameFromEvent(email, ownerID string) string {
-	candidate := strings.TrimSpace(email)
-	if candidate == "" {
-		candidate = strings.TrimSpace(ownerID)
-	}
-	leaf := sanitizeSandboxLeaf(candidate)
-	if leaf == "" {
-		leaf = "user"
-	}
-	name := "sandbox-" + leaf
-	if len(name) > 63 {
-		name = name[:63]
-	}
-	name = strings.TrimRight(name, "-")
-	return name
-}
-
-// sanitizeSandboxLeaf mirrors core/services/tenant/handlers/sandbox_consumer.go
-// `sanitizeSandboxLeaf`. Lowercases, replaces @ + . + + + _ with -,
-// strips everything outside [a-z0-9-], collapses double-hyphens, and
-// trims leading/trailing hyphens.
-func sanitizeSandboxLeaf(in string) string {
-	out := strings.ToLower(in)
-	out = strings.ReplaceAll(out, "@", "-at-")
-	out = strings.ReplaceAll(out, ".", "-")
-	out = strings.ReplaceAll(out, "+", "-plus-")
-	out = strings.ReplaceAll(out, "_", "-")
-	var b strings.Builder
-	b.Grow(len(out))
-	for _, r := range out {
-		switch {
-		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-':
-			b.WriteRune(r)
-		default:
-			b.WriteRune('-')
-		}
-	}
-	out = b.String()
-	for strings.Contains(out, "--") {
-		out = strings.ReplaceAll(out, "--", "-")
-	}
-	out = strings.Trim(out, "-")
-	return out
-}
--- a/core/controllers/sandbox/internal/controller/nats_bridge_test.go
+++ b/core/controllers/sandbox/internal/controller/nats_bridge_test.go
@ -1,193 +0,0 @@
-// Unit tests for the sandbox-controller NATS consume-leg bridge (D35).
-//
-// Mirrors organization/internal/controller/nats_bridge_test.go for
-// `catalyst.tenant.sandbox_requested`. The bridge surface is the same
-// signature the live JetStream subscriber drives, so these tests
-// exercise the same code path the runtime uses.
-
-package controller
-
-import (
-	"context"
-	"encoding/json"
-	"testing"
-	"time"
-
-	"github.com/go-logr/logr/testr"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/types"
-	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-
-	"github.com/openova-io/openova/core/controllers/pkg/natsbus"
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-)
-
-func newSandboxBridgeFixture(t *testing.T, ns string, objs ...runtime.Object) *NATSBridge {
-	t.Helper()
-	scheme := runtime.NewScheme()
-	if err := clientgoscheme.AddToScheme(scheme); err != nil {
-		t.Fatalf("clientgo addtoscheme: %v", err)
-	}
-	if err := sandboxapi.AddToScheme(scheme); err != nil {
-		t.Fatalf("sandboxapi addtoscheme: %v", err)
-	}
-	cb := fake.NewClientBuilder().WithScheme(scheme)
-	if len(objs) > 0 {
-		cb = cb.WithRuntimeObjects(objs...)
-	}
-	return &NATSBridge{
-		Client:    cb.Build(),
-		Log:       testr.New(t),
-		Namespace: ns,
-	}
-}
-
-// TestNATSBridge_SandboxRequested_HappyPath pins the D35 sandbox round-trip:
-// an envelope with owner_email matching a real Sandbox CR results in
-// both observation annotations being patched.
-func TestNATSBridge_SandboxRequested_HappyPath(t *testing.T) {
-	const ns = "catalyst-system"
-	// tenant-service derives sandbox name as "sandbox-" + sanitised
-	// email → ceo@acme.com → "sandbox-ceo-at-acme-com".
-	sb := &sandboxapi.Sandbox{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: ns,
-			Name:      "sandbox-ceo-at-acme-com",
-		},
-	}
-	bridge := newSandboxBridgeFixture(t, ns, sb)
-
-	ts := time.Date(2026, 5, 18, 15, 0, 0, 123456789, time.UTC)
-	body, _ := json.Marshal(map[string]any{
-		"tenant_id":   "tnt-9",
-		"org_slug":    "acme",
-		"owner_id":    "u-1",
-		"owner_email": "ceo@acme.com",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-sb-1",
-		Type:      "tenant.sandbox_requested",
-		Source:    "tenant-service",
-		Timestamp: ts,
-		TenantID:  "tnt-9",
-		Data:      body,
-	}
-	if err := bridge.HandleSandboxRequested(context.Background(), ev); err != nil {
-		t.Fatalf("HandleSandboxRequested: %v", err)
-	}
-
-	var got sandboxapi.Sandbox
-	if err := bridge.Client.Get(context.Background(),
-		types.NamespacedName{Namespace: ns, Name: "sandbox-ceo-at-acme-com"}, &got); err != nil {
-		t.Fatalf("get sandbox: %v", err)
-	}
-	anns := got.GetAnnotations()
-	if anns[AnnotationLastNATSSubject] != natsbus.SubjectTenantSandboxRequested {
-		t.Errorf("subject annotation: got %q want %q",
-			anns[AnnotationLastNATSSubject], natsbus.SubjectTenantSandboxRequested)
-	}
-	wantObservedAt := ts.Format(time.RFC3339Nano)
-	if anns[AnnotationLastNATSObservedAt] != wantObservedAt {
-		t.Errorf("observed-at annotation: got %q want %q",
-			anns[AnnotationLastNATSObservedAt], wantObservedAt)
-	}
-}
-
-// TestNATSBridge_SandboxRequested_OwnerIDFallback pins: when owner_email
-// is absent, the bridge falls back to owner_id for the CR name
-// derivation. Mirrors tenant-service's sandboxCRName fallback rule
-// (PR #1633) — both sides must stay in lockstep.
-func TestNATSBridge_SandboxRequested_OwnerIDFallback(t *testing.T) {
-	const ns = "catalyst-system"
-	sb := &sandboxapi.Sandbox{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: ns,
-			Name:      "sandbox-u-1",
-		},
-	}
-	bridge := newSandboxBridgeFixture(t, ns, sb)
-
-	body, _ := json.Marshal(map[string]any{
-		"tenant_id": "tnt-no-email",
-		"owner_id":  "u-1",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-sb-no-email",
-		Type:      "tenant.sandbox_requested",
-		Timestamp: time.Now().UTC(),
-		Data:      body,
-	}
-	if err := bridge.HandleSandboxRequested(context.Background(), ev); err != nil {
-		t.Fatalf("HandleSandboxRequested: %v", err)
-	}
-
-	var got sandboxapi.Sandbox
-	if err := bridge.Client.Get(context.Background(),
-		types.NamespacedName{Namespace: ns, Name: "sandbox-u-1"}, &got); err != nil {
-		t.Fatalf("get sandbox: %v", err)
-	}
-	if _, ok := got.GetAnnotations()[AnnotationLastNATSSubject]; !ok {
-		t.Error("owner_id fallback failed — CR not annotated")
-	}
-}
-
-// TestNATSBridge_SandboxRequested_NoMatchingCR pins: cold-start ordering
-// (broker delivered before tenant-service finished creating the CR)
-// is a soft miss — return nil so the dispatcher Acks, do not Nak.
-func TestNATSBridge_SandboxRequested_NoMatchingCR(t *testing.T) {
-	bridge := newSandboxBridgeFixture(t, "catalyst-system")
-	body, _ := json.Marshal(map[string]any{
-		"owner_email": "ghost@nowhere.io",
-	})
-	ev := &natsbus.Event{
-		ID:        "evt-miss-sb",
-		Type:      "tenant.sandbox_requested",
-		Timestamp: time.Now().UTC(),
-		Data:      body,
-	}
-	if err := bridge.HandleSandboxRequested(context.Background(), ev); err != nil {
-		t.Fatalf("HandleSandboxRequested on missing CR returned error (should soft-miss): %v", err)
-	}
-}
-
-// TestNATSBridge_SandboxRequested_MalformedData pins poison-pill
-// behaviour: malformed JSON returns nil so the dispatcher Acks-to-skip.
-func TestNATSBridge_SandboxRequested_MalformedData(t *testing.T) {
-	bridge := newSandboxBridgeFixture(t, "catalyst-system")
-	ev := &natsbus.Event{
-		ID:        "evt-bad-sb",
-		Type:      "tenant.sandbox_requested",
-		Timestamp: time.Now().UTC(),
-		Data:      []byte("not-json{"),
-	}
-	if err := bridge.HandleSandboxRequested(context.Background(), ev); err != nil {
-		t.Errorf("malformed Data should not Nak, got: %v", err)
-	}
-}
-
-// TestSandboxCRName_MatchesTenantServiceConvention pins the name
-// derivation rules verbatim against the publish-side convention. If
-// tenant-service changes its naming rule, this test goes red so the
-// bridge stays in lockstep.
-func TestSandboxCRName_MatchesTenantServiceConvention(t *testing.T) {
-	cases := []struct {
-		email, ownerID, want string
-	}{
-		{"ceo@acme.com", "u-1", "sandbox-ceo-at-acme-com"},
-		{"", "u-99", "sandbox-u-99"},
-		{"Mixed.Case+User@Globex.io", "", "sandbox-mixed-case-plus-user-at-globex-io"},
-		{"", "", "sandbox-user"},
-		{"a@b", "", "sandbox-a-at-b"},
-	}
-	for _, c := range cases {
-		t.Run(c.email+"|"+c.ownerID, func(t *testing.T) {
-			got := sandboxCRNameFromEvent(c.email, c.ownerID)
-			if got != c.want {
-				t.Errorf("sandboxCRNameFromEvent(%q,%q) = %q, want %q",
-					c.email, c.ownerID, got, c.want)
-			}
-		})
-	}
-}
--- a/core/controllers/sandbox/internal/controller/sandbox_controller.go
+++ b/core/controllers/sandbox/internal/controller/sandbox_controller.go
@ -1,437 +0,0 @@
-// Package controller hosts the Sandbox reconciler — the Wave 1 + Wave 8
-// slice of the Sandbox product (#1615 brief + products/sandbox/docs/
-// architecture.md §7).
-//
-// Per architecture.md §7 the sandbox-controller is the sister of
-// organization-controller. It reconciles a Sandbox CR into manifests
-// the per-Org Flux Kustomization (host cluster) materializes inside
-// the Org vcluster. Wave 8 adds the pty-server StatefulSet + MCP
-// Deployment + Service + HTTPRoute (in addition to the Wave-1
-// namespace + RBAC + PVCs + placeholder Secret).
-//
-// Idempotency: every "ensure" step is find-or-create + byte-equal
-// short-circuit. Re-reconciling on a steady-state CR writes nothing
-// downstream.
-
-package controller
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"strings"
-	"time"
-
-	"github.com/go-logr/logr"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	"github.com/openova-io/openova/core/controllers/pkg/gitea"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/gitops"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/newapi"
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-)
-
-// Annotation keys the reconciler stamps onto the Sandbox CR to carry
-// the per-Sandbox NewAPI token lifecycle. The token VALUE itself
-// never lands on the CR — only its expiry + last-rotation instant.
-// The rendered Secret in the per-Org Gitea repo carries the bytes.
-const (
-	annotationTokenExpiresAt = "openova.io/sandbox-token-expires-at"
-	annotationTokenRotatedAt = "openova.io/sandbox-token-rotated-at"
-)
-
-// DefaultTokenRotationLeadTime is how far in advance the reconciler
-// re-mints the per-Sandbox NewAPI token before its expiry. The
-// bridge handler currently issues 7-day tokens (SandboxTokenTTL in
-// platform/newapi/internal/handler/sandbox_token.go) — picking a 1-
-// day lead means a steady-state reconcile re-mints once per day,
-// keeping the rendered Secret byte-stable between reconciles in the
-// 6-day fresh-token window.
-//
-// The Wave 9 brief calls for "15 days before expiry" — that target
-// applies once the bridge TTL is bumped to 30+ days. Until then 24h
-// is the operationally-sane default; per-Sovereign overlays can
-// override via Reconciler.TokenRotationLeadTime (e.g. set to 15d
-// when the bridge's TTL is bumped).
-const DefaultTokenRotationLeadTime = 24 * time.Hour
-
-// Reconciler reconciles Sandbox CRs.
-type Reconciler struct {
-	client.Client
-	Log logr.Logger
-
-	GiteaClient   *gitea.Client
-	HostCluster   string
-	SovereignFQDN string
-	Branch        string
-	TenantRepoName string
-
-	// Wave 8 per-Sandbox runtime knobs (plumbed from chart env).
-	PtyServerImage        string
-	MCPImage              string
-	NewapiURL             string
-	LLMGatewayTokenSecret string
-	BYOSSecretPrefix      string
-	IdleTimeoutMinutes    int
-
-	// D31 active-hot-standby — Sovereign-level toggle + region pair the
-	// controller threads from its chart env (SOVEREIGN_ENABLE_HOT_STANDBY,
-	// SOVEREIGN_PRIMARY_REGION, SOVEREIGN_REPLICA_REGION) into every
-	// per-Sandbox MCP Pod via gitops.Inputs. The MCP server's
-	// sandbox.db.provision handler reads them at call time and renders a
-	// primary + replica Cluster.postgresql.cnpg.io pair when valid.
-	// Default-empty keeps every existing Sandbox on single-Cluster CNPG
-	// (zero regression). Bootstrap-kit slot 61 wires the per-Sovereign
-	// overlay's envsubst placeholders into the bp-sandbox HelmRelease
-	// values; the chart surfaces them as the controller's env.
-	EnableHotStandby string
-	PrimaryRegion    string
-	ReplicaRegion    string
-
-	// Wave 9 — NewAPI bridge client used by Reconcile to mint
-	// per-Sandbox LLM-gateway tokens (POST /admin/tokens/sandbox,
-	// PR #1638). When nil the reconciler renders the Wave 1+8
-	// manifests but skips the token-mint path — the controller is
-	// operable on a Sovereign whose bridge handler is not yet rolled
-	// out (e.g. fresh prov mid-handover) without silently shipping a
-	// Sandbox without an LLM connection. main.go logs a warning in
-	// that case.
-	NewAPIClient newapi.Client
-
-	// DefaultChannels is the operator-configured list of NewAPI
-	// channel names every freshly-minted Sandbox token is allowed to
-	// call. Currently a single channel per Sovereign ("qwen" today,
-	// see products/sandbox/docs/newapi-proxy-contract.md §2); future
-	// per-tier work will allow per-Sandbox overrides via spec.
-	DefaultChannels []string
-
-	// TokenRotationLeadTime overrides DefaultTokenRotationLeadTime. The
-	// controller re-mints when the previously-issued token's expiry is
-	// within this window of now. Zero ⇒ DefaultTokenRotationLeadTime.
-	TokenRotationLeadTime time.Duration
-
-	// Now is the wall-clock source. Defaults to time.Now when nil;
-	// injected by tests for deterministic rotation behaviour.
-	Now func() time.Time
-}
-
-func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
-	return ctrl.NewControllerManagedBy(mgr).
-		For(&sandboxapi.Sandbox{}).
-		Complete(r)
-}
-
-func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	log := r.Log.WithValues("sandbox", req.NamespacedName.String())
-	log.Info("reconcile")
-
-	var sb sandboxapi.Sandbox
-	if err := r.Get(ctx, req.NamespacedName, &sb); err != nil {
-		if apierrors.IsNotFound(err) {
-			return ctrl.Result{}, nil
-		}
-		return ctrl.Result{}, fmt.Errorf("get sandbox: %w", err)
-	}
-
-	if strings.TrimSpace(sb.Spec.Owner.OrgRef.Slug) == "" {
-		return r.fail(ctx, &sb, "OwnerOrgRefMissing",
-			"spec.owner.orgRef.slug must be non-empty (the parent Organization slug)")
-	}
-	if strings.TrimSpace(sb.Spec.Owner.Email) == "" {
-		return r.fail(ctx, &sb, "OwnerEmailMissing",
-			"spec.owner.email must be non-empty")
-	}
-
-	ownerUID := sanitizeEmail(sb.Spec.Owner.Email)
-	if ownerUID == "" {
-		return r.fail(ctx, &sb, "OwnerEmailInvalid",
-			fmt.Sprintf("spec.owner.email %q did not yield a DNS-safe owner UID", sb.Spec.Owner.Email))
-	}
-
-	// ── Per-Sandbox NewAPI bearer ──────────────────────────────────────
-	// When wired (r.NewAPIClient non-nil), the controller drives the
-	// full token lifecycle:
-	//
-	//   - No prior token (annotation absent) → mint fresh.
-	//   - Token within tokenRotationLeadTime of expiry → re-mint, bump
-	//     the `kubectl.kubernetes.io/restartedAt` annotation on the
-	//     rendered Secret so Wave 8's pty-server StatefulSet picks up
-	//     a rolling restart.
-	//   - Steady state (token healthy) → leave the previously-rendered
-	//     Secret manifest in Gitea untouched (PutFile's byte-equal
-	//     guard short-circuits).
-	//
-	// When the bridge call fails the reconciler records a Failed
-	// condition (TokenMintFailed) and requeues 30s — namespace/RBAC/PVC
-	// manifests are NOT rendered until the bridge is reachable, so a
-	// Sandbox without an LLM gateway never lands in steady state.
-	now := time.Now
-	if r.Now != nil {
-		now = r.Now
-	}
-	leadTime := r.TokenRotationLeadTime
-	if leadTime <= 0 {
-		leadTime = DefaultTokenRotationLeadTime
-	}
-
-	var (
-		tokenValue     string
-		tokenExpiresAt string
-		tokenRotatedAt string
-	)
-	if r.NewAPIClient != nil {
-		nowT := now()
-		mustMint, prevExpiry := r.shouldMintToken(&sb, nowT, leadTime)
-		if mustMint {
-			channels := r.channelsForSandbox(&sb)
-			if len(channels) == 0 {
-				return r.fail(ctx, &sb, "NoAllowedChannels",
-					"sandbox-controller has no DefaultChannels configured AND spec exposes none — refusing to mint a token with empty allowed_channels")
-			}
-			sandboxID := string(sb.UID)
-			if strings.TrimSpace(sandboxID) == "" {
-				// Fresh CR without a UID stamped (only happens in
-				// pathological hand-rolled fixtures). Fall back to the
-				// stable namespace/name pair.
-				sandboxID = fmt.Sprintf("%s/%s", sb.Namespace, sb.Name)
-			}
-			// Tier-bound MCP capabilities (PR #1671) — derived from
-			// spec.capabilities (operator override) or spec.planId via
-			// sandboxapi.ResolveCapabilities. Empty list is permitted by
-			// the bridge handler and produces an introspection-only
-			// token; the controller never short-circuits on a missing
-			// capability list because the operator can grant on-demand
-			// by patching spec.capabilities.
-			caps := sandboxapi.ResolveCapabilities(&sb.Spec)
-			mint, mintErr := r.NewAPIClient.MintSandboxToken(ctx, newapi.MintRequest{
-				OrgID:           sb.Spec.Owner.OrgRef.Slug,
-				UserID:          sb.Spec.Owner.Email,
-				SandboxID:       sandboxID,
-				AllowedChannels: channels,
-				Capabilities:    caps,
-			})
-			if mintErr != nil {
-				r.Log.Error(mintErr, "newapi mint failed",
-					"sandbox", sb.Namespace+"/"+sb.Name,
-					"prev_expiry", prevExpiry.Format(time.RFC3339))
-				return r.fail(ctx, &sb, "TokenMintFailed", mintErr.Error())
-			}
-			tokenValue = mint.Token
-			tokenExpiresAt = mint.ExpiresAt.UTC().Format(time.RFC3339)
-			tokenRotatedAt = nowT.UTC().Format(time.RFC3339)
-
-			// Persist the rotation marker on the CR BEFORE the Gitea
-			// write so a crash between this point and the PutFile pass
-			// surfaces on the next reconcile as "prev_expiry already
-			// past, re-mint" rather than "token rendered but CR has no
-			// expiry annotation, mint again". Both paths converge but
-			// stamping first keeps the operator-visible state honest.
-			if err := r.stampTokenAnnotations(ctx, &sb, tokenExpiresAt, tokenRotatedAt); err != nil {
-				return ctrl.Result{}, fmt.Errorf("stamp annotations: %w", err)
-			}
-		} else {
-			tokenExpiresAt = prevExpiry.UTC().Format(time.RFC3339)
-			// tokenRotatedAt left empty — renderer drops the
-			// kubectl.kubernetes.io/restartedAt annotation only when
-			// non-empty, so steady-state reconciles never bump it.
-		}
-	}
-
-	in := gitops.Inputs{
-		Name:                  sb.Name,
-		OwnerUID:              ownerUID,
-		OwnerEmail:            sb.Spec.Owner.Email,
-		OrgSlug:               sb.Spec.Owner.OrgRef.Slug,
-		SovereignFQDN:         r.SovereignFQDN,
-		Quota:                 sb.Spec.Quota,
-		Repos:                 sb.Spec.Repos,
-		PreviewDomain:         sb.Spec.PreviewDomain,
-		AgentCatalogue:        sb.Spec.AgentCatalogue,
-		PtyServerImage:        r.PtyServerImage,
-		MCPImage:              r.MCPImage,
-		NewapiURL:             r.NewapiURL,
-		LLMGatewayTokenSecret: r.LLMGatewayTokenSecret,
-		BYOSSecretPrefix:      r.BYOSSecretPrefix,
-		IdleTimeoutMinutes:    r.IdleTimeoutMinutes,
-		IdleScalingDisabled:   sb.Spec.IdleScaling != nil && !sb.Spec.IdleScaling.Enabled,
-		NewAPIToken:           tokenValue,
-		NewAPITokenSecretName: fmt.Sprintf("sandbox-%s-newapi-token", ownerUID),
-		NewAPITokenExpiresAt:  tokenExpiresAt,
-		NewAPITokenRotatedAt:  tokenRotatedAt,
-		EnableHotStandby:      r.EnableHotStandby,
-		PrimaryRegion:         r.PrimaryRegion,
-		ReplicaRegion:         r.ReplicaRegion,
-	}
-	manifests, err := gitops.Render(in)
-	if err != nil {
-		return r.fail(ctx, &sb, "ManifestRenderFailed", err.Error())
-	}
-
-	branch := r.Branch
-	if branch == "" {
-		branch = "main"
-	}
-	repo := r.TenantRepoName
-	if repo == "" {
-		repo = "catalyst-tenant"
-	}
-
-	prefix := fmt.Sprintf("sandbox/%s", ownerUID)
-	for path, data := range manifests {
-		fullPath := fmt.Sprintf("%s/%s", prefix, path)
-		if _, _, err := r.GiteaClient.PutFile(ctx,
-			sb.Spec.Owner.OrgRef.Slug, repo, branch, fullPath, data,
-			fmt.Sprintf("sandbox-controller: reconcile %s for sandbox %s/%s",
-				fullPath, sb.Namespace, sb.Name)); err != nil {
-			return r.fail(ctx, &sb, "GitopsWriteFailed",
-				fmt.Sprintf("write %s: %s", fullPath, err))
-		}
-	}
-
-	desired := sandboxapi.SandboxStatus{
-		Phase:      "Provisioning",
-		GitopsPath: prefix,
-		Conditions: []sandboxapi.SandboxCondition{
-			{
-				Type:               "Ready",
-				Status:             "True",
-				Reason:             "GitopsReconciled",
-				Message:            fmt.Sprintf("Wave 1+8 manifests reconciled to gitea %s/%s@%s:%s", sb.Spec.Owner.OrgRef.Slug, repo, branch, prefix),
-				LastTransitionTime: metav1.NewTime(time.Now()),
-			},
-		},
-		ObservedGeneration: sb.Generation,
-	}
-	if err := r.patchStatus(ctx, &sb, desired); err != nil {
-		return ctrl.Result{}, fmt.Errorf("patch status: %w", err)
-	}
-
-	log.Info("reconcile ok",
-		"org", sb.Spec.Owner.OrgRef.Slug,
-		"owner_uid", ownerUID,
-		"gitops_path", prefix,
-		"files", len(manifests),
-	)
-	return ctrl.Result{}, nil
-}
-
-func (r *Reconciler) fail(ctx context.Context, sb *sandboxapi.Sandbox, reason, message string) (ctrl.Result, error) {
-	r.Log.Error(errors.New(reason), message,
-		"sandbox", sb.Namespace+"/"+sb.Name,
-		"owner", sb.Spec.Owner.Email)
-	st := sandboxapi.SandboxStatus{
-		Phase: "Failed",
-		Conditions: []sandboxapi.SandboxCondition{
-			{
-				Type:               "Ready",
-				Status:             "False",
-				Reason:             reason,
-				Message:            message,
-				LastTransitionTime: metav1.NewTime(time.Now()),
-			},
-		},
-		ObservedGeneration: sb.Generation,
-	}
-	_ = r.patchStatus(ctx, sb, st)
-	switch reason {
-	case "OwnerOrgRefMissing", "OwnerEmailMissing", "OwnerEmailInvalid":
-		return ctrl.Result{}, nil
-	}
-	return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
-}
-
-func (r *Reconciler) patchStatus(ctx context.Context, sb *sandboxapi.Sandbox, desired sandboxapi.SandboxStatus) error {
-	updated := sb.DeepCopyObject().(*sandboxapi.Sandbox)
-	updated.Status = desired
-	return r.Status().Update(ctx, updated)
-}
-
-// shouldMintToken inspects the CR's annotations and decides whether
-// the reconciler should call the NewAPI bridge handler this pass.
-// Returns (true, zeroExpiry) on first issuance or unparseable
-// annotation; (true, prevExpiry) when the previously-issued token is
-// within leadTime of expiry; (false, prevExpiry) when the token is
-// healthy.
-func (r *Reconciler) shouldMintToken(sb *sandboxapi.Sandbox, nowT time.Time, leadTime time.Duration) (bool, time.Time) {
-	raw := strings.TrimSpace(sb.GetAnnotations()[annotationTokenExpiresAt])
-	if raw == "" {
-		return true, time.Time{}
-	}
-	prev, err := time.Parse(time.RFC3339, raw)
-	if err != nil {
-		// Corrupt annotation — re-mint and overwrite. Operator-debug
-		// path is the log line in the mint branch above.
-		return true, time.Time{}
-	}
-	// Re-mint when expiry is within leadTime of now (covers the
-	// already-expired case too: nowT.Add(leadTime).After(prev) is
-	// trivially true when prev < nowT).
-	if !prev.After(nowT.Add(leadTime)) {
-		return true, prev
-	}
-	return false, prev
-}
-
-// channelsForSandbox derives the AllowedChannels list for a freshly
-// minted token. Wave 9: the operator-supplied DefaultChannels are
-// the source of truth. Future waves (per architecture.md §3) will
-// add a spec.allowedChannels overlay for per-Sandbox restriction.
-func (r *Reconciler) channelsForSandbox(_ *sandboxapi.Sandbox) []string {
-	if len(r.DefaultChannels) == 0 {
-		return nil
-	}
-	out := make([]string, 0, len(r.DefaultChannels))
-	for _, c := range r.DefaultChannels {
-		c = strings.TrimSpace(c)
-		if c == "" {
-			continue
-		}
-		out = append(out, c)
-	}
-	return out
-}
-
-// stampTokenAnnotations patches the Sandbox CR with the new expiry +
-// rotation timestamps. Uses a deep-copy + Update against the cached
-// client so the patch is one round-trip; the controller-runtime
-// cache reflects the change on the next reconcile.
-//
-// IMPORTANT: an Update() bumps the metadata.resourceVersion. The
-// subsequent status update (patchStatus) operates on the same local
-// `sb` value; we sync the bumped ResourceVersion back onto sb so the
-// status-subresource patch does not 409 on stale-version.
-func (r *Reconciler) stampTokenAnnotations(ctx context.Context, sb *sandboxapi.Sandbox, expiresAt, rotatedAt string) error {
-	updated := sb.DeepCopyObject().(*sandboxapi.Sandbox)
-	if updated.Annotations == nil {
-		updated.Annotations = map[string]string{}
-	}
-	updated.Annotations[annotationTokenExpiresAt] = expiresAt
-	updated.Annotations[annotationTokenRotatedAt] = rotatedAt
-	if err := r.Update(ctx, updated); err != nil {
-		return err
-	}
-	// Reflect changes back onto the local copy so the rest of this
-	// reconcile reads consistent annotations + the post-Update
-	// resourceVersion (required by the cached client's optimistic-
-	// concurrency check on the next .Status().Update call).
-	sb.Annotations = updated.Annotations
-	sb.ResourceVersion = updated.ResourceVersion
-	return nil
-}
-
-// sanitizeEmail converts an email into a DNS-label-safe leaf.
-func sanitizeEmail(email string) string {
-	out := strings.ToLower(strings.TrimSpace(email))
-	out = strings.ReplaceAll(out, "@", "-at-")
-	out = strings.ReplaceAll(out, ".", "-")
-	out = strings.ReplaceAll(out, "+", "-plus-")
-	out = strings.ReplaceAll(out, "_", "-")
-	if len(out) > 200 {
-		out = out[:200]
-	}
-	out = strings.Trim(out, "-")
-	return out
-}
--- a/core/controllers/sandbox/internal/controller/sandbox_controller_test.go
+++ b/core/controllers/sandbox/internal/controller/sandbox_controller_test.go
@ -1,883 +0,0 @@
-// sandbox_controller_test.go — Wave 1 + Wave 8 happy-path + drift +
-// idempotency coverage for the sandbox reconciler.
-
-package controller
-
-import (
-	"context"
-	"encoding/base64"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"sync"
-	"testing"
-	"time"
-
-	"github.com/go-logr/logr"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/types"
-	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-
-	"github.com/openova-io/openova/core/controllers/pkg/gitea"
-	"github.com/openova-io/openova/core/controllers/sandbox/internal/newapi"
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-// stubNewAPI is an in-process newapi.Client used by the reconciler
-// tests. Captures every MintRequest + replies with the configured
-// MintResponse / error.
-type stubNewAPI struct {
-	mu        sync.Mutex
-	calls     []newapi.MintRequest
-	resp      newapi.MintResponse
-	err       error
-	mintError func(newapi.MintRequest) (*newapi.MintResponse, error)
-}
-
-func (s *stubNewAPI) MintSandboxToken(_ context.Context, req newapi.MintRequest) (*newapi.MintResponse, error) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	s.calls = append(s.calls, req)
-	if s.mintError != nil {
-		return s.mintError(req)
-	}
-	if s.err != nil {
-		return nil, s.err
-	}
-	r := s.resp
-	return &r, nil
-}
-
-func (s *stubNewAPI) callCount() int {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	return len(s.calls)
-}
-
-type giteaServer struct {
-	t *testing.T
-
-	mu sync.Mutex
-
-	files map[string]fileEntry
-
-	createFiles int
-	updateFiles int
-
-	server *httptest.Server
-}
-
-type fileEntry struct {
-	sha     string
-	content []byte
-}
-
-func newGiteaServer(t *testing.T) *giteaServer {
-	gs := &giteaServer{
-		t:     t,
-		files: map[string]fileEntry{},
-	}
-	gs.server = httptest.NewServer(http.HandlerFunc(gs.handle))
-	t.Cleanup(gs.server.Close)
-	return gs
-}
-
-func (g *giteaServer) URL() string { return g.server.URL }
-
-func (g *giteaServer) handle(w http.ResponseWriter, r *http.Request) {
-	g.mu.Lock()
-	defer g.mu.Unlock()
-
-	if r.Header.Get("Authorization") == "" {
-		http.Error(w, "no auth", http.StatusUnauthorized)
-		return
-	}
-	p := r.URL.Path
-
-	if strings.HasPrefix(p, "/api/v1/repos/") && strings.Contains(p, "/contents/") {
-		const prefix = "/api/v1/repos/"
-		rest := p[len(prefix):]
-		idx := strings.Index(rest, "/contents/")
-		if idx < 0 {
-			http.Error(w, "bad path", http.StatusBadRequest)
-			return
-		}
-		ownerRepo := rest[:idx]
-		filePath := rest[idx+len("/contents/"):]
-		key := ownerRepo + "/" + filePath
-
-		switch r.Method {
-		case http.MethodGet:
-			f, ok := g.files[key]
-			if !ok {
-				http.Error(w, "not found", http.StatusNotFound)
-				return
-			}
-			writeJSON(w, http.StatusOK, gitea.File{
-				Path:          filePath,
-				SHA:           f.sha,
-				Type:          "file",
-				ContentBase64: base64.StdEncoding.EncodeToString(f.content),
-			})
-			return
-		case http.MethodPost, http.MethodPut:
-			var body struct {
-				Message string `json:"message"`
-				Content string `json:"content"`
-				Branch  string `json:"branch"`
-				SHA     string `json:"sha"`
-			}
-			if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			data, err := base64.StdEncoding.DecodeString(body.Content)
-			if err != nil {
-				http.Error(w, "bad b64", http.StatusBadRequest)
-				return
-			}
-			if r.Method == http.MethodPost {
-				if _, exists := g.files[key]; exists {
-					http.Error(w, "exists", http.StatusUnprocessableEntity)
-					return
-				}
-				g.createFiles++
-			} else {
-				g.updateFiles++
-			}
-			g.files[key] = fileEntry{
-				sha:     fmt.Sprintf("sha-%d", g.createFiles+g.updateFiles),
-				content: data,
-			}
-			writeJSON(w, http.StatusCreated, map[string]any{
-				"content": gitea.File{
-					Path: filePath,
-					SHA:  g.files[key].sha,
-					Type: "file",
-				},
-			})
-			return
-		}
-	}
-
-	g.t.Logf("giteaServer: unhandled %s %s", r.Method, r.URL.Path)
-	http.Error(w, "not found", http.StatusNotFound)
-}
-
-func writeJSON(w http.ResponseWriter, code int, v any) {
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(code)
-	_ = json.NewEncoder(w).Encode(v)
-}
-
-func makeReconciler(t *testing.T, objs ...client.Object) (*Reconciler, *giteaServer) {
-	t.Helper()
-
-	scheme := runtime.NewScheme()
-	if err := clientgoscheme.AddToScheme(scheme); err != nil {
-		t.Fatalf("add clientgo scheme: %v", err)
-	}
-	if err := sandboxapi.AddToScheme(scheme); err != nil {
-		t.Fatalf("add sandboxapi scheme: %v", err)
-	}
-
-	cl := fake.NewClientBuilder().
-		WithScheme(scheme).
-		WithStatusSubresource(&sandboxapi.Sandbox{}).
-		WithObjects(objs...).
-		Build()
-
-	gs := newGiteaServer(t)
-
-	r := &Reconciler{
-		Client:                cl,
-		Log:                   logr.Discard(),
-		GiteaClient:           gitea.New(gs.URL(), "test-token"),
-		HostCluster:           "ct-eu-mgt-prod",
-		SovereignFQDN:         "omantel.omani.works",
-		Branch:                "main",
-		TenantRepoName:        "catalyst-tenant",
-		PtyServerImage:        "ghcr.io/openova-io/openova/sandbox-pty-server:test-sha",
-		MCPImage:              "ghcr.io/openova-io/openova/sandbox-mcp:test-sha",
-		NewapiURL:             "https://newapi.omantel.omani.works/v1",
-		LLMGatewayTokenSecret: "sandbox-tokens",
-		BYOSSecretPrefix:      "sandbox-byos-claude-code",
-		IdleTimeoutMinutes:    30,
-	}
-	return r, gs
-}
-
-func sampleSandbox() *sandboxapi.Sandbox {
-	return &sandboxapi.Sandbox{
-		TypeMeta: metav1.TypeMeta{
-			APIVersion: sandboxapi.GroupVersion.String(),
-			Kind:       "Sandbox",
-		},
-		ObjectMeta: metav1.ObjectMeta{
-			Name:       "emrah",
-			Namespace:  "acme",
-			Generation: 1,
-			UID:        "00000000-0000-0000-0000-000000000001",
-		},
-		Spec: sandboxapi.SandboxSpec{
-			Owner: sandboxapi.SandboxOwner{
-				Email:  "ceo@acme.com",
-				OrgRef: sandboxapi.SandboxOrgRef{Slug: "acme"},
-			},
-			Quota: sandboxapi.SandboxQuota{
-				CPU:                "4",
-				Memory:             "8Gi",
-				Storage:            "50Gi",
-				ConcurrentSessions: 3,
-			},
-			Repos: []sandboxapi.SandboxRepo{
-				{GiteaRepo: "acme/eventforge"},
-				{GiteaRepo: "acme/internal-tools"},
-			},
-			AgentCatalogue: []string{"claude-code", "cursor-agent"},
-			PreviewDomain:  "sb-emrah.rzk7.openova.io",
-		},
-	}
-}
-
-func TestReconcile_HappyPath(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-
-	res, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	})
-	if err != nil {
-		t.Fatalf("reconcile error: %v", err)
-	}
-	if res.RequeueAfter != 0 {
-		t.Errorf("happy path should not requeue: got %v", res)
-	}
-
-	// Wave 1 + Wave 8: 6 fixed + 1 kust + 2 repo PVCs + 4 wave-8 = 13.
-	expectedFiles := 6 + 1 + 2 + 4
-	if gs.createFiles != expectedFiles {
-		t.Errorf("expected %d file creates, got %d", expectedFiles, gs.createFiles)
-	}
-	if gs.updateFiles != 0 {
-		t.Errorf("expected 0 file updates on first reconcile, got %d", gs.updateFiles)
-	}
-
-	wantPrefix := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/"
-	for key := range gs.files {
-		if !strings.HasPrefix(key, wantPrefix) {
-			t.Errorf("file %q not under expected prefix %q", key, wantPrefix)
-		}
-	}
-
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get post-reconcile: %v", err)
-	}
-	if got.Status.ObservedGeneration != 1 {
-		t.Errorf("observedGeneration: got %d want 1", got.Status.ObservedGeneration)
-	}
-	if got.Status.Phase != "Provisioning" {
-		t.Errorf("phase: got %q want %q", got.Status.Phase, "Provisioning")
-	}
-	if got.Status.GitopsPath != "sandbox/ceo-at-acme-com" {
-		t.Errorf("gitopsPath: got %q", got.Status.GitopsPath)
-	}
-	if len(got.Status.Conditions) != 1 ||
-		got.Status.Conditions[0].Type != "Ready" ||
-		got.Status.Conditions[0].Status != "True" ||
-		got.Status.Conditions[0].Reason != "GitopsReconciled" {
-		t.Errorf("expected Ready=True/GitopsReconciled, got %+v", got.Status.Conditions)
-	}
-}
-
-func TestReconcile_Idempotent(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("first reconcile: %v", err)
-	}
-	firstCreates := gs.createFiles
-	firstUpdates := gs.updateFiles
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("second reconcile: %v", err)
-	}
-
-	if delta := gs.createFiles - firstCreates; delta != 0 {
-		t.Errorf("idempotency: expected zero new creates, got %d", delta)
-	}
-	if delta := gs.updateFiles - firstUpdates; delta != 0 {
-		t.Errorf("idempotency: expected zero file updates, got %d", delta)
-	}
-}
-
-func TestReconcile_OwnerOrgRefMissing(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	sb.Spec.Owner.OrgRef.Slug = ""
-	r, gs := makeReconciler(t, sb)
-
-	res, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	})
-	if err != nil {
-		t.Fatalf("reconcile (drift): %v", err)
-	}
-	if res.RequeueAfter != 0 {
-		t.Errorf("drift should not requeue: got %v", res)
-	}
-	if gs.createFiles != 0 || gs.updateFiles != 0 {
-		t.Errorf("drift: no Gitea writes expected, got creates=%d updates=%d",
-			gs.createFiles, gs.updateFiles)
-	}
-
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get: %v", err)
-	}
-	if got.Status.Phase != "Failed" {
-		t.Errorf("phase: got %q want Failed", got.Status.Phase)
-	}
-	if len(got.Status.Conditions) != 1 ||
-		got.Status.Conditions[0].Status != "False" ||
-		got.Status.Conditions[0].Reason != "OwnerOrgRefMissing" {
-		t.Errorf("expected OwnerOrgRefMissing False condition, got %+v", got.Status.Conditions)
-	}
-}
-
-func TestReconcile_OwnerEmailMissing(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	sb.Spec.Owner.Email = ""
-	r, gs := makeReconciler(t, sb)
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile (drift): %v", err)
-	}
-	if gs.createFiles != 0 {
-		t.Errorf("drift: no Gitea writes expected, got %d creates", gs.createFiles)
-	}
-
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get: %v", err)
-	}
-	if len(got.Status.Conditions) != 1 ||
-		got.Status.Conditions[0].Reason != "OwnerEmailMissing" {
-		t.Errorf("expected OwnerEmailMissing False condition, got %+v", got.Status.Conditions)
-	}
-}
-
-func TestReconcile_Missing_NoError(t *testing.T) {
-	t.Parallel()
-	r, _ := makeReconciler(t)
-	res, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: "ghost", Namespace: "acme"},
-	})
-	if err != nil {
-		t.Fatalf("reconcile of missing CR should be a no-op, got: %v", err)
-	}
-	if res.RequeueAfter != 0 {
-		t.Errorf("missing CR should not requeue, got %v", res)
-	}
-}
-
-// TestReconcile_Wave8RuntimeShape asserts the Wave 8 runtime manifests
-// (pty-server StatefulSet, MCP Deployment, Service, HTTPRoute) carry
-// the right identity + env wiring + BYOS branching + hostname derivation.
-func TestReconcile_Wave8RuntimeShape(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	prefix := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/"
-	get := func(name string) string {
-		gs.mu.Lock()
-		defer gs.mu.Unlock()
-		entry, ok := gs.files[prefix+name]
-		if !ok {
-			t.Fatalf("expected rendered file %q in gitea stub", prefix+name)
-		}
-		return string(entry.content)
-	}
-
-	ss := get("statefulset-pty-server.yaml")
-	for _, want := range []string{
-		"kind: StatefulSet",
-		"name: pty-server",
-		"namespace: sandbox-ceo-at-acme-com",
-		"replicas: 3",
-		`image: "ghcr.io/openova-io/openova/sandbox-pty-server:test-sha"`,
-		"PTY_SERVER_ADDR",
-		"SANDBOX_OWNER_UID",
-		`value: "ceo-at-acme-com"`,
-		"ORG_ID",
-		`value: "acme"`,
-		"NEWAPI_URL",
-		`value: "https://newapi.omantel.omani.works/v1"`,
-		"OPENAI_BASE_URL",
-		"LLM_GATEWAY_TOKEN",
-		"OPENAI_API_KEY",
-		"ANTHROPIC_API_KEY",
-		`name: "sandbox-byos-claude-code-ceo-at-acme-com"`,
-		"key: access_token",
-		"openova.io/sandbox-idle-timeout-minutes",
-		"name: repo-acme-eventforge",
-		"mountPath: /workspace/acme-eventforge",
-		"name: repo-acme-internal-tools",
-	} {
-		if !strings.Contains(ss, want) {
-			t.Errorf("statefulset-pty-server.yaml missing %q", want)
-		}
-	}
-
-	dep := get("deployment-mcp.yaml")
-	for _, want := range []string{
-		"kind: Deployment",
-		"name: openova-sandbox-mcp",
-		`image: "ghcr.io/openova-io/openova/sandbox-mcp:test-sha"`,
-		"PTY_SERVER_URL",
-		"pty-server.sandbox-ceo-at-acme-com.svc.cluster.local:7681",
-	} {
-		if !strings.Contains(dep, want) {
-			t.Errorf("deployment-mcp.yaml missing %q", want)
-		}
-	}
-
-	svc := get("service-pty-server.yaml")
-	for _, want := range []string{
-		"kind: Service",
-		"name: pty-server",
-		"port: 7681",
-		"targetPort: 7681",
-	} {
-		if !strings.Contains(svc, want) {
-			t.Errorf("service-pty-server.yaml missing %q", want)
-		}
-	}
-
-	rt := get("httproute-pty-server.yaml")
-	for _, want := range []string{
-		"kind: HTTPRoute",
-		`- "sandbox.omantel.omani.works"`,
-		"value: /sessions/ceo-at-acme-com/",
-		// Sandbox HTTPRoute now attaches to the canonical Cilium Gateway
-		// (cilium-gateway/kube-system) so the wildcard *.<sov-fqdn>
-		// listener serves traffic to sandbox.<sov-fqdn>. The previous
-		// "catalyst-public/catalyst-system/https" parentRefs pointed at a
-		// Gateway that doesn't exist on a Sovereign.
-		"name: cilium-gateway",
-		"namespace: kube-system",
-		"name: pty-server",
-		"port: 7681",
-	} {
-		if !strings.Contains(rt, want) {
-			t.Errorf("httproute-pty-server.yaml missing %q", want)
-		}
-	}
-
-	kust := get("kustomization.yaml")
-	for _, want := range []string{
-		"statefulset-pty-server.yaml",
-		"service-pty-server.yaml",
-		"deployment-mcp.yaml",
-		"httproute-pty-server.yaml",
-	} {
-		if !strings.Contains(kust, want) {
-			t.Errorf("kustomization.yaml missing %q", want)
-		}
-	}
-}
-
-// TestReconcile_Wave8NoBYOSWhenAgentMissing asserts that a Sandbox
-// without claude-code in spec.agentCatalogue does NOT wire the
-// ANTHROPIC_API_KEY env into the rendered StatefulSet.
-func TestReconcile_Wave8NoBYOSWhenAgentMissing(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	sb.Spec.AgentCatalogue = []string{"cursor-agent"}
-	r, gs := makeReconciler(t, sb)
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	gs.mu.Lock()
-	entry, ok := gs.files["acme/catalyst-tenant/sandbox/ceo-at-acme-com/statefulset-pty-server.yaml"]
-	gs.mu.Unlock()
-	if !ok {
-		t.Fatalf("expected statefulset-pty-server.yaml")
-	}
-	body := string(entry.content)
-	if strings.Contains(body, "ANTHROPIC_API_KEY") {
-		t.Errorf("expected NO ANTHROPIC_API_KEY env when claude-code not in agentCatalogue")
-	}
-	if strings.Contains(body, "sandbox-byos-claude-code-ceo-at-acme-com") {
-		t.Errorf("expected NO BYOS Secret reference when claude-code not in agentCatalogue")
-	}
-}
-
-// TestReconcile_NewAPI_MintsAndRendersSecret exercises the Wave 9 mint
-// path: NewAPIClient wired + no prior token annotation → the
-// controller calls the bridge once, stamps both lifecycle annotations
-// on the CR, and renders secret-newapi-token.yaml under the Gitea
-// prefix with the expected token bytes.
-func TestReconcile_NewAPI_MintsAndRendersSecret(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-
-	fixedNow := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
-	exp := fixedNow.Add(7 * 24 * time.Hour)
-	stub := &stubNewAPI{resp: newapi.MintResponse{Token: "jwt-fresh", ExpiresAt: exp}}
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-	r.Now = func() time.Time { return fixedNow }
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	if stub.callCount() != 1 {
-		t.Errorf("mint calls: got %d want 1", stub.callCount())
-	}
-	gotReq := stub.calls[0]
-	if gotReq.OrgID != "acme" {
-		t.Errorf("mint req OrgID: got %q", gotReq.OrgID)
-	}
-	if gotReq.UserID != "ceo@acme.com" {
-		t.Errorf("mint req UserID: got %q", gotReq.UserID)
-	}
-	if gotReq.SandboxID != string(sb.UID) {
-		t.Errorf("mint req SandboxID: got %q want %q", gotReq.SandboxID, sb.UID)
-	}
-	if len(gotReq.AllowedChannels) != 1 || gotReq.AllowedChannels[0] != "qwen" {
-		t.Errorf("mint req channels: got %v", gotReq.AllowedChannels)
-	}
-
-	// The rendered Secret manifest must exist + carry the token bytes
-	// + expiry annotation + rotation marker (first issuance is also a
-	// rotation event, so kubectl.kubernetes.io/restartedAt is present).
-	secretKey := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/secret-newapi-token.yaml"
-	entry, ok := gs.files[secretKey]
-	if !ok {
-		t.Fatalf("expected secret-newapi-token.yaml under %q; files=%v",
-			secretKey, gsKeys(gs))
-	}
-	if !strings.Contains(string(entry.content), "LLM_GATEWAY_TOKEN: \"jwt-fresh\"") {
-		t.Errorf("rendered Secret missing token bytes: %s", string(entry.content))
-	}
-	if !strings.Contains(string(entry.content), "openova.io/sandbox-token-expires-at: \""+exp.UTC().Format(time.RFC3339)+"\"") {
-		t.Errorf("rendered Secret missing expires-at annotation: %s", string(entry.content))
-	}
-	if !strings.Contains(string(entry.content), "kubectl.kubernetes.io/restartedAt:") {
-		t.Errorf("rendered Secret missing restartedAt annotation: %s", string(entry.content))
-	}
-
-	// The Sandbox CR must carry both lifecycle annotations.
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get post-reconcile: %v", err)
-	}
-	if got.Annotations[annotationTokenExpiresAt] != exp.UTC().Format(time.RFC3339) {
-		t.Errorf("CR expires-at annotation: got %q", got.Annotations[annotationTokenExpiresAt])
-	}
-	if got.Annotations[annotationTokenRotatedAt] != fixedNow.UTC().Format(time.RFC3339) {
-		t.Errorf("CR rotated-at annotation: got %q", got.Annotations[annotationTokenRotatedAt])
-	}
-
-	// kustomization.yaml must reference the new secret.
-	kustKey := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/kustomization.yaml"
-	kustEntry, ok := gs.files[kustKey]
-	if !ok {
-		t.Fatalf("expected kustomization.yaml at %q", kustKey)
-	}
-	if !strings.Contains(string(kustEntry.content), "secret-newapi-token.yaml") {
-		t.Errorf("kustomization.yaml missing secret-newapi-token entry: %s", string(kustEntry.content))
-	}
-}
-
-// TestReconcile_NewAPI_RotationOnExpiry verifies that a token whose
-// expiry sits within the rotation lead-time triggers a fresh mint +
-// fresh restart marker.
-func TestReconcile_NewAPI_RotationOnExpiry(t *testing.T) {
-	t.Parallel()
-	fixedNow := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
-	expSoon := fixedNow.Add(30 * time.Minute) // inside default 24h lead time
-	sb := sampleSandbox()
-	sb.Annotations = map[string]string{
-		annotationTokenExpiresAt: expSoon.UTC().Format(time.RFC3339),
-		annotationTokenRotatedAt: fixedNow.Add(-6 * 24 * time.Hour).UTC().Format(time.RFC3339),
-	}
-	r, gs := makeReconciler(t, sb)
-
-	newExp := fixedNow.Add(7 * 24 * time.Hour)
-	stub := &stubNewAPI{resp: newapi.MintResponse{Token: "jwt-rotated", ExpiresAt: newExp}}
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-	r.Now = func() time.Time { return fixedNow }
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	if stub.callCount() != 1 {
-		t.Errorf("expected exactly one mint call, got %d", stub.callCount())
-	}
-	secretKey := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/secret-newapi-token.yaml"
-	entry := gs.files[secretKey]
-	if !strings.Contains(string(entry.content), "LLM_GATEWAY_TOKEN: \"jwt-rotated\"") {
-		t.Errorf("rotation did not write new token: %s", string(entry.content))
-	}
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get: %v", err)
-	}
-	if got.Annotations[annotationTokenExpiresAt] != newExp.UTC().Format(time.RFC3339) {
-		t.Errorf("rotation did not bump expires-at: got %q",
-			got.Annotations[annotationTokenExpiresAt])
-	}
-}
-
-// TestReconcile_NewAPI_NoMintWhenHealthy verifies the steady-state
-// path: a CR with a token whose expiry is well outside the rotation
-// lead-time triggers zero mint calls AND the rendered Secret carries
-// the previous bytes.
-func TestReconcile_NewAPI_NoMintWhenHealthy(t *testing.T) {
-	t.Parallel()
-	fixedNow := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
-	farExp := fixedNow.Add(5 * 24 * time.Hour) // outside default 24h lead
-	sb := sampleSandbox()
-	sb.Annotations = map[string]string{
-		annotationTokenExpiresAt: farExp.UTC().Format(time.RFC3339),
-		annotationTokenRotatedAt: fixedNow.Add(-2 * 24 * time.Hour).UTC().Format(time.RFC3339),
-	}
-	r, gs := makeReconciler(t, sb)
-
-	stub := &stubNewAPI{} // any call would explode (empty MintResponse)
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-	r.Now = func() time.Time { return fixedNow }
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-	if stub.callCount() != 0 {
-		t.Errorf("steady-state should not call mint, got %d", stub.callCount())
-	}
-	// The Secret manifest is NOT rendered because tokenValue is empty
-	// when the controller decides not to mint. The previous Secret
-	// content remains in Gitea untouched (we trust PutFile's byte-
-	// equal guard) — for this in-memory test there was no prior file,
-	// so the in-memory store simply doesn't have a secret-newapi-token
-	// entry. The kustomization.yaml must therefore NOT reference it.
-	kustKey := "acme/catalyst-tenant/sandbox/ceo-at-acme-com/kustomization.yaml"
-	kust := gs.files[kustKey]
-	if strings.Contains(string(kust.content), "secret-newapi-token.yaml") {
-		t.Errorf("kustomization should not reference secret-newapi-token when not minted")
-	}
-}
-
-// TestReconcile_NewAPI_MintFailureSurfacesCondition exercises the
-// failure path: the bridge returns a non-2xx → controller records a
-// Failed/TokenMintFailed condition + requeues + NO manifests written.
-func TestReconcile_NewAPI_MintFailureSurfacesCondition(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-	stub := &stubNewAPI{err: errors.New("newapi: POST .../admin/tokens/sandbox: status 503: outage")}
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-	r.Now = func() time.Time { return time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC) }
-
-	res, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	})
-	if err != nil {
-		t.Fatalf("reconcile error: %v", err)
-	}
-	if res.RequeueAfter == 0 {
-		t.Errorf("expected non-zero requeue on bridge failure")
-	}
-	if gs.createFiles != 0 {
-		t.Errorf("no Gitea writes expected on token-mint failure, got %d creates", gs.createFiles)
-	}
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get: %v", err)
-	}
-	if got.Status.Phase != "Failed" {
-		t.Errorf("phase: got %q want Failed", got.Status.Phase)
-	}
-	if len(got.Status.Conditions) != 1 ||
-		got.Status.Conditions[0].Reason != "TokenMintFailed" ||
-		got.Status.Conditions[0].Status != "False" {
-		t.Errorf("expected TokenMintFailed False condition, got %+v", got.Status.Conditions)
-	}
-}
-
-// TestReconcile_NewAPI_NoChannelsConfigured surfaces the misconfig
-// path: operator didn't wire DefaultChannels → fail-loud rather than
-// minting a token with an empty allowed_channels list (the bridge
-// would 400 anyway, but the controller fails earlier with a more
-// helpful Reason).
-func TestReconcile_NewAPI_NoChannelsConfigured(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	r, gs := makeReconciler(t, sb)
-	stub := &stubNewAPI{}
-	r.NewAPIClient = stub
-	r.DefaultChannels = nil // misconfig
-	r.Now = func() time.Time { return time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC) }
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-	if stub.callCount() != 0 {
-		t.Errorf("misconfig should not call bridge, got %d calls", stub.callCount())
-	}
-	if gs.createFiles != 0 {
-		t.Errorf("misconfig: no gitea writes expected, got %d", gs.createFiles)
-	}
-	var got sandboxapi.Sandbox
-	if err := r.Get(context.Background(),
-		client.ObjectKey{Name: sb.Name, Namespace: sb.Namespace}, &got); err != nil {
-		t.Fatalf("get: %v", err)
-	}
-	if len(got.Status.Conditions) != 1 || got.Status.Conditions[0].Reason != "NoAllowedChannels" {
-		t.Errorf("expected NoAllowedChannels condition, got %+v", got.Status.Conditions)
-	}
-}
-
-// TestReconcile_NewAPI_CapabilitiesFromPlan exercises the tier-bound
-// capability path (PR #1671): when the CR carries spec.planId without
-// an explicit spec.capabilities overlay, the controller resolves the
-// plan's capability allowlist and threads it into the MintRequest.
-func TestReconcile_NewAPI_CapabilitiesFromPlan(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	sb.Spec.PlanID = sandboxapi.PlanSandboxPro
-
-	r, _ := makeReconciler(t, sb)
-	stub := &stubNewAPI{resp: newapi.MintResponse{
-		Token: "jwt-pro", ExpiresAt: time.Now().Add(7 * 24 * time.Hour),
-	}}
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	if stub.callCount() != 1 {
-		t.Fatalf("mint calls: got %d want 1", stub.callCount())
-	}
-	gotCaps := stub.calls[0].Capabilities
-	wantSubset := []string{
-		"gitea.repo.list",     // Free baseline.
-		"sandbox.db.*",        // Pro extra.
-		"sandbox.storage.*",   // Pro extra.
-		"flux.status",         // Pro extra.
-	}
-	got := make(map[string]bool, len(gotCaps))
-	for _, c := range gotCaps {
-		got[c] = true
-	}
-	for _, w := range wantSubset {
-		if !got[w] {
-			t.Errorf("Pro plan capability %q missing from MintRequest: %v", w, gotCaps)
-		}
-	}
-	// Pro plan MUST NOT grant Ent-only capabilities.
-	for _, forbidden := range []string{
-		"sandbox.deploy.production", "sandbox.stripe.*", "flux.reconcile",
-	} {
-		if got[forbidden] {
-			t.Errorf("Pro plan unexpectedly granted Ent capability %q: %v", forbidden, gotCaps)
-		}
-	}
-}
-
-// TestReconcile_NewAPI_CapabilitiesSpecOverride asserts that an explicit
-// spec.capabilities overlay wins over the plan default — the operator
-// can tighten or widen the per-Sandbox grant by patching the CR.
-func TestReconcile_NewAPI_CapabilitiesSpecOverride(t *testing.T) {
-	t.Parallel()
-	sb := sampleSandbox()
-	sb.Spec.PlanID = sandboxapi.PlanSandboxEnt
-	// Override: drop every Ent grant down to read-only intersect.
-	sb.Spec.Capabilities = []string{"gitea.repo.list", "k8s.read.get"}
-
-	r, _ := makeReconciler(t, sb)
-	stub := &stubNewAPI{resp: newapi.MintResponse{
-		Token: "jwt-override", ExpiresAt: time.Now().Add(7 * 24 * time.Hour),
-	}}
-	r.NewAPIClient = stub
-	r.DefaultChannels = []string{"qwen"}
-
-	if _, err := r.Reconcile(context.Background(), ctrl.Request{
-		NamespacedName: types.NamespacedName{Name: sb.Name, Namespace: sb.Namespace},
-	}); err != nil {
-		t.Fatalf("reconcile: %v", err)
-	}
-
-	gotCaps := stub.calls[0].Capabilities
-	if len(gotCaps) != 2 {
-		t.Fatalf("override caps len: got %d (%v) want 2", len(gotCaps), gotCaps)
-	}
-	if gotCaps[0] != "gitea.repo.list" || gotCaps[1] != "k8s.read.get" {
-		t.Errorf("override caps: got %v want [gitea.repo.list k8s.read.get]", gotCaps)
-	}
-}
-
-func gsKeys(gs *giteaServer) []string {
-	gs.mu.Lock()
-	defer gs.mu.Unlock()
-	out := make([]string, 0, len(gs.files))
-	for k := range gs.files {
-		out = append(out, k)
-	}
-	return out
-}
--- a/core/controllers/sandbox/internal/gitops/manifests.go
+++ b/core/controllers/sandbox/internal/gitops/manifests.go
@ -1,790 +0,0 @@
-// Package gitops renders the per-Sandbox manifests the sandbox-controller
-// writes into the per-Org `catalyst-tenant` Gitea repo under
-// `sandbox/<owner-uid>/`.
-//
-// Per products/sandbox/docs/architecture.md §7 the sandbox-controller
-// is the sister of organization-controller — it reconciles a
-// per-Sandbox namespace + RBAC + PVCs + placeholder Secret INSIDE the
-// Org vcluster (not the host cluster). The controller writes manifests
-// to the per-Org Gitea repo following the SAME idiom
-// organization-controller uses for vcluster manifests
-// (core/controllers/organization/internal/gitops/manifests.go) — Flux
-// on the host picks them up and reconciles into the Org vcluster.
-//
-// Wave 1 materialized only namespace + RBAC + PVCs + placeholder
-// Secret. Wave 8 (this slice — PR follow-up to #1622) extends the
-// renderer to ALSO spawn the per-Sandbox runtime:
-//
-//   - Namespace `sandbox-<owner-uid>`
-//   - ResourceQuota (mirrors spec.quota)
-//   - ServiceAccount `sandbox` + Role + RoleBinding
-//   - One PVC per spec.repos[] entry
-//   - Placeholder Secret `sandbox-tokens`
-//   - NEW: StatefulSet `pty-server` (replicas = spec.quota.concurrentSessions)
-//   - NEW: Deployment `openova-sandbox-mcp`
-//   - NEW: Service `pty-server` ClusterIP :7681
-//   - NEW: HTTPRoute exposing `sandbox.<sov-fqdn>/sessions/<owner-uid>/*`
-//
-// Per Inviolable Principle #4 (no hardcoded values) every knob comes
-// from Inputs — nothing in the template literals encodes a cluster /
-// region / version / image / hostname.
-package gitops
-
-import (
-	"bytes"
-	"fmt"
-	"sort"
-	"strings"
-	"text/template"
-
-	sandboxapi "github.com/openova-io/openova/core/controllers/sandbox/internal/sandboxapi"
-)
-
-// Inputs is the subset of Sandbox spec + controller-level metadata the
-// renderer needs.
-type Inputs struct {
-	Name                  string
-	OwnerUID              string
-	OwnerEmail            string
-	OrgSlug               string
-	SovereignFQDN         string
-	Quota                 sandboxapi.SandboxQuota
-	Repos                 []sandboxapi.SandboxRepo
-	PreviewDomain         string
-	AgentCatalogue        []string
-	PtyServerImage        string
-	MCPImage              string
-	NewapiURL             string
-	LLMGatewayTokenSecret string
-	BYOSSecretPrefix      string
-	IdleTimeoutMinutes    int
-
-	// IdleScalingDisabled (TBD-D8b #1725) — when true the renderer
-	// stamps `openova.io/sandbox-idle-scaling-disabled=true` on the
-	// pty-server StatefulSet so the cluster-wide idle scaler skips it
-	// on every pass. Default false preserves the existing scale-to-zero
-	// policy. Sourced from Sandbox.spec.idleScaling.enabled (false →
-	// disabled true; nil OR true → disabled false).
-	IdleScalingDisabled bool
-
-	// Wave 9 — per-Sandbox NewAPI bearer rendered into a dedicated
-	// Secret manifest. When NewAPIToken is non-empty the renderer
-	// emits secret-newapi-token.yaml carrying stringData
-	// LLM_GATEWAY_TOKEN + openova.io/sandbox-token-expires-at
-	// annotation; when NewAPITokenRotatedAt is also non-empty the
-	// rendered Secret additionally carries
-	// kubectl.kubernetes.io/restartedAt so Wave 8's pty-server
-	// StatefulSet picks up rolling restarts on token rotation.
-	NewAPIToken           string
-	NewAPITokenSecretName string
-	NewAPITokenExpiresAt  string
-	NewAPITokenRotatedAt  string
-
-	// D31 active-hot-standby — Sovereign-level toggle + region pair the
-	// sandbox-controller propagates into every per-Sandbox MCP Pod via
-	// SOVEREIGN_ENABLE_HOT_STANDBY / SOVEREIGN_PRIMARY_REGION /
-	// SOVEREIGN_REPLICA_REGION env. The MCP server's sandbox.db.provision
-	// handler reads them at call time and, when valid, materialises a
-	// primary + replica Cluster.postgresql.cnpg.io pair instead of a
-	// single Cluster (mirrors the bp-cnpg-pair pattern). Default empty
-	// (zero regression): every Sandbox stays on single-Cluster CNPG.
-	// Sourced from the sandbox-controller's own env (chart values
-	// `cnpg.activeHotStandby.*` plumbed by bootstrap-kit slot 61 from
-	// the per-Sovereign overlay's envsubst placeholders).
-	EnableHotStandby string
-	PrimaryRegion    string
-	ReplicaRegion    string
-}
-
-const namespaceTemplate = `apiVersion: v1
-kind: Namespace
-metadata:
-  name: {{ .NamespaceName }}
-  labels:
-    openova.io/organization: {{ .OrgSlug }}
-    openova.io/sovereign: {{ .SovereignFQDN }}
-    openova.io/sandbox: {{ .Name }}
-    openova.io/sandbox-owner: {{ .OwnerUID }}
-    openova.io/managed-by: catalyst
-  annotations:
-    openova.io/sandbox-owner-email: {{ .OwnerEmail | quote }}
-{{- if .PreviewDomain }}
-    openova.io/sandbox-preview-domain: {{ .PreviewDomain | quote }}
-{{- end }}
-`
-
-const resourceQuotaTemplate = `apiVersion: v1
-kind: ResourceQuota
-metadata:
-  name: sandbox-quota
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-spec:
-  hard:
-    requests.cpu: {{ .Quota.CPU | quote }}
-    limits.cpu: {{ .Quota.CPU | quote }}
-    requests.memory: {{ .Quota.Memory | quote }}
-    limits.memory: {{ .Quota.Memory | quote }}
-    requests.storage: {{ .Quota.Storage | quote }}
-    count/pods: {{ .Quota.ConcurrentSessions | quote }}
-`
-
-const serviceAccountTemplate = `apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: sandbox
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-`
-
-const roleTemplate = `apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: sandbox
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-rules:
-  - apiGroups: [""]
-    resources: ["pods", "pods/log", "pods/exec", "services", "configmaps", "secrets", "persistentvolumeclaims", "events"]
-    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
-  - apiGroups: ["apps"]
-    resources: ["deployments", "statefulsets", "replicasets"]
-    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
-  - apiGroups: ["batch"]
-    resources: ["jobs", "cronjobs"]
-    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
-`
-
-const roleBindingTemplate = `apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: sandbox
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: Role
-  name: sandbox
-subjects:
-  - kind: ServiceAccount
-    name: sandbox
-    namespace: {{ .NamespaceName }}
-`
-
-const pvcTemplate = `apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: {{ .PVCName }}
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/sandbox-repo: {{ .RepoSlug | quote }}
-    openova.io/managed-by: catalyst
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: {{ .RepoStorage | quote }}
-`
-
-const secretTemplate = `apiVersion: v1
-kind: Secret
-metadata:
-  name: sandbox-tokens
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-type: Opaque
-stringData:
-  placeholder: ""
-`
-
-// newapiTokenSecretTemplate renders the per-Sandbox NewAPI bearer
-// Secret (Wave 9). Materialized into the Org vcluster's
-// sandbox-<owner-uid> namespace by Flux; Wave 8's pty-server
-// StatefulSet mounts the LLM_GATEWAY_TOKEN key as an env var on
-// every Sandbox-agent Pod.
-//
-// The Secret carries TWO operator-visible annotations:
-//   - openova.io/sandbox-token-expires-at — absolute expiry of the
-//     embedded JWT (operator + rotation observer).
-//   - kubectl.kubernetes.io/restartedAt   — rotation marker; Wave 8's
-//     pty-server StatefulSet propagates this onto its Pod template via
-//     a stringData → annotation reference so a fresh Secret triggers
-//     a rolling restart.
-const newapiTokenSecretTemplate = `apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ .SecretName }}
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/sandbox-owner: {{ .OwnerUID }}
-    openova.io/managed-by: catalyst
-  annotations:
-    openova.io/sandbox-token-expires-at: {{ .ExpiresAt | quote }}
-{{- if .RotatedAt }}
-    kubectl.kubernetes.io/restartedAt: {{ .RotatedAt | quote }}
-{{- end }}
-type: Opaque
-stringData:
-  LLM_GATEWAY_TOKEN: {{ .Token | quote }}
-`
-
-const ptyServerStatefulSetTemplate = `apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: pty-server
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/sandbox-owner: {{ .OwnerUID }}
-    openova.io/managed-by: catalyst
-    app.kubernetes.io/name: pty-server
-    app.kubernetes.io/component: pty-server
-  annotations:
-    openova.io/sandbox-idle-timeout-minutes: {{ .IdleTimeoutMinutes | quote }}
-{{- if .IdleScalingDisabled }}
-    openova.io/sandbox-idle-scaling-disabled: "true"
-{{- end }}
-spec:
-  serviceName: pty-server
-  replicas: {{ .Replicas }}
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: pty-server
-      openova.io/sandbox: {{ .Name }}
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: pty-server
-        app.kubernetes.io/component: pty-server
-        openova.io/sandbox: {{ .Name }}
-        openova.io/sandbox-owner: {{ .OwnerUID }}
-        openova.io/managed-by: catalyst
-    spec:
-      serviceAccountName: sandbox
-      automountServiceAccountToken: true
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 65532
-        runAsGroup: 65532
-        fsGroup: 65532
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: pty-server
-          image: {{ .PtyServerImage | quote }}
-          imagePullPolicy: IfNotPresent
-          ports:
-            - name: http
-              containerPort: 7681
-          env:
-            - name: PTY_SERVER_ADDR
-              value: ":7681"
-            - name: SANDBOX_OWNER_UID
-              value: {{ .OwnerUID | quote }}
-            - name: SANDBOX_OWNER_EMAIL
-              value: {{ .OwnerEmail | quote }}
-            - name: ORG_ID
-              value: {{ .OrgSlug | quote }}
-            - name: SOVEREIGN_FQDN
-              value: {{ .SovereignFQDN | quote }}
-            - name: NEWAPI_URL
-              value: {{ .NewapiURL | quote }}
-            - name: OPENAI_BASE_URL
-              value: {{ .NewapiURL | quote }}
-            - name: LLM_GATEWAY_URL
-              value: {{ .NewapiURL | quote }}
-            - name: LLM_GATEWAY_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .LLMGatewayTokenSecret | quote }}
-                  key: llm-gateway-token
-                  optional: true
-            - name: OPENAI_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .LLMGatewayTokenSecret | quote }}
-                  key: llm-gateway-token
-                  optional: true
-{{- if .ClaudeCodeBYOSActive }}
-            - name: ANTHROPIC_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .BYOSSecretName | quote }}
-                  key: access_token
-                  optional: true
-            - name: ANTHROPIC_BASE_URL
-              value: ""
-{{- end }}
-          volumeMounts:
-{{- range .RuntimeRepos }}
-            - name: repo-{{ .Slug }}
-              mountPath: /workspace/{{ .Slug }}
-{{- end }}
-          readinessProbe:
-            httpGet:
-              path: /healthz
-              port: http
-            initialDelaySeconds: 3
-            periodSeconds: 5
-          livenessProbe:
-            httpGet:
-              path: /healthz
-              port: http
-            initialDelaySeconds: 10
-            periodSeconds: 15
-          resources:
-            requests:
-              cpu: "100m"
-              memory: "256Mi"
-            limits:
-              cpu: {{ .Quota.CPU | quote }}
-              memory: {{ .Quota.Memory | quote }}
-          securityContext:
-            allowPrivilegeEscalation: false
-            capabilities:
-              drop: ["ALL"]
-            readOnlyRootFilesystem: false
-      volumes:
-{{- range .RuntimeRepos }}
-        - name: repo-{{ .Slug }}
-          persistentVolumeClaim:
-            claimName: repo-{{ .Slug }}
-{{- end }}
-      terminationGracePeriodSeconds: 30
-`
-
-const mcpDeploymentTemplate = `apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: openova-sandbox-mcp
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/sandbox-owner: {{ .OwnerUID }}
-    openova.io/managed-by: catalyst
-    app.kubernetes.io/name: openova-sandbox-mcp
-    app.kubernetes.io/component: mcp-server
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: openova-sandbox-mcp
-      openova.io/sandbox: {{ .Name }}
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: openova-sandbox-mcp
-        app.kubernetes.io/component: mcp-server
-        openova.io/sandbox: {{ .Name }}
-        openova.io/sandbox-owner: {{ .OwnerUID }}
-        openova.io/managed-by: catalyst
-    spec:
-      serviceAccountName: sandbox
-      automountServiceAccountToken: true
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 65532
-        runAsGroup: 65532
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: mcp
-          image: {{ .MCPImage | quote }}
-          imagePullPolicy: IfNotPresent
-          env:
-            - name: SANDBOX_OWNER_UID
-              value: {{ .OwnerUID | quote }}
-            - name: SANDBOX_OWNER_EMAIL
-              value: {{ .OwnerEmail | quote }}
-            - name: ORG_ID
-              value: {{ .OrgSlug | quote }}
-            - name: SOVEREIGN_FQDN
-              value: {{ .SovereignFQDN | quote }}
-            - name: PTY_SERVER_URL
-              value: "http://pty-server.{{ .NamespaceName }}.svc.cluster.local:7681"
-            - name: LLM_GATEWAY_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .LLMGatewayTokenSecret | quote }}
-                  key: llm-gateway-token
-                  optional: true
-            # ── D31 active-hot-standby — Sovereign-level toggle + region
-            # pair. When SOVEREIGN_ENABLE_HOT_STANDBY parses truthy AND
-            # both region values are non-empty AND distinct, sandbox.db.
-            # provision materialises a primary + replica Cluster.
-            # postgresql.cnpg.io pair instead of a single Cluster (DoD
-            # D31). Default-off keeps every existing Sandbox on single-
-            # Cluster CNPG (zero regression). The values flow:
-            #   bootstrap-kit slot 19a envsubst (per-Sovereign overlay)
-            #   -> bp-sandbox HelmRelease values
-            #   -> sandbox-controller env (host cluster)
-            #   -> here, into every per-Sandbox MCP Pod
-            - name: SOVEREIGN_ENABLE_HOT_STANDBY
-              value: {{ .EnableHotStandby | quote }}
-            - name: SOVEREIGN_PRIMARY_REGION
-              value: {{ .PrimaryRegion | quote }}
-            - name: SOVEREIGN_REPLICA_REGION
-              value: {{ .ReplicaRegion | quote }}
-          resources:
-            requests:
-              cpu: "50m"
-              memory: "128Mi"
-            limits:
-              cpu: "500m"
-              memory: "512Mi"
-          securityContext:
-            allowPrivilegeEscalation: false
-            capabilities:
-              drop: ["ALL"]
-            readOnlyRootFilesystem: true
-      terminationGracePeriodSeconds: 10
-`
-
-const ptyServerServiceTemplate = `apiVersion: v1
-kind: Service
-metadata:
-  name: pty-server
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-    app.kubernetes.io/name: pty-server
-spec:
-  type: ClusterIP
-  selector:
-    app.kubernetes.io/name: pty-server
-    openova.io/sandbox: {{ .Name }}
-  ports:
-    - name: http
-      port: 7681
-      targetPort: 7681
-      protocol: TCP
-`
-
-const httpRouteTemplate = `apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: pty-server
-  namespace: {{ .NamespaceName }}
-  labels:
-    openova.io/sandbox: {{ .Name }}
-    openova.io/managed-by: catalyst
-spec:
-  # Attach to the canonical Cilium Gateway on the host cluster. PR #1641
-  # originally targeted "catalyst-public/catalyst-system/https" — that
-  # Gateway does not exist on a Sovereign. The real public Gateway is
-  # cilium-gateway/kube-system (clusters/_template/sovereign-tls/
-  # cilium-gateway.yaml), matching the placement organization-controller's
-  # tenant_route.go and products/catalyst/chart/templates/httproute.yaml
-  # already use. sectionName is intentionally omitted so the HTTPRoute
-  # attaches to every listener whose hostname matches "sandbox.<sov-fqdn>"
-  # — currently the wildcard *.${SOVEREIGN_FQDN} HTTPS listener
-  # (https-<sov-fqdn-dashed>) per infra/hetzner/main.tf
-  # locals.parent_domains_listeners_yaml fallback path.
-  parentRefs:
-    - name: cilium-gateway
-      namespace: kube-system
-  hostnames:
-    - "sandbox.{{ .SovereignFQDN }}"
-  rules:
-    - matches:
-        - path:
-            type: PathPrefix
-            value: /sessions/{{ .OwnerUID }}/
-      filters:
-        - type: URLRewrite
-          urlRewrite:
-            path:
-              type: ReplacePrefixMatch
-              replacePrefixMatch: /sessions/
-      backendRefs:
-        - name: pty-server
-          port: 7681
-`
-
-const kustomizationTemplate = `apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-resources:
-  - namespace.yaml
-  - resourcequota.yaml
-  - serviceaccount.yaml
-  - role.yaml
-  - rolebinding.yaml
-  - secret.yaml
-{{- if .HasNewAPIToken }}
-  - secret-newapi-token.yaml
-{{- end }}
-{{- range .RepoPaths }}
-  - {{ . }}
-{{- end }}
-  - statefulset-pty-server.yaml
-  - service-pty-server.yaml
-  - deployment-mcp.yaml
-  - httproute-pty-server.yaml
-`
-
-const pvcRepoStorageDefault = "5Gi"
-
-const (
-	defaultLLMGatewayTokenSecret = "sandbox-tokens"
-	defaultBYOSSecretPrefix      = "sandbox-byos-claude-code"
-	defaultIdleTimeoutMinutes    = 30
-	defaultConcurrentSessions    = 1
-)
-
-// Render returns (path, bytes) tuples the reconciler writes into the
-// per-Org Gitea repo under `sandbox/<owner-uid>/`.
-func Render(in Inputs) (map[string][]byte, error) {
-	if strings.TrimSpace(in.Name) == "" {
-		return nil, fmt.Errorf("Inputs.Name is required")
-	}
-	if strings.TrimSpace(in.OwnerUID) == "" {
-		return nil, fmt.Errorf("Inputs.OwnerUID is required")
-	}
-	if strings.TrimSpace(in.OrgSlug) == "" {
-		return nil, fmt.Errorf("Inputs.OrgSlug is required")
-	}
-	if strings.TrimSpace(in.PtyServerImage) == "" {
-		return nil, fmt.Errorf("Inputs.PtyServerImage is required (Wave 8 pty-server StatefulSet has no default image)")
-	}
-	if strings.TrimSpace(in.MCPImage) == "" {
-		return nil, fmt.Errorf("Inputs.MCPImage is required (Wave 8 openova-sandbox-mcp Deployment has no default image)")
-	}
-	if strings.TrimSpace(in.NewapiURL) == "" {
-		return nil, fmt.Errorf("Inputs.NewapiURL is required (newapi-proxy-contract.md §1 — pty-server env LLM_GATEWAY_URL)")
-	}
-	if strings.TrimSpace(in.SovereignFQDN) == "" {
-		return nil, fmt.Errorf("Inputs.SovereignFQDN is required (HTTPRoute hostname binding)")
-	}
-
-	if strings.TrimSpace(in.LLMGatewayTokenSecret) == "" {
-		in.LLMGatewayTokenSecret = defaultLLMGatewayTokenSecret
-	}
-	if strings.TrimSpace(in.BYOSSecretPrefix) == "" {
-		in.BYOSSecretPrefix = defaultBYOSSecretPrefix
-	}
-	if in.IdleTimeoutMinutes <= 0 {
-		in.IdleTimeoutMinutes = defaultIdleTimeoutMinutes
-	}
-
-	ns := fmt.Sprintf("sandbox-%s", in.OwnerUID)
-
-	repos := make([]sandboxapi.SandboxRepo, len(in.Repos))
-	copy(repos, in.Repos)
-	sort.SliceStable(repos, func(i, j int) bool {
-		return repos[i].GiteaRepo < repos[j].GiteaRepo
-	})
-
-	type baseCtx struct {
-		Inputs
-		NamespaceName string
-	}
-	base := baseCtx{Inputs: in, NamespaceName: ns}
-
-	out := make(map[string][]byte, 12+len(repos))
-
-	for path, raw := range map[string]string{
-		"namespace.yaml":      namespaceTemplate,
-		"resourcequota.yaml":  resourceQuotaTemplate,
-		"serviceaccount.yaml": serviceAccountTemplate,
-		"role.yaml":           roleTemplate,
-		"rolebinding.yaml":    roleBindingTemplate,
-		"secret.yaml":         secretTemplate,
-	} {
-		buf, err := renderTemplate(path, raw, base)
-		if err != nil {
-			return nil, err
-		}
-		out[path] = buf
-	}
-
-	type pvcCtx struct {
-		Inputs
-		NamespaceName string
-		PVCName       string
-		RepoSlug      string
-		RepoStorage   string
-	}
-	repoPaths := make([]string, 0, len(repos))
-	for _, repo := range repos {
-		slug := sanitizeRepoSlug(repo.GiteaRepo)
-		pvcName := fmt.Sprintf("repo-%s", slug)
-		path := fmt.Sprintf("pvc-%s.yaml", slug)
-		ctx := pvcCtx{
-			Inputs:        in,
-			NamespaceName: ns,
-			PVCName:       pvcName,
-			RepoSlug:      repo.GiteaRepo,
-			RepoStorage:   pvcRepoStorageDefault,
-		}
-		buf, err := renderTemplate(path, pvcTemplate, ctx)
-		if err != nil {
-			return nil, err
-		}
-		out[path] = buf
-		repoPaths = append(repoPaths, path)
-	}
-
-	// NewAPI per-Sandbox bearer Secret — opt-in (only when the caller
-	// supplied a non-empty token; reconciler skips this manifest when
-	// the bridge is unreachable so namespace + RBAC + PVCs still land
-	// without the token-mint side-effect).
-	if strings.TrimSpace(in.NewAPIToken) != "" {
-		secretName := strings.TrimSpace(in.NewAPITokenSecretName)
-		if secretName == "" {
-			secretName = fmt.Sprintf("sandbox-%s-newapi-token", in.OwnerUID)
-		}
-		type tokenCtx struct {
-			Inputs
-			NamespaceName string
-			SecretName    string
-			Token         string
-			ExpiresAt     string
-			RotatedAt     string
-		}
-		buf, err := renderTemplate("secret-newapi-token.yaml", newapiTokenSecretTemplate, tokenCtx{
-			Inputs:        in,
-			NamespaceName: ns,
-			SecretName:    secretName,
-			Token:         in.NewAPIToken,
-			ExpiresAt:     in.NewAPITokenExpiresAt,
-			RotatedAt:     in.NewAPITokenRotatedAt,
-		})
-		if err != nil {
-			return nil, err
-		}
-		out["secret-newapi-token.yaml"] = buf
-	}
-
-	// Kustomization stitching — sorted repoPaths keeps output stable.
-	sort.Strings(repoPaths)
-	type kustCtx struct {
-		Inputs
-		NamespaceName  string
-		RepoPaths      []string
-		HasNewAPIToken bool
-	}
-	kustBuf, err := renderTemplate("kustomization.yaml", kustomizationTemplate, kustCtx{
-		Inputs:         in,
-		NamespaceName:  ns,
-		RepoPaths:      repoPaths,
-		HasNewAPIToken: strings.TrimSpace(in.NewAPIToken) != "",
-	})
-	if err != nil {
-		return nil, err
-	}
-	out["kustomization.yaml"] = kustBuf
-
-	// Wave 8 runtime — pty-server StatefulSet, MCP Deployment,
-	// pty-server Service, HTTPRoute.
-	type runtimeRepo struct {
-		Slug string
-	}
-	runtimeRepos := make([]runtimeRepo, 0, len(repos))
-	for _, r := range repos {
-		runtimeRepos = append(runtimeRepos, runtimeRepo{Slug: sanitizeRepoSlug(r.GiteaRepo)})
-	}
-	replicas := in.Quota.ConcurrentSessions
-	if replicas <= 0 {
-		replicas = defaultConcurrentSessions
-	}
-	byosActive := agentInCatalogue(in.AgentCatalogue, "claude-code")
-	byosSecretName := fmt.Sprintf("%s-%s", in.BYOSSecretPrefix, in.OwnerUID)
-
-	type runtimeCtx struct {
-		Inputs
-		NamespaceName        string
-		Replicas             int
-		RuntimeRepos         []runtimeRepo
-		ClaudeCodeBYOSActive bool
-		BYOSSecretName       string
-	}
-	rctx := runtimeCtx{
-		Inputs:               in,
-		NamespaceName:        ns,
-		Replicas:             replicas,
-		RuntimeRepos:         runtimeRepos,
-		ClaudeCodeBYOSActive: byosActive,
-		BYOSSecretName:       byosSecretName,
-	}
-	for path, raw := range map[string]string{
-		"statefulset-pty-server.yaml": ptyServerStatefulSetTemplate,
-		"service-pty-server.yaml":     ptyServerServiceTemplate,
-		"deployment-mcp.yaml":         mcpDeploymentTemplate,
-		"httproute-pty-server.yaml":   httpRouteTemplate,
-	} {
-		buf, err := renderTemplate(path, raw, rctx)
-		if err != nil {
-			return nil, err
-		}
-		out[path] = buf
-	}
-	_ = base
-
-	return out, nil
-}
-
-func agentInCatalogue(catalogue []string, agent string) bool {
-	want := strings.ToLower(strings.TrimSpace(agent))
-	for _, a := range catalogue {
-		if strings.ToLower(strings.TrimSpace(a)) == want {
-			return true
-		}
-	}
-	return false
-}
-
-func renderTemplate(name, raw string, data any) ([]byte, error) {
-	t, err := template.New(name).Funcs(funcs()).Parse(raw)
-	if err != nil {
-		return nil, fmt.Errorf("template parse %s: %w", name, err)
-	}
-	var buf bytes.Buffer
-	if err := t.Execute(&buf, data); err != nil {
-		return nil, fmt.Errorf("template execute %s: %w", name, err)
-	}
-	return buf.Bytes(), nil
-}
-
-func funcs() template.FuncMap {
-	return template.FuncMap{
-		"quote": func(v any) string { return fmt.Sprintf("%q", fmt.Sprintf("%v", v)) },
-	}
-}
-
-func sanitizeRepoSlug(s string) string {
-	s = strings.ToLower(strings.TrimSpace(s))
-	var b strings.Builder
-	for _, r := range s {
-		switch {
-		case r >= 'a' && r <= 'z', r >= '0' && r <= '9':
-			b.WriteRune(r)
-		case r == '/' || r == '_' || r == '.' || r == ' ':
-			b.WriteRune('-')
-		case r == '-':
-			b.WriteRune('-')
-		}
-	}
-	out := b.String()
-	for strings.Contains(out, "--") {
-		out = strings.ReplaceAll(out, "--", "-")
-	}
-	out = strings.Trim(out, "-")
-	if len(out) > 200 {
-		out = strings.Trim(out[:200], "-")
-	}
-	return out
-}
--- a/core/controllers/sandbox/internal/idlescaler/idlescaler.go
+++ b/core/controllers/sandbox/internal/idlescaler/idlescaler.go
@ -1,444 +0,0 @@
-// Package idlescaler hosts the IdleScaler — the Wave 10 (PR #1641 follow-up)
-// goroutine that scales pty-server StatefulSets to 0 replicas after the
-// configured idle window has elapsed (architecture.md §1 idle policy).
-//
-// PR #1641 shipped the `openova.io/sandbox-idle-timeout-minutes`
-// annotation on every pty-server StatefulSet but no controller was
-// reading it. This package closes that loop:
-//
-//  1. Every Interval (default 60s) the IdleScaler lists every
-//     StatefulSet labeled `app.kubernetes.io/component=pty-server` AND
-//     `openova.io/managed-by=catalyst` across all `sandbox-*` namespaces
-//     visible to the controller's client.
-//
-//  2. For each StatefulSet, it reads the idle-timeout annotation. If
-//     absent or unparseable, it falls back to the controller-level
-//     default (typically env SANDBOX_IDLE_TIMEOUT_MINUTES, 30 min).
-//
-//  3. It polls the StatefulSet's pty-server Service at
-//     `http://pty-server.<ns>.svc.cluster.local:7681/idle` (the Service
-//     name + port are written by the renderer in
-//     core/controllers/sandbox/internal/gitops/manifests.go) — the
-//     handler is contributed by products/sandbox/pty-server/internal/
-//     server/routes.go and returns the in-memory lastActivityAt +
-//     activeSessions counters.
-//
-//  4. It stamps `openova.io/sandbox-last-activity-at` (RFC3339) onto
-//     the StatefulSet so a future operator inspecting `kubectl get
-//     statefulset -o yaml` can see what the scaler observed.
-//
-//  5. If `now - lastActivityAt > idleTimeout` AND activeSessions == 0
-//     AND spec.replicas > 0, the IdleScaler patches spec.replicas = 0.
-//     The Sandbox reconciler will bump replicas back to
-//     spec.quota.concurrentSessions the next time anything touches the
-//     parent Sandbox CR (a tab connect, a session create, a CR edit).
-//
-// Mutation scope: the IdleScaler ONLY ever scales pty-server
-// StatefulSets — its OWN managed resource (architecture.md §7 — those
-// StatefulSets are written by the sandbox-controller renderer). It
-// never patches anything outside the `sandbox-*` namespace + the
-// `app.kubernetes.io/component=pty-server` label.
-package idlescaler
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"strconv"
-	"strings"
-	"time"
-
-	"github.com/go-logr/logr"
-	appsv1 "k8s.io/api/apps/v1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/labels"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-)
-
-const (
-	// LabelComponent is the StatefulSet label the renderer writes on
-	// every pty-server StatefulSet (manifests.go ptyServerStatefulSetTemplate).
-	LabelComponent = "app.kubernetes.io/component"
-	// LabelManagedBy is the secondary safety filter — we never touch
-	// a StatefulSet that didn't come from us.
-	LabelManagedBy = "openova.io/managed-by"
-	// ComponentValue is the LabelComponent value we filter on.
-	ComponentValue = "pty-server"
-	// ManagedByValue is the LabelManagedBy value we filter on.
-	ManagedByValue = "catalyst"
-
-	// AnnIdleTimeoutMinutes — set by the renderer (manifests.go
-	// `openova.io/sandbox-idle-timeout-minutes`). Per-StatefulSet
-	// override of the controller-level default.
-	AnnIdleTimeoutMinutes = "openova.io/sandbox-idle-timeout-minutes"
-	// AnnLastActivityAt — written by the IdleScaler. RFC3339 (UTC).
-	// External observers (operators, dashboards) can read this without
-	// hitting the pty-server endpoint directly.
-	AnnLastActivityAt = "openova.io/sandbox-last-activity-at"
-	// AnnIdleScalingDisabled — set by the renderer when Sandbox CR
-	// carries `spec.idleScaling.enabled=false`. The IdleScaler skips
-	// the StatefulSet on every pass (TBD-D8b #1725 — long-running
-	// agent workloads that idle for hours but must stay Running).
-	// Truthy values: 1, t, true (case-insensitive).
-	AnnIdleScalingDisabled = "openova.io/sandbox-idle-scaling-disabled"
-
-	// NamespacePrefix limits the scaler to namespaces the renderer
-	// creates (`sandbox-<owner-uid>`). Any StatefulSet that somehow
-	// carries our labels outside this prefix is ignored.
-	NamespacePrefix = "sandbox-"
-
-	// PtyServicePort mirrors the renderer's ptyServerServiceTemplate
-	// (port 7681). If that constant changes, this must change too.
-	PtyServicePort = 7681
-	// IdlePath is the endpoint exposed by pty-server (routes.go).
-	IdlePath = "/idle"
-)
-
-// idleDTO mirrors products/sandbox/pty-server/internal/server/routes.go
-// idleDTO. Kept local (no cross-module type import) — both sides change
-// in the same PR per the architecture-doc cross-reference idiom.
-type idleDTO struct {
-	LastActivityAt time.Time `json:"lastActivityAt"`
-	ActiveSessions int       `json:"activeSessions"`
-}
-
-// Options configures the IdleScaler.
-type Options struct {
-	// Interval is the poll cadence. Defaults to 60s.
-	Interval time.Duration
-	// DefaultIdleTimeoutMinutes is the fallback when a StatefulSet has
-	// no idle-timeout annotation (or it's unparseable). The controller
-	// already plumbs SANDBOX_IDLE_TIMEOUT_MINUTES through env — pass
-	// the same value here so behaviour is consistent.
-	DefaultIdleTimeoutMinutes int
-	// HTTPTimeout bounds a single /idle probe. Defaults to 5s.
-	HTTPTimeout time.Duration
-	// HTTPClient is injectable for tests. Defaults to http.DefaultClient
-	// with HTTPTimeout applied.
-	HTTPClient *http.Client
-	// ProbeURL is injectable for tests. nil = use cluster-DNS form
-	// `http://pty-server.<ns>.svc.cluster.local:7681/idle`.
-	ProbeURL func(namespace string) string
-	// Now is injectable for tests. Defaults to time.Now().UTC.
-	Now func() time.Time
-}
-
-// Scaler is the IdleScaler runtime. Construct via New, register with
-// the controller-runtime manager via mgr.Add(s) (Scaler implements
-// manager.Runnable + manager.LeaderElectionRunnable so only the
-// elected leader scales — peers stay idle).
-type Scaler struct {
-	client client.Client
-	log    logr.Logger
-
-	interval       time.Duration
-	defaultTimeout time.Duration
-	httpClient     *http.Client
-	probeURL       func(string) string
-	now            func() time.Time
-}
-
-// New returns a Scaler ready to register with a controller-runtime manager.
-func New(c client.Client, log logr.Logger, opts Options) *Scaler {
-	if opts.Interval <= 0 {
-		opts.Interval = 60 * time.Second
-	}
-	if opts.DefaultIdleTimeoutMinutes <= 0 {
-		opts.DefaultIdleTimeoutMinutes = 30
-	}
-	if opts.HTTPTimeout <= 0 {
-		opts.HTTPTimeout = 5 * time.Second
-	}
-	httpc := opts.HTTPClient
-	if httpc == nil {
-		httpc = &http.Client{Timeout: opts.HTTPTimeout}
-	}
-	probe := opts.ProbeURL
-	if probe == nil {
-		probe = func(ns string) string {
-			return fmt.Sprintf("http://pty-server.%s.svc.cluster.local:%d%s",
-				ns, PtyServicePort, IdlePath)
-		}
-	}
-	now := opts.Now
-	if now == nil {
-		now = func() time.Time { return time.Now().UTC() }
-	}
-	return &Scaler{
-		client:         c,
-		log:            log,
-		interval:       opts.Interval,
-		defaultTimeout: time.Duration(opts.DefaultIdleTimeoutMinutes) * time.Minute,
-		httpClient:     httpc,
-		probeURL:       probe,
-		now:            now,
-	}
-}
-
-// Start runs the scaler loop until ctx is cancelled. Satisfies
-// controller-runtime's manager.Runnable interface — register via
-// `mgr.Add(scaler)`.
-func (s *Scaler) Start(ctx context.Context) error {
-	s.log.Info("idle-scaler starting",
-		"interval", s.interval,
-		"default_timeout", s.defaultTimeout)
-
-	// Tick once on startup so we don't wait `interval` before the
-	// first reconciliation pass.
-	if err := s.runOnce(ctx); err != nil {
-		s.log.Error(err, "idle-scaler initial pass failed (non-fatal — will retry)")
-	}
-
-	t := time.NewTicker(s.interval)
-	defer t.Stop()
-	for {
-		select {
-		case <-ctx.Done():
-			s.log.Info("idle-scaler shutting down")
-			return nil
-		case <-t.C:
-			if err := s.runOnce(ctx); err != nil {
-				s.log.Error(err, "idle-scaler tick failed (non-fatal — will retry)")
-			}
-		}
-	}
-}
-
-// NeedLeaderElection makes the scaler a singleton across HA replicas.
-// Peers stay idle so we never race a scale-to-zero against a scale-back-up.
-func (s *Scaler) NeedLeaderElection() bool { return true }
-
-// runOnce is one IdleScaler pass.
-func (s *Scaler) runOnce(ctx context.Context) error {
-	sel, err := labels.Parse(fmt.Sprintf("%s=%s,%s=%s",
-		LabelComponent, ComponentValue,
-		LabelManagedBy, ManagedByValue))
-	if err != nil {
-		return fmt.Errorf("build label selector: %w", err)
-	}
-
-	var list appsv1.StatefulSetList
-	if err := s.client.List(ctx, &list, &client.ListOptions{LabelSelector: sel}); err != nil {
-		return fmt.Errorf("list pty-server statefulsets: %w", err)
-	}
-
-	now := s.now()
-	scanned := 0
-	idled := 0
-	for i := range list.Items {
-		ss := &list.Items[i]
-		if !strings.HasPrefix(ss.Namespace, NamespacePrefix) {
-			// Defence in depth — we never touch SS outside
-			// `sandbox-*` even if a stray label leaked elsewhere.
-			continue
-		}
-		scanned++
-		if didScale, err := s.processOne(ctx, ss, now); err != nil {
-			s.log.Error(err, "idle-scaler: process statefulset failed",
-				"namespace", ss.Namespace, "name", ss.Name)
-			continue
-		} else if didScale {
-			idled++
-		}
-	}
-	s.log.V(1).Info("idle-scaler pass done", "scanned", scanned, "idled", idled)
-	return nil
-}
-
-// processOne returns (didScale, err). didScale is true if this pass
-// scaled spec.replicas to 0.
-func (s *Scaler) processOne(ctx context.Context, ss *appsv1.StatefulSet, now time.Time) (bool, error) {
-	log := s.log.WithValues("namespace", ss.Namespace, "name", ss.Name)
-
-	// TBD-D8b #1725 — per-Sandbox opt-out. The renderer stamps the
-	// disabled annotation when Sandbox.spec.idleScaling.enabled=false.
-	// Skip entirely: no probe, no annotation patch, no scale decision.
-	if isIdleScalingDisabled(ss) {
-		log.V(1).Info("idle-scaler: skipping (idle-scaling disabled per CR)")
-		return false, nil
-	}
-
-	timeout := s.timeoutFor(ss)
-
-	// Probe the in-cluster service for the in-memory activity counter.
-	dto, probeErr := s.probe(ctx, ss.Namespace)
-
-	// Decide the canonical lastActivity to stamp.
-	//
-	// Probe-success path: trust the live counter.
-	// Probe-failure path: keep the existing annotation (don't reset)
-	//                     — a service may be unreachable briefly during
-	//                     Pod restart; the next tick will catch up.
-	var lastActivity time.Time
-	var activeSessions int
-	if probeErr == nil {
-		lastActivity = dto.LastActivityAt.UTC()
-		activeSessions = dto.ActiveSessions
-	} else {
-		// Existing annotation as fallback.
-		if existing, ok := ss.Annotations[AnnLastActivityAt]; ok {
-			if t, perr := time.Parse(time.RFC3339, existing); perr == nil {
-				lastActivity = t.UTC()
-			}
-		}
-		// If we have neither a probe nor a prior annotation, we
-		// can't make an idle decision yet. Skip — next tick.
-		if lastActivity.IsZero() {
-			log.V(1).Info("idle-scaler: probe failed and no prior annotation, skipping",
-				"err", probeErr.Error())
-			return false, nil
-		}
-		log.V(1).Info("idle-scaler: probe failed, using prior annotation",
-			"last_activity", lastActivity.Format(time.RFC3339),
-			"err", probeErr.Error())
-	}
-
-	// Stamp the annotation (probe-success only — otherwise we'd
-	// overwrite a stale value with the same stale value, which is a
-	// no-op patch but still chatty).
-	if probeErr == nil {
-		if err := s.stampAnnotation(ctx, ss, lastActivity); err != nil {
-			log.Error(err, "idle-scaler: stamp last-activity annotation failed")
-			// non-fatal — fall through to the scale decision so a
-			// degraded annotation-patch path can't keep a Pod alive
-			// forever.
-		}
-	}
-
-	if activeSessions > 0 {
-		// Sessions are open; never scale down even if lastActivity
-		// drifts (lastActivity covers the trailing edge after the
-		// last WS frame — Touch() also fires on attach/detach).
-		return false, nil
-	}
-
-	idleFor := now.Sub(lastActivity)
-	if idleFor < timeout {
-		log.V(1).Info("idle-scaler: not yet idle",
-			"idle_for", idleFor.String(),
-			"timeout", timeout.String())
-		return false, nil
-	}
-
-	// Already scaled to zero? Skip the patch — idempotent.
-	if ss.Spec.Replicas != nil && *ss.Spec.Replicas == 0 {
-		return false, nil
-	}
-
-	log.Info("idle-scaler: scaling pty-server to 0",
-		"idle_for", idleFor.String(),
-		"timeout", timeout.String(),
-		"last_activity", lastActivity.Format(time.RFC3339))
-	if err := s.scaleToZero(ctx, ss); err != nil {
-		return false, fmt.Errorf("scale to zero: %w", err)
-	}
-	// Wave 15 (PR #1674 follow-up) — emit the canonical idle-timeout
-	// counter so the Grafana "Idle-Timeout Scale-Down Events / hour"
-	// panel ticks. Labelled by namespace to match the dashboard's
-	// `sum by (namespace) (rate(...))` aggregation.
-	idleTimeoutEvents.WithLabelValues(ss.Namespace).Inc()
-	return true, nil
-}
-
-// isIdleScalingDisabled reports whether the StatefulSet carries the
-// `openova.io/sandbox-idle-scaling-disabled` annotation set to a truthy
-// value (TBD-D8b #1725). The annotation is renderer-stamped from
-// Sandbox.spec.idleScaling.enabled=false; absence (or any other value)
-// keeps the StatefulSet subject to the scaler. The check is
-// quote-tolerant for the same reason timeoutFor is.
-func isIdleScalingDisabled(ss *appsv1.StatefulSet) bool {
-	v, ok := ss.Annotations[AnnIdleScalingDisabled]
-	if !ok {
-		return false
-	}
-	v = strings.Trim(strings.TrimSpace(v), "\"'")
-	switch strings.ToLower(v) {
-	case "1", "t", "true", "yes", "y":
-		return true
-	}
-	return false
-}
-
-func (s *Scaler) timeoutFor(ss *appsv1.StatefulSet) time.Duration {
-	if v, ok := ss.Annotations[AnnIdleTimeoutMinutes]; ok {
-		// The renderer writes the annotation as a quoted integer
-		// (manifests.go uses {{ .IdleTimeoutMinutes | quote }}); the
-		// API server stores annotations verbatim so we get the raw
-		// string back. strconv.Atoi handles the unquoted form; we
-		// trim quotes defensively in case operators hand-edited.
-		v = strings.Trim(strings.TrimSpace(v), "\"'")
-		if n, err := strconv.Atoi(v); err == nil && n > 0 {
-			return time.Duration(n) * time.Minute
-		}
-	}
-	return s.defaultTimeout
-}
-
-func (s *Scaler) probe(ctx context.Context, namespace string) (idleDTO, error) {
-	url := s.probeURL(namespace)
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
-	if err != nil {
-		return idleDTO{}, fmt.Errorf("build request: %w", err)
-	}
-	resp, err := s.httpClient.Do(req)
-	if err != nil {
-		return idleDTO{}, fmt.Errorf("%s: %w", url, err)
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		return idleDTO{}, fmt.Errorf("%s: status %d", url, resp.StatusCode)
-	}
-	var dto idleDTO
-	if err := json.NewDecoder(resp.Body).Decode(&dto); err != nil {
-		return idleDTO{}, fmt.Errorf("decode %s body: %w", url, err)
-	}
-	return dto, nil
-}
-
-func (s *Scaler) stampAnnotation(ctx context.Context, ss *appsv1.StatefulSet, lastActivity time.Time) error {
-	stamp := lastActivity.UTC().Format(time.RFC3339)
-	if existing, ok := ss.Annotations[AnnLastActivityAt]; ok && existing == stamp {
-		// Already up-to-date (typical when probe + last poll agree
-		// within a second). Skip the patch.
-		return nil
-	}
-
-	// JSON-Merge-Patch body — only the annotation we care about.
-	// strategic-merge-patch over annotations is equivalent here since
-	// metav1.ObjectMeta.Annotations is a map (additive merge).
-	patch := []byte(fmt.Sprintf(
-		`{"metadata":{"annotations":{%q:%q}}}`,
-		AnnLastActivityAt, stamp))
-
-	if err := s.client.Patch(ctx, ss, client.RawPatch(client.Merge.Type(), patch)); err != nil {
-		if apierrors.IsNotFound(err) {
-			// The StatefulSet was deleted between List and Patch —
-			// nothing to do.
-			return nil
-		}
-		return err
-	}
-	// Reflect the new value in our in-memory copy too so subsequent
-	// runOnce passes within this process don't re-stamp.
-	if ss.Annotations == nil {
-		ss.Annotations = map[string]string{}
-	}
-	ss.Annotations[AnnLastActivityAt] = stamp
-	return nil
-}
-
-func (s *Scaler) scaleToZero(ctx context.Context, ss *appsv1.StatefulSet) error {
-	patch := []byte(`{"spec":{"replicas":0}}`)
-	if err := s.client.Patch(ctx, ss, client.RawPatch(client.Merge.Type(), patch)); err != nil {
-		if apierrors.IsNotFound(err) {
-			return nil
-		}
-		return err
-	}
-	// Keep our local copy consistent for the rest of this pass.
-	var zero int32 = 0
-	ss.Spec.Replicas = &zero
-	return nil
-}
-
--- a/core/controllers/sandbox/internal/idlescaler/idlescaler_test.go
+++ b/core/controllers/sandbox/internal/idlescaler/idlescaler_test.go
@ -1,488 +0,0 @@
-// idlescaler_test.go — coverage for the Wave 10 IdleScaler.
-//
-// Strategy: drive the scaler with a fake controller-runtime client +
-// a localhost httptest server that mimics pty-server's /idle endpoint.
-// We assert four trajectories:
-//
-//	(1) Active pty-server with no idle time → no scale, no harm.
-//	(2) Idle pty-server past timeout → spec.replicas patched to 0.
-//	(3) activeSessions > 0 keeps the pod alive even past timeout.
-//	(4) /idle probe failure with NO prior annotation → skip (next tick).
-//	(5) Per-StatefulSet annotation overrides the controller default.
-//	(6) StatefulSets outside `sandbox-*` namespace are ignored (defence
-//	    in depth).
-//	(7) StatefulSets already at replicas=0 are not re-patched.
-
-package idlescaler
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"testing"
-	"time"
-
-	"github.com/go-logr/logr"
-	prometheustestutil "github.com/prometheus/client_golang/prometheus/testutil"
-	appsv1 "k8s.io/api/apps/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
-	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-)
-
-// helper to assemble a pty-server StatefulSet with the labels +
-// annotations the renderer writes.
-func ptyStatefulSet(namespace, name string, replicas int32, annotations map[string]string) *appsv1.StatefulSet {
-	ann := map[string]string{}
-	for k, v := range annotations {
-		ann[k] = v
-	}
-	return &appsv1.StatefulSet{
-		TypeMeta: metav1.TypeMeta{
-			APIVersion: "apps/v1",
-			Kind:       "StatefulSet",
-		},
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      name,
-			Namespace: namespace,
-			Labels: map[string]string{
-				LabelComponent: ComponentValue,
-				LabelManagedBy: ManagedByValue,
-			},
-			Annotations: ann,
-		},
-		Spec: appsv1.StatefulSetSpec{
-			Replicas: &replicas,
-		},
-	}
-}
-
-func newFakeClient(t *testing.T, objs ...client.Object) client.Client {
-	t.Helper()
-	scheme := runtime.NewScheme()
-	if err := clientgoscheme.AddToScheme(scheme); err != nil {
-		t.Fatalf("add clientgo scheme: %v", err)
-	}
-	return fake.NewClientBuilder().
-		WithScheme(scheme).
-		WithObjects(objs...).
-		Build()
-}
-
-// helper — make a pty-server probe target. fn decides response per ns.
-func newProbeServer(t *testing.T, fn func(ns string) (idleDTO, bool)) (*httptest.Server, func(ns string) string) {
-	t.Helper()
-	mux := http.NewServeMux()
-	mux.HandleFunc("/idle/", func(w http.ResponseWriter, r *http.Request) {
-		ns := r.URL.Path[len("/idle/"):]
-		dto, ok := fn(ns)
-		if !ok {
-			http.Error(w, "no", http.StatusNotFound)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		_ = json.NewEncoder(w).Encode(dto)
-	})
-	srv := httptest.NewServer(mux)
-	t.Cleanup(srv.Close)
-	build := func(ns string) string { return srv.URL + "/idle/" + ns }
-	return srv, build
-}
-
-// (1) Active pty-server, very recent activity → no scale.
-func TestProcessOne_NotIdle_NoScale(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 2,
-		map[string]string{AnnIdleTimeoutMinutes: "30"})
-
-	c := newFakeClient(t, ss)
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{
-			LastActivityAt: time.Now().UTC().Add(-1 * time.Minute),
-			ActiveSessions: 0,
-		}, true
-	})
-
-	now := time.Now().UTC()
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 30,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 2 {
-		t.Errorf("replicas: got %v want 2 (not idle yet, must not scale)", got.Spec.Replicas)
-	}
-	if _, ok := got.Annotations[AnnLastActivityAt]; !ok {
-		t.Errorf("expected %s annotation to be stamped on success probe", AnnLastActivityAt)
-	}
-}
-
-// (2) Idle past timeout, no active sessions → scale to zero.
-func TestProcessOne_Idle_ScalesToZero(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 3,
-		map[string]string{AnnIdleTimeoutMinutes: "30"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	stale := now.Add(-45 * time.Minute) // past the 30-min annotation timeout
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{LastActivityAt: stale, ActiveSessions: 0}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 30,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 0 {
-		t.Errorf("replicas: got %v want 0 (idle past timeout)", got.Spec.Replicas)
-	}
-	if got.Annotations[AnnLastActivityAt] == "" {
-		t.Errorf("expected %s annotation to be stamped before scale", AnnLastActivityAt)
-	}
-}
-
-// (3) activeSessions > 0 keeps the pod alive even past timeout.
-func TestProcessOne_ActiveSessions_NeverScales(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 3,
-		map[string]string{AnnIdleTimeoutMinutes: "5"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	stale := now.Add(-2 * time.Hour) // way past timeout
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{LastActivityAt: stale, ActiveSessions: 2}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 5,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 3 {
-		t.Errorf("replicas: got %v want 3 (activeSessions > 0 must keep pod alive)",
-			got.Spec.Replicas)
-	}
-}
-
-// (4) Probe failure with no prior annotation → skip (no scale, no
-// annotation written).
-func TestProcessOne_ProbeFailNoPriorAnnotation_Skips(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 2,
-		map[string]string{AnnIdleTimeoutMinutes: "5"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	// probe returns 404
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{}, false
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 5,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 2 {
-		t.Errorf("replicas: got %v want 2 (probe failed, no decision)",
-			got.Spec.Replicas)
-	}
-	if _, ok := got.Annotations[AnnLastActivityAt]; ok {
-		t.Errorf("annotation: got %v, expected NO annotation when probe fails",
-			got.Annotations[AnnLastActivityAt])
-	}
-}
-
-// (5) Per-StatefulSet annotation override the controller default.
-func TestProcessOne_AnnotationOverridesDefault(t *testing.T) {
-	t.Parallel()
-	// SS says timeout is 5 minutes; controller default is 60 (would
-	// not have scaled at 10min idle).
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 1,
-		map[string]string{AnnIdleTimeoutMinutes: "5"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{
-			LastActivityAt: now.Add(-10 * time.Minute),
-			ActiveSessions: 0,
-		}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 60, // would NOT scale at 10min
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 0 {
-		t.Errorf("replicas: got %v want 0 (annotation says 5min, 10min idle)",
-			got.Spec.Replicas)
-	}
-}
-
-// (6) StatefulSets outside `sandbox-*` are ignored.
-func TestRunOnce_IgnoresNonSandboxNamespace(t *testing.T) {
-	t.Parallel()
-	rogue := ptyStatefulSet("kube-system", "pty-server", 1,
-		map[string]string{AnnIdleTimeoutMinutes: "5"})
-
-	c := newFakeClient(t, rogue)
-	now := time.Now().UTC()
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{
-			LastActivityAt: now.Add(-2 * time.Hour),
-			ActiveSessions: 0,
-		}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 5,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "kube-system", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 1 {
-		t.Errorf("replicas: got %v want 1 (must NOT touch kube-system)",
-			got.Spec.Replicas)
-	}
-}
-
-// (7) StatefulSets already at replicas=0 are not re-patched (idempotent).
-func TestProcessOne_AlreadyZero_NoOp(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 0,
-		map[string]string{AnnIdleTimeoutMinutes: "5"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	_, probe := newProbeServer(t, func(ns string) (idleDTO, bool) {
-		return idleDTO{
-			LastActivityAt: now.Add(-2 * time.Hour),
-			ActiveSessions: 0,
-		}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 5,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	// A double pass — both passes should leave replicas==0 and not error.
-	for i := 0; i < 2; i++ {
-		if err := s.runOnce(context.Background()); err != nil {
-			t.Fatalf("runOnce pass %d: %v", i, err)
-		}
-	}
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 0 {
-		t.Errorf("replicas: got %v want 0 (already zero, no-op)", got.Spec.Replicas)
-	}
-}
-
-// (8) Default URL builder produces the cluster-DNS form.
-func TestDefaultProbeURL(t *testing.T) {
-	t.Parallel()
-	s := New(nil, logr.Discard(), Options{})
-	got := s.probeURL("sandbox-ceo-at-acme-com")
-	want := fmt.Sprintf("http://pty-server.sandbox-ceo-at-acme-com.svc.cluster.local:%d%s",
-		PtyServicePort, IdlePath)
-	if got != want {
-		t.Errorf("default probeURL:\n  got  %q\n  want %q", got, want)
-	}
-}
-
-// (9) NeedLeaderElection — singleton across HA replicas.
-func TestNeedLeaderElection_True(t *testing.T) {
-	t.Parallel()
-	s := New(nil, logr.Discard(), Options{})
-	if !s.NeedLeaderElection() {
-		t.Errorf("NeedLeaderElection: got false, want true (must be singleton)")
-	}
-}
-
-// (10) Wave 15 — idle-timeout counter ticks once per scale-to-zero, with
-// the namespace label set. Asserts the `sandbox_controller_idle_timeout_events_total`
-// counter the Grafana panel "Idle-Timeout Scale-Down Events / hour" reads.
-func TestProcessOne_IdleTimeoutCounter_Increments(t *testing.T) {
-	// Not t.Parallel — counter is package-global and we read its value.
-	ns := "sandbox-metric-test"
-	ss := ptyStatefulSet(ns, "pty-server", 2,
-		map[string]string{AnnIdleTimeoutMinutes: "10"})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	stale := now.Add(-30 * time.Minute) // past 10-min timeout
-	_, probe := newProbeServer(t, func(_ string) (idleDTO, bool) {
-		return idleDTO{LastActivityAt: stale, ActiveSessions: 0}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 10,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	before := testutilCounterValue(t, ns)
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-	after := testutilCounterValue(t, ns)
-	if got, want := after-before, 1.0; got != want {
-		t.Errorf("idle_timeout_events_total{namespace=%q} delta: got %v want %v", ns, got, want)
-	}
-}
-
-// testutilCounterValue reads the current counter value for the namespace
-// label using the prometheus testutil package — returns 0 if the label
-// tuple has not been touched yet.
-func testutilCounterValue(t *testing.T, namespace string) float64 {
-	t.Helper()
-	return prometheustestutil.ToFloat64(idleTimeoutEvents.WithLabelValues(namespace))
-}
-
-// (10) TBD-D8b #1725 — `openova.io/sandbox-idle-scaling-disabled=true`
-// annotation prevents scale-to-zero even when the StatefulSet has been
-// idle for far past its timeout window.
-func TestProcessOne_IdleScalingDisabled_NeverScales(t *testing.T) {
-	t.Parallel()
-	ss := ptyStatefulSet("sandbox-emrah", "pty-server", 2, map[string]string{
-		AnnIdleTimeoutMinutes:  "5",
-		AnnIdleScalingDisabled: "true",
-	})
-
-	c := newFakeClient(t, ss)
-	now := time.Now().UTC()
-	// Probe should never be called when scaling is disabled — return an
-	// implausibly-stale lastActivity to prove that even if it WERE called
-	// the scaler still wouldn't act.
-	_, probe := newProbeServer(t, func(_ string) (idleDTO, bool) {
-		return idleDTO{
-			LastActivityAt: now.Add(-24 * time.Hour),
-			ActiveSessions: 0,
-		}, true
-	})
-
-	s := New(c, logr.Discard(), Options{
-		DefaultIdleTimeoutMinutes: 5,
-		ProbeURL:                  probe,
-		Now:                       func() time.Time { return now },
-	})
-
-	if err := s.runOnce(context.Background()); err != nil {
-		t.Fatalf("runOnce: %v", err)
-	}
-
-	var got appsv1.StatefulSet
-	if err := c.Get(context.Background(),
-		client.ObjectKey{Namespace: "sandbox-emrah", Name: "pty-server"}, &got); err != nil {
-		t.Fatalf("get post-pass: %v", err)
-	}
-	if got.Spec.Replicas == nil || *got.Spec.Replicas != 2 {
-		t.Errorf("replicas: got %v want 2 (idle-scaling disabled)", got.Spec.Replicas)
-	}
-	// AnnLastActivityAt must NOT have been stamped — disabled path
-	// skips the entire process pipeline.
-	if _, stamped := got.Annotations[AnnLastActivityAt]; stamped {
-		t.Errorf("AnnLastActivityAt unexpectedly stamped on disabled Sandbox")
-	}
-}
-
-// (11) `false` / "0" / unset → scaler still active. Confirms the
-// truthy-only matcher (architecture.md §1 idle policy default-on).
-func TestIsIdleScalingDisabled_TruthyOnly(t *testing.T) {
-	t.Parallel()
-	cases := []struct {
-		val  string
-		want bool
-	}{
-		{"true", true},
-		{"True", true},
-		{"TRUE", true},
-		{"1", true},
-		{"yes", true},
-		{"\"true\"", true},
-		{"false", false},
-		{"0", false},
-		{"", false},
-		{"maybe", false},
-	}
-	for _, tc := range cases {
-		ss := ptyStatefulSet("sandbox-x", "pty-server", 1,
-			map[string]string{AnnIdleScalingDisabled: tc.val})
-		got := isIdleScalingDisabled(ss)
-		if got != tc.want {
-			t.Errorf("isIdleScalingDisabled(%q) = %v, want %v", tc.val, got, tc.want)
-		}
-	}
-	// Annotation absent → never disabled.
-	bare := ptyStatefulSet("sandbox-x", "pty-server", 1, nil)
-	if isIdleScalingDisabled(bare) {
-		t.Errorf("isIdleScalingDisabled(no annotation) = true, want false")
-	}
-}
--- a/core/controllers/sandbox/internal/idlescaler/metrics.go
+++ b/core/controllers/sandbox/internal/idlescaler/metrics.go
@ -1,39 +0,0 @@
-// metrics.go — Prometheus metrics for the IdleScaler (Wave 15, PR #1674).
-//
-// Wave 14 (PR #1674) shipped a Grafana dashboard panel
-// "Idle-Timeout Scale-Down Events / hour" that targets metric
-// `sandbox_controller_idle_timeout_events_total`. The panel renders
-// "No data" until the sandbox-controller image carrying this emitter
-// rolls out across the fleet (Inviolable Principle #11 — no fabricated
-// metrics).
-//
-// This file closes that loop on the controller side:
-//
-//   - Registers Counter `sandbox_controller_idle_timeout_events_total`
-//     with label {namespace} via controller-runtime's metrics registry
-//     (sigs.k8s.io/controller-runtime/pkg/metrics). The controller's
-//     manager already wires up /metrics on :8080 — registering with
-//     ctrlmetrics.Registry surfaces this counter on the same scrape.
-//   - The IdleScaler calls IncIdleTimeoutEvent(namespace) inside
-//     scaleToZero() so the counter ticks once per pty-server
-//     StatefulSet scaled to 0 replicas, with the namespace label
-//     matching the dashboard's `sum by (namespace) (rate(...))`
-//     aggregation.
-package idlescaler
-
-import (
-	"github.com/prometheus/client_golang/prometheus"
-	ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
-)
-
-var idleTimeoutEvents = prometheus.NewCounterVec(prometheus.CounterOpts{
-	Name: "sandbox_controller_idle_timeout_events_total",
-	Help: "Number of pty-server StatefulSets scaled to 0 replicas by the IdleScaler, partitioned by namespace.",
-}, []string{"namespace"})
-
-func init() {
-	// Register with controller-runtime's shared registry so the
-	// manager's existing :8080 /metrics endpoint exposes it. Re-
-	// registration on test process reuse is guarded by ctrlmetrics.
-	ctrlmetrics.Registry.MustRegister(idleTimeoutEvents)
-}
--- a/core/controllers/sandbox/internal/newapi/client.go
+++ b/core/controllers/sandbox/internal/newapi/client.go
@ -1,202 +0,0 @@
-// Package newapi is the thin HTTP client the sandbox-controller uses
-// to mint per-Sandbox LLM-gateway tokens via the catalyst-api bridge
-// handler shipped in PR #1638 — POST /admin/tokens/sandbox.
-//
-// Wire shape mirrors platform/newapi/internal/handler/sandbox_token.go
-// EXACTLY (request fields org_id/user_id/sandbox_id/allowed_channels,
-// response fields token/expires_at). If the handler's contract evolves
-// both sides must change in the same PR — there is no schema
-// generator between them on purpose: the bridge endpoint is the
-// authoritative spec, this client is its only known caller, and a
-// thin manual binding is easier to audit than yet-another generated
-// surface.
-//
-// Per Inviolable Principle #4 (no hardcoded values) every operational
-// knob (base URL, admin secret, HTTP timeout) is injected by the
-// caller — no defaults baked into the package.
-package newapi
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-	"time"
-)
-
-// Client is the surface the sandbox-controller's reconciler depends
-// on. Defined as an interface so the controller's unit tests can
-// substitute an in-process stub without standing up a httptest.Server
-// per case.
-type Client interface {
-	// MintSandboxToken POSTs the request to /admin/tokens/sandbox and
-	// returns the issued bearer + its absolute expiry. Caller-supplied
-	// context governs cancellation + the outbound HTTP deadline.
-	MintSandboxToken(ctx context.Context, req MintRequest) (*MintResponse, error)
-}
-
-// MintRequest is the wire body for POST /admin/tokens/sandbox.
-//
-// Field names match handler.sandboxTokenRequest one-for-one — change
-// them in lockstep with platform/newapi/internal/handler/
-// sandbox_token.go.
-type MintRequest struct {
-	// OrgID is the parent Organization slug (e.g. "acme"). The handler
-	// stamps this as the `org` claim on the minted JWT.
-	OrgID string `json:"org_id"`
-
-	// UserID is the Sandbox owner's stable identity — Keycloak sub or
-	// owner email. Forwarded as `X-User-Id` on every NewAPI /v1/* call
-	// for per-user billing-ledger attribution.
-	UserID string `json:"user_id"`
-
-	// SandboxID is the opaque per-Sandbox identifier the
-	// sandbox-controller assigns. We pass the Sandbox CR's
-	// metadata.uid — stable across spec mutations and 1:1 with a
-	// rendered Pod's identity.
-	SandboxID string `json:"sandbox_id"`
-
-	// AllowedChannels is the list of NewAPI channels the issued token
-	// is restricted to. Empty rejected with 400 by the handler.
-	AllowedChannels []string `json:"allowed_channels"`
-
-	// Capabilities is the MCP capability allowlist the issued token's
-	// `capabilities` claim carries. Sourced from the Sandbox CR's
-	// spec.capabilities (falling back to the plan→capabilities map via
-	// sandboxapi.ResolveCapabilities). Encoded by the bridge handler
-	// as the JWT `capabilities` claim which `Claims.HasCapability`
-	// reads on every MCP tool call. Wildcards (`sandbox.db.*`) are
-	// supported by the matcher so this list can carry coarse grants.
-	// Empty list is allowed (downgrades the token to the introspection
-	// surface only, matching a pre-PR-#1671 token).
-	Capabilities []string `json:"capabilities,omitempty"`
-}
-
-// MintResponse is the wire body for the 200 OK reply.
-type MintResponse struct {
-	// Token is the HS256-signed JWT the Sandbox Pod presents to NewAPI
-	// on every /v1/* call as the bearer credential.
-	Token string `json:"token"`
-
-	// ExpiresAt is the absolute expiry instant of Token (RFC3339).
-	ExpiresAt time.Time `json:"expires_at"`
-}
-
-// HTTPClient is the net/http.Client subset we need — narrowed for
-// dependency-injection in tests.
-type HTTPClient interface {
-	Do(req *http.Request) (*http.Response, error)
-}
-
-// liveClient is the production implementation. Constructed with a
-// pre-configured *http.Client + the shared admin bearer + base URL.
-type liveClient struct {
-	baseURL     string
-	adminSecret string
-	http        HTTPClient
-}
-
-// New returns a live client. baseURL is the catalyst-api root the
-// bridge handler is mounted on (e.g.
-// "http://newapi.newapi.svc.cluster.local:3000"). adminSecret is the
-// value of NEWAPI_ADMIN_SECRET — chart-emitted by the
-// newapi-token-signing-key Secret. httpClient may be nil; in that
-// case a default 30s-timeout client is used.
-//
-// Returns an error when baseURL or adminSecret is empty so the
-// controller fails-loud at process start rather than shipping a
-// no-op token-mint path.
-func New(baseURL, adminSecret string, httpClient HTTPClient) (Client, error) {
-	baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
-	if baseURL == "" {
-		return nil, errors.New("newapi.New: base URL is empty")
-	}
-	if strings.TrimSpace(adminSecret) == "" {
-		return nil, errors.New("newapi.New: admin secret is empty")
-	}
-	if httpClient == nil {
-		httpClient = &http.Client{Timeout: 30 * time.Second}
-	}
-	return &liveClient{
-		baseURL:     baseURL,
-		adminSecret: adminSecret,
-		http:        httpClient,
-	}, nil
-}
-
-// MintSandboxToken implements Client.
-//
-// Failure modes surfaced as wrapped errors:
-//   - request marshalling   — caller bug (programmer error)
-//   - transport error       — retry-worthy
-//   - non-2xx with body     — retry-worthy unless 4xx (configuration drift)
-//   - response decode error — bridge contract violation (escalate)
-func (c *liveClient) MintSandboxToken(ctx context.Context, req MintRequest) (*MintResponse, error) {
-	if strings.TrimSpace(req.OrgID) == "" {
-		return nil, errors.New("newapi: MintRequest.OrgID is required")
-	}
-	if strings.TrimSpace(req.UserID) == "" {
-		return nil, errors.New("newapi: MintRequest.UserID is required")
-	}
-	if strings.TrimSpace(req.SandboxID) == "" {
-		return nil, errors.New("newapi: MintRequest.SandboxID is required")
-	}
-	if len(req.AllowedChannels) == 0 {
-		return nil, errors.New("newapi: MintRequest.AllowedChannels is empty")
-	}
-
-	body, err := json.Marshal(req)
-	if err != nil {
-		return nil, fmt.Errorf("newapi: marshal request: %w", err)
-	}
-
-	url := c.baseURL + "/admin/tokens/sandbox"
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-	if err != nil {
-		return nil, fmt.Errorf("newapi: build request: %w", err)
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	httpReq.Header.Set("Accept", "application/json")
-	httpReq.Header.Set("Authorization", "Bearer "+c.adminSecret)
-	// Wave 15 (PR #1674 follow-up) — stamp the tool header so the
-	// bridge handler's `newapi_admin_token_mint_requests_total{tool,status}`
-	// counter attributes mints to this controller. Header value must
-	// match the dashboard's tool="sandbox-controller" panel filter.
-	httpReq.Header.Set("X-Catalyst-Tool", "sandbox-controller")
-
-	resp, err := c.http.Do(httpReq)
-	if err != nil {
-		return nil, fmt.Errorf("newapi: POST %s: %w", url, err)
-	}
-	defer resp.Body.Close()
-	respBody, _ := io.ReadAll(resp.Body)
-
-	if resp.StatusCode != http.StatusOK {
-		// Surface bridge error verbatim — operator log diagnoses the
-		// difference between 401 (admin secret rotated out of sync),
-		// 400 (controller sent malformed request) and 5xx (bridge
-		// outage). Body capped to 512 bytes for sanity.
-		snip := respBody
-		if len(snip) > 512 {
-			snip = snip[:512]
-		}
-		return nil, fmt.Errorf("newapi: POST %s: status %d: %s",
-			url, resp.StatusCode, string(snip))
-	}
-
-	var out MintResponse
-	if err := json.Unmarshal(respBody, &out); err != nil {
-		return nil, fmt.Errorf("newapi: decode response: %w", err)
-	}
-	if out.Token == "" {
-		return nil, errors.New("newapi: response missing token")
-	}
-	if out.ExpiresAt.IsZero() {
-		return nil, errors.New("newapi: response missing expires_at")
-	}
-	return &out, nil
-}
--- a/core/controllers/sandbox/internal/newapi/client_test.go
+++ b/core/controllers/sandbox/internal/newapi/client_test.go
@ -1,117 +0,0 @@
-package newapi
-
-import (
-	"context"
-	"encoding/json"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-	"time"
-)
-
-func TestMintSandboxToken_HappyPath(t *testing.T) {
-	t.Parallel()
-
-	var captured MintRequest
-	var capturedAuth string
-	exp := time.Date(2030, 1, 2, 3, 4, 5, 0, time.UTC)
-
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodPost {
-			http.Error(w, "method", http.StatusMethodNotAllowed)
-			return
-		}
-		if r.URL.Path != "/admin/tokens/sandbox" {
-			http.Error(w, "path", http.StatusNotFound)
-			return
-		}
-		capturedAuth = r.Header.Get("Authorization")
-		body, _ := io.ReadAll(r.Body)
-		_ = json.Unmarshal(body, &captured)
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusOK)
-		_ = json.NewEncoder(w).Encode(MintResponse{Token: "tok-abc", ExpiresAt: exp})
-	}))
-	defer srv.Close()
-
-	c, err := New(srv.URL, "admin-bytes", nil)
-	if err != nil {
-		t.Fatalf("New: %v", err)
-	}
-	got, err := c.MintSandboxToken(context.Background(), MintRequest{
-		OrgID:           "acme",
-		UserID:          "ceo@acme.com",
-		SandboxID:       "uid-1",
-		AllowedChannels: []string{"qwen"},
-	})
-	if err != nil {
-		t.Fatalf("MintSandboxToken: %v", err)
-	}
-	if got.Token != "tok-abc" {
-		t.Errorf("token: got %q", got.Token)
-	}
-	if !got.ExpiresAt.Equal(exp) {
-		t.Errorf("expires_at: got %v want %v", got.ExpiresAt, exp)
-	}
-	if capturedAuth != "Bearer admin-bytes" {
-		t.Errorf("auth header: got %q", capturedAuth)
-	}
-	if captured.OrgID != "acme" || captured.UserID != "ceo@acme.com" ||
-		captured.SandboxID != "uid-1" || len(captured.AllowedChannels) != 1 ||
-		captured.AllowedChannels[0] != "qwen" {
-		t.Errorf("request body: got %+v", captured)
-	}
-}
-
-func TestMintSandboxToken_Non2xx(t *testing.T) {
-	t.Parallel()
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusUnauthorized)
-		_, _ = io.WriteString(w, `{"error":"invalid admin credentials"}`)
-	}))
-	defer srv.Close()
-	c, err := New(srv.URL, "wrong", nil)
-	if err != nil {
-		t.Fatalf("New: %v", err)
-	}
-	_, err = c.MintSandboxToken(context.Background(), MintRequest{
-		OrgID: "a", UserID: "u", SandboxID: "s", AllowedChannels: []string{"q"},
-	})
-	if err == nil {
-		t.Fatalf("expected error on 401")
-	}
-	if !strings.Contains(err.Error(), "401") {
-		t.Errorf("error should surface status code: %v", err)
-	}
-}
-
-func TestNew_InputValidation(t *testing.T) {
-	t.Parallel()
-	if _, err := New("", "x", nil); err == nil {
-		t.Errorf("expected error on empty baseURL")
-	}
-	if _, err := New("http://x", "  ", nil); err == nil {
-		t.Errorf("expected error on empty adminSecret")
-	}
-}
-
-func TestMintSandboxToken_RequestValidation(t *testing.T) {
-	t.Parallel()
-	c, err := New("http://x", "s", nil)
-	if err != nil {
-		t.Fatalf("New: %v", err)
-	}
-	cases := []MintRequest{
-		{OrgID: "", UserID: "u", SandboxID: "s", AllowedChannels: []string{"q"}},
-		{OrgID: "o", UserID: "", SandboxID: "s", AllowedChannels: []string{"q"}},
-		{OrgID: "o", UserID: "u", SandboxID: "", AllowedChannels: []string{"q"}},
-		{OrgID: "o", UserID: "u", SandboxID: "s", AllowedChannels: nil},
-	}
-	for i, tc := range cases {
-		if _, err := c.MintSandboxToken(context.Background(), tc); err == nil {
-			t.Errorf("case %d: expected error, got nil", i)
-		}
-	}
-}
--- a/Show More
+++ b/Show More