#!/usr/bin/env bash
# =============================================================================
# onx-onoxsoft-slice-bootstrap — Install + start top-level onoxsoft.slice
#
# v86.1 — Multi-tenant isolation: top-level admin slice installer.
#
# Copies /usr/local/onoxsoft/templates/onoxsoft.slice.stub →
# /etc/systemd/system/onoxsoft.slice, runs daemon-reload, starts the slice,
# and verifies it is active. Idempotent: if onoxsoft.slice is already installed
# and active, the script returns `installed: false, active: true` without
# rewriting the unit file (daemon-reload and start still run; both are harmless).
#
# Operators who want to override aggregate caps should drop a file at
#   /etc/systemd/system/onoxsoft.slice.d/admin-caps.conf
# which this script will NOT clobber on re-runs.
#
# Input (stdin JSON — optional):
#   {}             — install if missing
#   {"force":true} — re-install template even if present (overwrites local edits!)
#
# Output (stdout JSON):
#   {
#     "ok": true,
#     "slice_unit": "onoxsoft.slice",
#     "slice_path": "/etc/systemd/system/onoxsoft.slice",
#     "runtime_path": "/sys/fs/cgroup/onoxsoft.slice",
#     "installed": true,        -- true if file written this run
#     "active": true            -- systemctl is-active result
#   }
#
# Exit codes: 0=ok 1=invalid-input 2=preflight-fail 3=exec-fail
#
# Deployed to: /usr/local/onoxsoft/bin/onx-onoxsoft-slice-bootstrap
# =============================================================================

# Strict mode: abort on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory that holds this script, with symlinks resolved, so the shared
# library is found even when the script is invoked through a symlink.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Load the shared helper library (presumably the origin of the onx_* helpers
# and require_root used below — they are not defined in this file).
# shellcheck source=_lib/common.sh
. "${SCRIPT_DIR}/_lib/common.sh"

# ── Constants ────────────────────────────────────────────────────────────────
# All four values are fixed for the lifetime of the script, so they are marked
# readonly to turn an accidental reassignment into an immediate error.

# Source template for the unit file; overridable through the environment
# variable ONX_ONOXSOFT_SLICE_TEMPLATE_PATH (an empty value falls back to the
# default as well).
readonly TEMPLATE_PATH="${ONX_ONOXSOFT_SLICE_TEMPLATE_PATH:-/usr/local/onoxsoft/templates/onoxsoft.slice.stub}"
# Name of the systemd unit managed by this script.
readonly SLICE_UNIT="onoxsoft.slice"
# Where the unit file is installed.
readonly SLICE_PATH="/etc/systemd/system/${SLICE_UNIT}"
# cgroup directory reported to the caller as the slice's runtime path.
readonly RUNTIME_PATH="/sys/fs/cgroup/${SLICE_UNIT}"

# ── Dependencies ─────────────────────────────────────────────────────────────
# Each required tool must be resolvable by the shell; a missing tool is
# reported through onx_die with code 2 (documented above as preflight-fail).
if ! command -v jq >/dev/null 2>&1; then
    onx_die 2 "jq required"
fi
if ! command -v systemctl >/dev/null 2>&1; then
    onx_die 2 "systemctl required"
fi
if ! command -v install >/dev/null 2>&1; then
    onx_die 2 "install (coreutils) required"
fi

# Helper defined outside this file; by its name it enforces that the caller
# is root — confirm against _lib/common.sh.
require_root

# ── Read stdin (optional) ────────────────────────────────────────────────────
# The JSON request is optional. When stdin is a terminal there is no request
# to read, and a bare `cat` would block until the operator types EOF — so the
# read is skipped and the input is treated as empty.
if [[ -t 0 ]]; then
    INPUT=''
else
    # Best-effort read: an unreadable/closed stdin counts as "no input".
    INPUT="$(cat 2>/dev/null || true)"
fi

# Empty input is shorthand for "no options".
if [[ -z "${INPUT}" ]]; then
    INPUT='{}'
fi

# Validate that stdin holds exactly ONE JSON value and that it is an object.
# --slurp collects every top-level value into an array first; without it,
# `jq -e` only looks at the last value of a stream, so input such as `1 {}`
# would wrongly pass. The here-string avoids echo's option/escape handling.
jq -e -s 'length == 1 and (.[0] | type == "object")' <<<"${INPUT}" >/dev/null 2>&1 \
    || onx_die 1 "stdin is not a valid JSON object"

# "force" defaults to "false" when absent (helper defined outside this file).
FORCE=$(onx_json_get_bool "${INPUT}" "force" "false")

# ── Preflight ────────────────────────────────────────────────────────────────
# The unit template has to be readable before anything is attempted.
if [[ ! -r "${TEMPLATE_PATH}" ]]; then
    onx_die 2 "template not found or unreadable: ${TEMPLATE_PATH}"
fi

# Require the unified cgroup v2 hierarchy (same check as the detect logic):
# stat reports the filesystem type of /sys/fs/cgroup, and "missing" stands in
# when the path cannot be inspected at all.
CGROUP_FSTYPE="$(stat -fc %T /sys/fs/cgroup 2>/dev/null || printf '%s\n' "missing")"
if [[ "${CGROUP_FSTYPE}" != "cgroup2fs" ]]; then
    onx_die 2 "cgroup v2 not mounted (fstype=${CGROUP_FSTYPE}) — run onx-cgroup-detect first"
fi

# ── Install template (idempotent) ────────────────────────────────────────────
# The unit file is written only when it does not exist yet, or when the caller
# asked for a forced re-install. INSTALLED records whether this run wrote it.
INSTALLED="false"
if [[ ! -f "${SLICE_PATH}" || "${FORCE}" == "true" ]]; then
    # Atomic replace: stage the file NEXT TO the target, then rename it over.
    #   * install(1) copies into place (it does not rename), so installing
    #     straight onto SLICE_PATH could leave a truncated unit on a crash.
    #   * rename is only atomic within one filesystem, hence the temp file
    #     lives in the target directory rather than /tmp.
    #   * The leading dot keeps the staging file hidden, so it is not mistaken
    #     for a unit file while it exists.
    TMP_SLICE="$(mktemp "${SLICE_PATH%/*}/.${SLICE_UNIT}.XXXXXX")" \
        || onx_die 3 "cannot create staging file next to ${SLICE_PATH}"
    # Remove the staging file on any exit path; after a successful rename it
    # no longer exists and rm -f is a no-op.
    trap 'rm -f -- "${TMP_SLICE}"' EXIT

    # install sets final mode and ownership on the staged copy.
    install -m 0644 -o root -g root -- "${TEMPLATE_PATH}" "${TMP_SLICE}" \
        || onx_die 3 "failed to stage ${TEMPLATE_PATH} as ${TMP_SLICE}"
    # Same-directory mv is a rename: readers see either the old or new file.
    mv -f -- "${TMP_SLICE}" "${SLICE_PATH}" \
        || onx_die 3 "failed to move staged unit into ${SLICE_PATH}"

    INSTALLED="true"
    onx_log "onoxsoft-slice-bootstrap: installed ${SLICE_PATH} (force=${FORCE})"
fi

# ── Reload + start ───────────────────────────────────────────────────────────
# Have systemd re-read its unit files; runs on every invocation, whether or
# not the unit file was written above.
if ! systemctl daemon-reload; then
    onx_die 3 "systemctl daemon-reload failed"
fi

# Starting the slice is what materialises its cgroup hierarchy; starting an
# already-running slice is harmless. systemctl's own output is discarded so
# stdout stays reserved for the final JSON document.
systemctl start "${SLICE_UNIT}" >/dev/null 2>&1 \
    || onx_die 3 "systemctl start ${SLICE_UNIT} failed"

# ── Verify ───────────────────────────────────────────────────────────────────
# Probe the slice state; if the first probe says "not active", wait briefly
# and probe once more, because systemd reloads can complete asynchronously.
ACTIVE="false"
if systemctl is-active --quiet "${SLICE_UNIT}" 2>/dev/null; then
    ACTIVE="true"
else
    sleep 0.2
    if systemctl is-active --quiet "${SLICE_UNIT}" 2>/dev/null; then
        ACTIVE="true"
    fi
fi

# Still inactive after the retry: log the first 20 lines of the unit's status
# for diagnosis, then fail with the exec-fail code.
if [[ "${ACTIVE}" == "false" ]]; then
    # `|| true` keeps the capture from failing: systemctl status returns
    # non-zero for an inactive unit and pipefail would propagate that.
    STATUS_OUT="$(systemctl status "${SLICE_UNIT}" --no-pager 2>&1 | head -20 || true)"
    onx_log "onoxsoft-slice-bootstrap: slice failed to activate; status=${STATUS_OUT}"
    onx_die 3 "${SLICE_UNIT} did not become active after start"
fi

onx_log "onoxsoft-slice-bootstrap: ok installed=${INSTALLED} active=${ACTIVE}"

# ── Output ───────────────────────────────────────────────────────────────────
# Emit the result as a single compact JSON object on stdout. The two flags are
# passed with --argjson so they appear as JSON booleans rather than strings;
# the paths and unit name are passed with --arg as plain strings.
jq --null-input --compact-output \
    --argjson installed "${INSTALLED}" \
    --argjson active    "${ACTIVE}" \
    --arg runtime_path  "${RUNTIME_PATH}" \
    --arg slice_path    "${SLICE_PATH}" \
    --arg slice_unit    "${SLICE_UNIT}" \
    '{ok: true, slice_unit: $slice_unit, slice_path: $slice_path, runtime_path: $runtime_path, installed: $installed, active: $active}'
