#!/usr/bin/env bash
# =============================================================================
# onx-cgroup-detect — Feature detection for cgroup v2 + systemd slice support
#
# v86.0 — Multi-tenant isolation FOUNDATION (cgroup-detect sysapi)
#
# Purpose:
#   Probes the running system for everything v86 multi-tenant isolation needs:
#     - cgroup v2 unified hierarchy mounted at /sys/fs/cgroup
#     - kernel controllers: cpu, memory, io, pids
#     - systemd version >= 240 (slice Delegate= support)
#     - `Delegate=` is actually accepted (probe: start a throwaway transient
#       unit with Delegate=yes via systemd-run)
#     - top-level `onoxsoft.slice` presence (informational only)
#
#   install.sh shells out to this script during preflight; a `ready: false`
#   response aborts panel installation with a clear remediation note for
#   the operator.
#
# Input (stdin JSON — optional):
#   {}                — defaults
#   {"skip_probe":true}  — skip systemd-run Delegate probe (faster, less reliable)
#
# Output (stdout JSON):
#   {
#     "ok": true,
#     "ready": true,
#     "cgroup_version": "v2",
#     "unified_hierarchy": true,
#     "controllers": ["cpu","memory","io","pids","cpuset"],
#     "controllers_missing": [],
#     "systemd_version": 252,
#     "kernel_version": "5.14.0-503.el9_5.x86_64",
#     "delegate_supported": true,
#     "onoxsoft_slice_exists": false,
#     "warnings": [],
#     "recommendations": []
#   }
#
# Exit codes:
#   0  ready  (supported=true AND critical controllers present AND delegate=true)
#   1  invalid input
#   2  unsupported / not ready (cgroup v1 or hybrid, missing controllers,
#      systemd missing or < 240, or Delegate= probe failed — operator must
#      intervene)
#   3  internal error (jq missing, /proc unreadable, etc.)
#
# Deployed to: /usr/local/onoxsoft/bin/onx-cgroup-detect
# =============================================================================

set -euo pipefail

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# The header reserves exit code 3 for internal errors. Without this guard a
# missing helper library makes `source` fail and `set -e` abort with status 1,
# which callers would read as "invalid input".
if [[ ! -r "${SCRIPT_DIR}/_lib/common.sh" ]]; then
    printf '{"error":"_lib/common.sh not found or unreadable"}\n' >&2
    exit 3
fi
# shellcheck source=_lib/common.sh
source "${SCRIPT_DIR}/_lib/common.sh"

# ── Dependencies ─────────────────────────────────────────────────────────────
# jq builds every JSON value this script emits; stat reads the filesystem type
# of /sys/fs/cgroup. Either one missing is an internal error (exit 3).
command -v jq   >/dev/null 2>&1 || { printf '{"error":"jq required"}\n' >&2; exit 3; }
command -v stat >/dev/null 2>&1 || { printf '{"error":"stat required"}\n' >&2; exit 3; }

# Note: deliberately NOT calling require_root — detection is read-only when
# skip_probe=true, and the Delegate probe needs root only on locked-down
# configurations: systemd-run can start transient units as non-root when the
# host's authorization policy allows it. A refused probe is reported as
# delegate_supported=false.

# ── Read & parse stdin (optional) ────────────────────────────────────────────
# Input is optional. When stdin is an interactive terminal nobody is piping
# JSON in, so fall back to defaults instead of blocking in `cat` until EOF.
if [[ -t 0 ]]; then
    INPUT=''
else
    # A closed or unreadable stdin is treated the same as empty input.
    INPUT="$(cat 2>/dev/null || true)"
fi

# Empty or whitespace-only input means "use defaults".
[[ "${INPUT}" =~ [^[:space:]] ]] || INPUT='{}'

# Validate is JSON object. printf (not echo) so input starting with "-n"/"-e"
# is passed to jq verbatim.
printf '%s\n' "${INPUT}" | jq -e 'type == "object"' >/dev/null 2>&1 \
    || onx_die 1 "stdin is not a valid JSON object"

# "true" here skips the systemd-run Delegate probe further down.
SKIP_PROBE="$(onx_json_get_bool "${INPUT}" "skip_probe" "false")"

# ── Init result accumulators ─────────────────────────────────────────────────
# Free-text findings appended by the probes below and emitted in the final
# JSON as "warnings" / "recommendations".
declare -a WARNINGS=()
declare -a RECOMMENDATIONS=()

# Verdict fields start out negative/unknown; individual probes overwrite them.
CGROUP_VERSION="unknown"
UNIFIED="false" SUPPORTED="false"
SLICE_DELEGATE="false" ONOXSOFT_SLICE="false"

# ── 1. Detect cgroup version ─────────────────────────────────────────────────
# The filesystem type that `stat -fc %T` reports for /sys/fs/cgroup decides
# the verdict:
#   cgroup2fs → pure v2 (the only supported layout)
#   tmpfs     → v1, or hybrid when a `unified` subdirectory is present
#   missing   → stat failed, i.e. the path could not be inspected
#   other     → unrecognised; only a warning is recorded
CGROUP_FSTYPE="$(stat -fc %T /sys/fs/cgroup 2>/dev/null || echo "missing")"

if [[ "${CGROUP_FSTYPE}" == "cgroup2fs" ]]; then
    CGROUP_VERSION="v2"
    UNIFIED="true"
    SUPPORTED="true"
elif [[ "${CGROUP_FSTYPE}" == "tmpfs" ]]; then
    # Both tmpfs layouts are unsupported; they differ only in label and advice.
    UNIFIED="false"
    SUPPORTED="false"
    if [[ ! -d /sys/fs/cgroup/unified ]]; then
        CGROUP_VERSION="v1"
        WARNINGS+=("cgroup v1 detected — v86 multi-tenant isolation requires v2")
        RECOMMENDATIONS+=("Run: grubby --update-kernel=ALL --remove-args=\"systemd.unified_cgroup_hierarchy=0\" && reboot")
    else
        CGROUP_VERSION="hybrid"
        WARNINGS+=("hybrid cgroup hierarchy detected — v86 requires pure v2")
        RECOMMENDATIONS+=("Add kernel cmdline: systemd.unified_cgroup_hierarchy=1, then reboot")
    fi
elif [[ "${CGROUP_FSTYPE}" == "missing" ]]; then
    WARNINGS+=("/sys/fs/cgroup not mounted — cgroup support absent")
    RECOMMENDATIONS+=("Reboot with systemd init (cgroup is mounted by systemd at PID 1)")
else
    WARNINGS+=("unknown cgroup filesystem: ${CGROUP_FSTYPE}")
fi

# ── 2. Enumerate available controllers ───────────────────────────────────────
CONTROLLERS_JSON="[]"
MISSING_JSON="[]"
REQUIRED=("cpu" "memory" "io" "pids")

# Tests whether a controller name occurs as a whole token in a list.
#   $1  controller list separated by spaces, tabs and/or newlines
#   $2  controller name to look for
# Returns 0 when present, 1 otherwise. Matching is on whole tokens, so "cpu"
# is not satisfied by "cpuset".
onx_cgroup_has_controller() {
    local haystack="${1//$'\n'/ }"
    haystack=" ${haystack//$'\t'/ } "
    [[ "${haystack}" == *" ${2} "* ]]
}

if [[ -r /sys/fs/cgroup/cgroup.controllers ]]; then
    # One controller per line, sorted and de-duplicated, blank lines removed.
    AVAILABLE_LIST="$(tr ' ' '\n' < /sys/fs/cgroup/cgroup.controllers | sort -u | sed '/^$/d')"
    # select(length > 0): an empty controllers file must serialise as [] and
    # not as [""] (printf always emits at least one, possibly empty, line).
    CONTROLLERS_JSON="$(printf '%s\n' "${AVAILABLE_LIST}" | jq -R 'select(length > 0)' | jq -sc .)"

    # Check against the list already read instead of re-reading the file once
    # per required controller.
    MISSING=()
    for ctrl in "${REQUIRED[@]}"; do
        if ! onx_cgroup_has_controller "${AVAILABLE_LIST}" "${ctrl}"; then
            MISSING+=("${ctrl}")
        fi
    done

    if (( ${#MISSING[@]} > 0 )); then
        MISSING_JSON="$(printf '%s\n' "${MISSING[@]}" | jq -R . | jq -sc .)"
        SUPPORTED="false"
        WARNINGS+=("missing required controllers: ${MISSING[*]}")
        RECOMMENDATIONS+=("Enable controllers via cgroup_enable= kernel cmdline or systemd Delegate=")
    fi
elif [[ "${CGROUP_VERSION}" == "v2" ]]; then
    # On v2 the file is expected to exist; record that enumeration was skipped.
    WARNINGS+=("/sys/fs/cgroup/cgroup.controllers unreadable — controller enumeration skipped")
fi

# ── 3. systemd version ───────────────────────────────────────────────────────
# Extracts the integer major version from `systemctl --version` output.
#   $1  full command output; only the first line is inspected, e.g.
#       "systemd 252 (252.16-1.el9)"
# Prints the version, or 0 when the first line does not start with
# "systemd <digits>". Suffixes such as ".16" or "~rc3" are ignored.
onx_cgroup_parse_systemd_version() {
    local first_line="${1%%$'\n'*}"
    if [[ "${first_line}" =~ ^systemd[[:space:]]+([0-9]+) ]]; then
        # 10# forces base 10 so a zero-padded value is never read as octal.
        printf '%d\n' "$(( 10#${BASH_REMATCH[1]} ))"
    else
        printf '0\n'
    fi
}

SYSTEMD_VERSION=0
if command -v systemctl >/dev/null 2>&1; then
    # Capture the whole output first and parse it in-shell. Piping through
    # `head -1` under `pipefail` can fail the pipeline with SIGPIPE, and an
    # `|| echo 0` fallback inside the same substitution would then append a
    # second line and force the detected version to 0.
    SYSTEMCTL_VERSION_OUTPUT="$(systemctl --version 2>/dev/null || true)"
    SYSTEMD_VERSION="$(onx_cgroup_parse_systemd_version "${SYSTEMCTL_VERSION_OUTPUT}")"
fi

# 0 means "systemctl absent or its output was not understood".
if (( SYSTEMD_VERSION == 0 )); then
    WARNINGS+=("systemd not detected — Delegate= probe skipped")
    SUPPORTED="false"
elif (( SYSTEMD_VERSION < 240 )); then
    WARNINGS+=("systemd ${SYSTEMD_VERSION} < 240 — Delegate= unreliable on this version")
    RECOMMENDATIONS+=("Upgrade systemd to >= 240 (AlmaLinux 9 ships systemd 252)")
    SUPPORTED="false"
fi

# ── 4. Kernel version ────────────────────────────────────────────────────────
# Kernel release string as printed by `uname -r`; the literal "unknown" is
# emitted instead when uname fails.
KERNEL_VERSION="$(uname -r 2>/dev/null || printf '%s\n' "unknown")"

# ── 5. Probe Delegate= support with throwaway unit ───────────────────────────
# Starts /bin/true as a transient service with Delegate=yes inside a dedicated
# slice and treats systemd-run's exit status as the verdict.
#   $1  transient unit name (without the .service suffix)
#   $2  slice name (including the .slice suffix)
# Returns 0 when systemd-run accepted the unit, otherwise its exit status.
onx_cgroup_probe_delegate() {
    local probe_unit="$1"
    local probe_slice="$2"
    local rc=0

    # --no-ask-password: fail instead of prompting for authentication, since
    #                    all output is discarded and nobody could answer.
    # --collect:         have systemd unload the unit even if it ends failed.
    systemd-run \
        --quiet \
        --no-ask-password \
        --collect \
        --unit="${probe_unit}" \
        --slice="${probe_slice}" \
        --property=Delegate=yes \
        /bin/true >/dev/null 2>&1 || rc=$?

    # Best-effort cleanup on BOTH outcomes. reset-failed only clears failed
    # units, so the per-PID slice (which can stay loaded after its only member
    # has exited) is stopped explicitly as well.
    systemctl --no-ask-password reset-failed "${probe_unit}.service" >/dev/null 2>&1 || true
    systemctl --no-ask-password stop "${probe_slice}"                >/dev/null 2>&1 || true
    systemctl --no-ask-password reset-failed "${probe_slice}"        >/dev/null 2>&1 || true

    return "${rc}"
}

if [[ "${SKIP_PROBE}" == "true" ]]; then
    # User opted out — assume true if systemd >= 240
    if (( SYSTEMD_VERSION >= 240 )); then
        SLICE_DELEGATE="true"
        WARNINGS+=("Delegate= probe skipped (skip_probe=true) — assumed true based on systemd ${SYSTEMD_VERSION}")
    fi
elif (( SYSTEMD_VERSION >= 240 )) && command -v systemd-run >/dev/null 2>&1; then
    # Names carry this script's PID so concurrent runs do not collide.
    PROBE_UNIT="onx-cgroup-probe-$$"
    PROBE_SLICE="onx-probe-$$.slice"

    if onx_cgroup_probe_delegate "${PROBE_UNIT}" "${PROBE_SLICE}"; then
        SLICE_DELEGATE="true"
    else
        SLICE_DELEGATE="false"
        SUPPORTED="false"
        WARNINGS+=("Delegate= probe failed — slice delegation may be blocked by policy or kernel")
        RECOMMENDATIONS+=("Check: systemd-run --slice=test.slice --property=Delegate=yes /bin/true")
    fi
elif (( SYSTEMD_VERSION >= 240 )); then
    # SLICE_DELEGATE keeps its initial "false", so the final verdict is not ready.
    WARNINGS+=("systemd-run not found — cannot probe Delegate= support")
else
    : # already warned above
fi

# ── 6. Check for existing onoxsoft.slice (informational) ─────────────────────
# A successful `systemctl cat` is taken to mean the unit is already installed.
# This step only records a flag and a note; it does not touch SUPPORTED.
if command -v systemctl >/dev/null 2>&1 \
    && systemctl cat onoxsoft.slice >/dev/null 2>&1; then
    ONOXSOFT_SLICE="true"
    WARNINGS+=("onoxsoft.slice already installed — v86.1 will skip creation (idempotent)")
fi

# ── 7. Final ready verdict ───────────────────────────────────────────────────
# Ready requires both an overall "supported" verdict and a positive Delegate=
# result; anything else leaves the system not ready.
if [[ "${SUPPORTED}" != "true" || "${SLICE_DELEGATE}" != "true" ]]; then
    READY="false"
else
    READY="true"
    # Everything is green: point the operator at the bootstrap step, unless
    # the top-level slice was already found.
    if [[ "${ONOXSOFT_SLICE}" == "false" ]]; then
        RECOMMENDATIONS+=("System ready — bootstrap top-level slice with: onx-onoxsoft-slice-bootstrap")
    fi
fi

# ── Marshal warnings/recommendations to JSON ─────────────────────────────────
# Each array becomes a compact JSON array of strings: one element per line is
# fed to `jq -R`, then slurped with `jq -s`. An empty array stays the literal
# "[]" so printf is never asked to expand zero elements.
WARNINGS_JSON="[]"
if (( ${#WARNINGS[@]} )); then
    WARNINGS_JSON="$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -sc .)"
fi

RECOMMENDATIONS_JSON="[]"
if (( ${#RECOMMENDATIONS[@]} )); then
    RECOMMENDATIONS_JSON="$(printf '%s\n' "${RECOMMENDATIONS[@]}" | jq -R . | jq -sc .)"
fi

onx_log "cgroup-detect: ready=${READY} version=${CGROUP_VERSION} systemd=${SYSTEMD_VERSION} delegate=${SLICE_DELEGATE}"

# ── Output ───────────────────────────────────────────────────────────────────
# Assemble the result document. Booleans, numbers and pre-built arrays go in
# via --argjson (parsed as JSON); free text goes in via --arg (always a string).
#
# A jq failure here is an internal error (exit 3). Without the explicit
# handler `set -e` would propagate jq's own status, and jq uses 2 for argument
# problems, which the caller would misread as "unsupported".
jq -nc \
    --argjson ready "${READY}" \
    --arg version "${CGROUP_VERSION}" \
    --argjson unified "${UNIFIED}" \
    --argjson controllers "${CONTROLLERS_JSON}" \
    --argjson missing "${MISSING_JSON}" \
    --argjson systemd "${SYSTEMD_VERSION}" \
    --arg kernel "${KERNEL_VERSION}" \
    --argjson delegate "${SLICE_DELEGATE}" \
    --argjson onx_slice "${ONOXSOFT_SLICE}" \
    --argjson warnings "${WARNINGS_JSON}" \
    --argjson recs "${RECOMMENDATIONS_JSON}" \
    '{
        ok:                    true,
        ready:                 $ready,
        cgroup_version:        $version,
        unified_hierarchy:     $unified,
        controllers:           $controllers,
        controllers_missing:   $missing,
        systemd_version:       $systemd,
        kernel_version:        $kernel,
        delegate_supported:    $delegate,
        onoxsoft_slice_exists: $onx_slice,
        warnings:              $warnings,
        recommendations:       $recs
    }' || { printf '{"error":"failed to serialise detection result"}\n' >&2; exit 3; }

# Exit 0 if ready, 2 for every not-ready outcome; 3 is reserved for internal
# errors and is only produced by the explicit handlers above.
if [[ "${READY}" == "true" ]]; then
    exit 0
fi
exit 2
