#!/usr/bin/env bash
# =============================================================================
# onx-fpm-slice-rebind — Attach PHP-FPM worker PIDs to per-customer cgroup slice
#
# Purpose (v86.2 — Interim "Option B"):
#   PHP-FPM master is launched under `system.slice/phpXY-php-fpm.service`. Its
#   forked pool workers inherit the master's cgroup, so cgroup limits applied
#   to `customer-<user>.slice` (Agent 1 plan: `onoxsoft.slice/customer-<user>.slice`)
#   never reach the per-user PHP workers.
#
#   This script does a one-shot rebind:
#     1. Locate FPM worker PIDs for the given user (or all onx_* users).
#     2. For each PID, read /proc/<PID>/cgroup; PIDs already in the target slice are only counted.
#     3. Otherwise write the PID to /sys/fs/cgroup/customer.slice/customer-<user>.slice/cgroup.procs
#
#   Designed to be called per-pool right after `onx-fpm-pool-create` AND from
#   a per-minute cron to catch new fork()s — Option B race window ~30-60s.
#
#   v87 will replace this with `onoxsoft-fpm@<user>.service` template (Option A,
#   fork-time atomicity, no race).
#
# Input (stdin JSON):
#   { "username": "onx_xxxx" }           — single user (idempotent)
#   { }                                   — all onx_* users on the box
#   { "users": ["onx_a","onx_b","..."] }  — explicit set (optional convenience)
#
# Output (stdout JSON):
#   {
#     "ok": true,
#     "total_rebound": 7,
#     "total_already_in_slice": 12,
#     "total_skipped": 0,
#     "per_user": {
#       "onx_leafport": { "rebound": 3, "already": 5, "missing_slice": false },
#       "onx_acme01":  { "rebound": 4, "already": 7, "missing_slice": false }
#     },
#     "skipped": [
#       { "username": "onx_demo99", "reason": "slice_missing" }
#     ],
#     "elapsed_ms": 124
#   }
#
# Exit codes:
#   0 — ok (even if some users skipped — rebind is best-effort)
#   1 — invalid input (bad username, malformed JSON)
#   2 — preflight fail (jq/ps not installed, cgroup v2 unified hierarchy not mounted).
#       A missing slice root directory is NOT a failure: an empty result is printed, exit 0.
#   3 — execution fail (unrecoverable I/O error on cgroup.procs)
#
# Idempotent: PIDs already inside the target slice are counted as `already_in_slice`
# and NOT rewritten (writing the same PID twice is a no-op for kernel but
# spams audit logs).
#
# Performance budget: must complete in < 5s even with 100 users × 10 workers.
# Per-user ps scan: ~10ms. cgroup.procs write: < 1ms. So ~50 users < 1s.
#
# Sudoers entry:
#   apache ALL=(root) NOPASSWD: /usr/local/onoxsoft/bin/onx-fpm-slice-rebind
#
# Deployed to: /usr/local/onoxsoft/bin/onx-fpm-slice-rebind
# =============================================================================

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# The helper library lives in _lib/ next to the real script file; readlink -f
# resolves symlinks so this also works when the script is invoked via a link.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
source "${SCRIPT_DIR}/_lib/common.sh"

# ── Dependencies ─────────────────────────────────────────────────────────────
# Each missing tool is reported as a one-line JSON error on stderr, exit 2.
for _onx_dep in jq ps; do
    if ! command -v "${_onx_dep}" >/dev/null 2>&1; then
        printf '{"error":"%s required"}\n' "${_onx_dep}" >&2
        exit 2
    fi
done
unset _onx_dep

# Provided by _lib/common.sh — presumably aborts unless running as root
# (not visible from this file; confirm in the library).
require_root

# ── Constants ────────────────────────────────────────────────────────────────
# Runtime cgroup directory under which the per-customer slice directories
# (customer-<user>.slice) are looked up.
readonly ONOXSOFT_SLICE_ROOT="/sys/fs/cgroup/customer.slice"

# ── Read & parse stdin ───────────────────────────────────────────────────────
# Slurp the whole request body, then hand it to the library validator
# (onx_require_json comes from _lib/common.sh; its exact checks are not visible
# here — presumably it rejects malformed JSON).
INPUT="$(cat)"
onx_require_json "${INPUT}"

# ── Preflight: cgroup v2 mounted + slice root exists at runtime ──────────────
# cgroup.controllers at the mount root is used as the marker for the unified
# (v2) hierarchy.
[[ -d /sys/fs/cgroup && -f /sys/fs/cgroup/cgroup.controllers ]] \
    || onx_die 2 "cgroup v2 unified hierarchy not mounted at /sys/fs/cgroup"

# A missing slice root is deliberately NOT an error (Agent 1 may not have
# deployed yet): log it, print an empty result and exit 0 so cron stays quiet.
if ! [[ -d "${ONOXSOFT_SLICE_ROOT}" ]]; then
    onx_log "fpm-slice-rebind: ${ONOXSOFT_SLICE_ROOT} not present (Agent 1 not deployed?)"
    printf '%s\n' '{"ok":true,"total_rebound":0,"total_already_in_slice":0,"total_skipped":0,"per_user":{},"skipped":[],"note":"onoxsoft.slice missing","elapsed_ms":0}'
    exit 0
fi

# ── Resolve user set ─────────────────────────────────────────────────────────
# Fills USERS from the request, in this order of precedence:
#   1. "username" (non-null, non-empty)  → exactly that user
#   2. "users" (array)                   → the listed users, de-duplicated
#   3. otherwise                         → every onx_* account in passwd
declare -a USERS=()

# Valid JSON that is not an object (e.g. `[]`, `null`, `"x"`) would make the
# has() calls below fail and kill the script with jq's exit status. Reject it
# explicitly with the documented "invalid input" code instead. An empty type
# (no JSON document at all) is let through unchanged so it keeps falling into
# the auto-detect branch as before.
INPUT_TYPE=$(jq -r 'type' <<< "${INPUT}")
case "${INPUT_TYPE}" in
    object | "") ;;
    *) onx_die 1 "input must be a JSON object" ;;
esac

USERNAME_INPUT=$(jq -r 'if has("username") and .username != null then .username else "" end' <<< "${INPUT}")
USERS_LIST_PRESENT=$(jq -r 'if has("users") and (.users | type == "array") then "1" else "0" end' <<< "${INPUT}")

if [[ -n "${USERNAME_INPUT}" ]]; then
    onx_validate_username "${USERNAME_INPUT}"
    USERS=("${USERNAME_INPUT}")
elif [[ "${USERS_LIST_PRESENT}" == "1" ]]; then
    # De-duplicate while preserving request order: a name listed twice would
    # otherwise be processed twice, inflating total_already_in_slice and
    # overwriting the first pass's per_user entry with rebound=0.
    while IFS= read -r u; do
        [[ -z "${u}" ]] && continue
        onx_validate_username "${u}"
        USERS+=("${u}")
    done < <(jq -r '.users[]' <<< "${INPUT}" | awk '!seen[$0]++')
else
    # Auto-detect: every onx_* account in the passwd database. Whether an
    # account actually has running FPM workers is decided later, when the ps
    # snapshot is filtered per user.
    # getent takes exact keys, not globs, so dump the database and filter
    # the login names with awk.
    while IFS= read -r u; do
        [[ -z "${u}" ]] && continue
        USERS+=("${u}")
    done < <(getent passwd 2>/dev/null | awk -F: '$1 ~ /^onx_[a-z0-9]+$/{print $1}' | sort -u)
fi

# Nothing to do → empty, successful result.
if [[ "${#USERS[@]}" -eq 0 ]]; then
    printf '{"ok":true,"total_rebound":0,"total_already_in_slice":0,"total_skipped":0,"per_user":{},"skipped":[],"note":"no onx_* users found","elapsed_ms":0}\n'
    exit 0
fi

# ── Rebind logic ─────────────────────────────────────────────────────────────
# Run-wide counters reported in the final JSON.
TOTAL_REBOUND=0 TOTAL_ALREADY=0 TOTAL_SKIPPED=0

# JSON accumulators: per-user breakdown (object) and skipped users (array).
SKIPPED_JSON='[]'
PER_USER_JSON='{}'

# Start timestamp in milliseconds; 0 if `date` exits non-zero.
START_MS="$(date +%s%3N 2>/dev/null || echo 0)"

# Take ONE process snapshot up front instead of invoking ps once per user.
# Columns (headers suppressed by the trailing `=`):
#   $1 = pid    $2 = user    $3.. = args (full command line)
# Workers are later recognised by "php-fpm: pool <USERNAME>" in the args.
# v86.1.1: user:32 prevents 8-char truncation (e.g. "onx_65q+" instead of "onx_65qyec")
# A failing ps is tolerated: the snapshot is then empty and no PID matches.
PS_SNAP="$(ps -eo pid=,user:32=,args= 2>/dev/null || true)"

#######################################
# Print the PIDs (one per line) of the FPM pool workers owned by a user.
# Globals:   PS_SNAP (read) — ps snapshot, columns: pid, user, args
# Arguments: $1 - username
# Outputs:   matching PIDs on stdout
# A line matches when its user column equals $1 AND the line contains both
# "php-fpm:" and "pool <username>". The master ("php-fpm: master process ...")
# is expected to run as root, so it fails the user-column test.
#######################################
_rebind_worker_pids() {
    awk -v user="$1" \
        '$2 == user && index($0, "php-fpm:") > 0 && index($0, "pool "user) > 0 {print $1}' \
        <<< "${PS_SNAP-}"
}

#######################################
# Test whether a PID currently sits in a given cgroup-v2 path.
# Arguments: $1 - PID
#            $2 - expected cgroup path (as shown after "0::")
#            $3 - proc mount point (optional, default /proc)
# Returns:   0 if the "0::" entry of <proc>/<PID>/cgroup equals $2,
#            1 otherwise (including unreadable/vanished file).
# Uses shell builtins only, so no process is forked per PID, and it selects
# the "0::" (v2) entry explicitly instead of relying on it being line 1.
#######################################
_rebind_pid_in_cgroup() {
    local pid="$1" expected="$2" proc_root="${3:-/proc}"
    local entry
    [[ -r "${proc_root}/${pid}/cgroup" ]] || return 1
    while IFS= read -r entry; do
        if [[ "${entry}" == 0::* ]]; then
            [[ "${entry#0::}" == "${expected}" ]]
            return
        fi
    done 2>/dev/null < "${proc_root}/${pid}/cgroup"
    return 1
}

for U in "${USERS[@]}"; do
    SLICE_DIR="${ONOXSOFT_SLICE_ROOT}/customer-${U}.slice"

    if [[ ! -d "${SLICE_DIR}" ]]; then
        # Slice not created by Agent 1 yet — skip cleanly.
        SKIPPED_JSON=$(jq --arg u "${U}" \
            '. + [{"username":$u,"reason":"slice_missing"}]' <<< "${SKIPPED_JSON}")
        TOTAL_SKIPPED=$(( TOTAL_SKIPPED + 1 ))
        continue
    fi

    PROCS_FILE="${SLICE_DIR}/cgroup.procs"
    if [[ ! -w "${PROCS_FILE}" ]]; then
        SKIPPED_JSON=$(jq --arg u "${U}" \
            '. + [{"username":$u,"reason":"cgroup_procs_not_writable"}]' <<< "${SKIPPED_JSON}")
        TOTAL_SKIPPED=$(( TOTAL_SKIPPED + 1 ))
        continue
    fi

    # Path as it appears in /proc/<PID>/cgroup: the slice directory with the
    # cgroup mount point stripped. Derived from SLICE_DIR (once per user) so
    # it cannot drift from ONOXSOFT_SLICE_ROOT.
    EXPECTED_CGROUP="${SLICE_DIR#/sys/fs/cgroup}"

    USER_PIDS=()
    mapfile -t USER_PIDS < <(_rebind_worker_pids "${U}")

    REBOUND_COUNT=0
    ALREADY_COUNT=0

    # The ${arr[@]+...} form keeps `set -u` from aborting the whole run on
    # bash < 4.4 when a user currently has no workers (empty array).
    for PID in ${USER_PIDS[@]+"${USER_PIDS[@]}"}; do
        # Validate PID is a positive integer (defense against ps weirdness)
        [[ "${PID}" =~ ^[0-9]+$ ]] || continue

        # Skip if process has exited between ps and now
        [[ -d "/proc/${PID}" ]] || continue

        if _rebind_pid_in_cgroup "${PID}" "${EXPECTED_CGROUP}"; then
            # Already in the right slice — do not rewrite (idempotent).
            ALREADY_COUNT=$(( ALREADY_COUNT + 1 ))
            continue
        fi

        # Write PID to cgroup.procs (kernel moves it into the slice).
        # Failure modes, all best-effort (log + continue):
        #   - PID exited (ESRCH)
        #   - EPERM (caller lost cap)
        #   - EINVAL (PID already part of an ancestor with non-empty children)
        # The group redirect also silences a failing open of PROCS_FILE.
        if { printf '%s\n' "${PID}" > "${PROCS_FILE}"; } 2>/dev/null; then
            REBOUND_COUNT=$(( REBOUND_COUNT + 1 ))
        else
            onx_log "fpm-slice-rebind: write failed user=${U} pid=${PID} (process may have exited)"
        fi
    done

    TOTAL_REBOUND=$(( TOTAL_REBOUND + REBOUND_COUNT ))
    TOTAL_ALREADY=$(( TOTAL_ALREADY + ALREADY_COUNT ))

    # Append per-user breakdown
    PER_USER_JSON=$(jq \
        --arg u "${U}" \
        --argjson r "${REBOUND_COUNT}" \
        --argjson a "${ALREADY_COUNT}" \
        '. + {($u): {"rebound": $r, "already": $a, "missing_slice": false}}' \
        <<< "${PER_USER_JSON}")
done

# Elapsed wall-clock time in milliseconds. Either timestamp is 0 when `date`
# exits non-zero, so a negative difference is clamped to 0.
END_MS="$(date +%s%3N 2>/dev/null || echo 0)"
ELAPSED_MS=$(( END_MS - START_MS ))
if (( ELAPSED_MS < 0 )); then
    ELAPSED_MS=0
fi

onx_log "fpm-slice-rebind: users=${#USERS[@]} rebound=${TOTAL_REBOUND} already=${TOTAL_ALREADY} skipped=${TOTAL_SKIPPED} elapsed_ms=${ELAPSED_MS}"

# ── Output ───────────────────────────────────────────────────────────────────
# Every value is passed as typed JSON via --argjson, so the counters are
# emitted as numbers and the accumulators as object/array.
REPORT_ARGS=(
    --argjson rebound "${TOTAL_REBOUND}"
    --argjson already "${TOTAL_ALREADY}"
    --argjson skipped "${TOTAL_SKIPPED}"
    --argjson per_user "${PER_USER_JSON}"
    --argjson skipped_list "${SKIPPED_JSON}"
    --argjson elapsed "${ELAPSED_MS}"
)
jq -n "${REPORT_ARGS[@]}" '{
    ok: true,
    total_rebound: $rebound,
    total_already_in_slice: $already,
    total_skipped: $skipped,
    per_user: $per_user,
    skipped: $skipped_list,
    elapsed_ms: $elapsed
}'
