#!/usr/bin/env bash
# =============================================================================
# onx-cgroup-usage-read — Per-customer cgroup metric snapshot reader
#
# v86.5 — Resource usage poller (time-series snapshot source)
#
# Purpose:
#   Reads instantaneous cgroup v2 metrics from a per-customer slice path
#   below CGROUP_ROOT (defined in this script):
#     /sys/fs/cgroup/customer.slice/customer-<user>.slice/
#
#   Two modes:
#     SINGLE — {"username":"onx_xyz"}     → one slice
#     BULK   — {} (no username)           → iterate all customer-*.slice dirs
#
#   Designed for high-frequency polling (per-minute cron); aims to complete
#   under 500 ms even with 100+ users. Each metric file is read once, either
#   with a shell builtin or a single awk pass, and every snapshot is
#   assembled by one jq invocation.
#
#   Slice file → metric mapping:
#     memory.current          → memory_current_bytes
#     memory.max              → memory_max_bytes      ("max" → 0)
#     memory.peak             → memory_peak_bytes     (Linux ≥ 5.19; 0 if absent)
#     cpu.stat (usage_usec)   → cpu_usage_usec        (monotonic, cumulative)
#     cpu.pressure (some avg10)→ cpu_pressure_avg10   ("some avg10=N.NN ...")
#     io.stat (rbytes/wbytes) → io_read_bytes/io_write_bytes (Σ all devs)
#     pids.current            → pids_current
#     pids.max                → pids_max              ("max" → 0)
#     memory.events (oom_kill)→ oom_kill_count        (cumulative)
#
# Input (stdin JSON — optional):
#   SINGLE: {"username":"onx_leafport"}
#   BULK:   {}                            (iterate /sys/fs/cgroup/customer.slice/customer-*.slice)
#
# Output (stdout JSON):
#   SINGLE:
#     {
#       "ok": true,
#       "username": "onx_leafport",
#       "active": true,
#       "snapshot": {
#         "memory_current_bytes": 268435456,
#         "memory_max_bytes": 1073741824,
#         "memory_peak_bytes": 805306368,
#         "cpu_usage_usec": 18253411,
#         "cpu_pressure_avg10": 0.42,
#         "io_read_bytes": 104857600,
#         "io_write_bytes": 52428800,
#         "pids_current": 12,
#         "pids_max": 100,
#         "oom_kill_count": 0
#       }
#     }
#   BULK:
#     {
#       "ok": true,
#       "users": {
#         "onx_leafport": { ...same snapshot fields... },
#         "onx_other":    { ... }
#       },
#       "count": 2
#     }
#
#   Slice missing in SINGLE mode → ok:true active:false snapshot:null (graceful).
#
# Exit codes:
#   0  success (incl. empty bulk result)
#   1  invalid input (bad JSON / invalid username)
#   2  preflight fail (jq missing / non-Linux)
#   3  internal error
#
# Deployed to: /usr/local/onoxsoft/bin/onx-cgroup-usage-read
# =============================================================================

# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Resolve the real location of this script (following symlinks) so the shared
# helper library can be sourced relative to it.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
source "${SCRIPT_DIR}/_lib/common.sh"

# Preflight: jq is used for input validation and for all JSON output.
if ! command -v jq >/dev/null 2>&1; then
    printf '{"error":"jq required"}\n' >&2
    exit 2
fi
# Read-only operation — root is not strictly required (cgroup files are world-
# readable on default cgroup v2 mounts) but most schedulers run as root anyway.

# Parent cgroup directory holding the customer-<user>.slice children.
# NOTE(review): the bulk-mode "not bootstrapped" note string mentions
# onoxsoft.slice while this constant points at customer.slice — confirm which
# parent slice name is canonical.
readonly CGROUP_ROOT="/sys/fs/cgroup/customer.slice"

# ── Read stdin (optional — bulk mode if empty) ───────────────────────────────
# A failing/closed stdin is tolerated and treated like empty input.
INPUT="$(cat 2>/dev/null || true)"
if [[ -z "${INPUT}" ]]; then
    INPUT='{}'
fi

# Only a JSON object is accepted; anything else is an input error (exit 1).
if ! jq -e 'type == "object"' >/dev/null 2>&1 <<<"${INPUT}"; then
    onx_die 1 "stdin is not a valid JSON object"
fi

# Selects the mode further down: non-empty → SINGLE, empty → BULK.
USERNAME="$(onx_json_get "${INPUT}" "username")"

# ─────────────────────────────────────────────────────────────────────────────
# Read a single integer metric from a cgroup file and print it to stdout
# (no trailing newline). Uses shell builtins only — no fork per file.
#
# Args:
#   $1 = path of the file to read
#   $2 = fallback value (optional, default 0)
#
# Result:
#   - File missing or unreadable        → fallback
#   - Empty file, or "max" (limit unset) → fallback
#   - Otherwise the leading run of digits on the FIRST line is printed
#     (so "123" and "123 456" both yield 123)
#   - First line does not start with a digit → fallback
# ─────────────────────────────────────────────────────────────────────────────
_read_int_file() {
    local path="${1}"
    local default="${2:-0}"
    local val=""

    [[ -r "${path}" ]] || { printf '%s' "${default}"; return; }

    # `read` returns non-zero when it reaches EOF without a trailing newline,
    # but it has still stored the data — so the status is deliberately ignored
    # instead of wiping the value. If the file cannot be opened at all (e.g.
    # the cgroup vanished after the -r test) `val` simply stays empty.
    { read -r val < "${path}"; } 2>/dev/null || true

    if [[ -z "${val}" || "${val}" == "max" ]]; then
        printf '%s' "${default}"
    elif [[ "${val}" =~ ^[0-9]+ ]]; then
        # Leading digits only; kept as a string so large counters are never
        # routed through floating-point arithmetic.
        printf '%s' "${BASH_REMATCH[0]}"
    else
        printf '%s' "${default}"
    fi
}

# Print the `usage_usec` value from <slice>/cpu.stat (a multi-line
# "key value" file).
# Args:   $1 = slice directory
# Stdout: the value of the first `usage_usec` line, or 0 when the file is
#         unreadable or has no such line.
_read_cpu_usage_usec() {
    local stat_file="${1}/cpu.stat"
    local usec=""
    if [[ -r "${stat_file}" ]]; then
        usec="$(awk '$1=="usage_usec"{print $2; exit}' "${stat_file}" 2>/dev/null)"
    fi
    printf '%s' "${usec:-0}"
}

# Print the `avg10` figure of the "some" line in <slice>/cpu.pressure
# (PSI format: "some avg10=N.NN avg60=N.NN avg300=N.NN total=N").
# Args:   $1 = slice directory
# Stdout: the text following "avg10=" on the first "some" line that carries
#         such a field, or "0.00" when the file is unreadable or no such
#         field is found.
_read_cpu_pressure_avg10() {
    local psi_file="${1}/cpu.pressure"
    local avg10=""
    if [[ -r "${psi_file}" ]]; then
        avg10="$(awk '
            $1 != "some" { next }
            {
                for (f = 2; f <= NF; f++) {
                    if (index($f, "avg10=") == 1) {
                        print substr($f, 7)
                        exit
                    }
                }
            }' "${psi_file}" 2>/dev/null)"
    fi
    printf '%s' "${avg10:-0.00}"
}

# Sum rbytes and wbytes over every device line of <slice>/io.stat.
# io.stat lines: "MAJ:MIN rbytes=N wbytes=N rios=N wios=N dbytes=N dios=N"
# Args:   $1 = slice directory
# Stdout: "<rbytes_total> <wbytes_total>" (space-separated, no newline);
#         "0 0" when the file is unreadable, empty, or awk fails.
_read_io_aggregate() {
    local path="${1}/io.stat"
    [[ -r "${path}" ]] || { printf '0 0'; return; }
    # Totals are printed with %.0f rather than %d: byte counters routinely
    # exceed 2^31, and %d is converted through a C integer in some awk
    # implementations (older mawk builds reportedly clamp at 2147483647).
    # %.0f stays exact for every integer up to 2^53.
    awk '{
        for (i = 2; i <= NF; i++) {
            if ($i ~ /^rbytes=/)      { r += substr($i, 8) }
            else if ($i ~ /^wbytes=/) { w += substr($i, 8) }
        }
    } END { printf("%.0f %.0f", r + 0, w + 0) }' "${path}" 2>/dev/null || printf '0 0'
}

# Print the `oom_kill` counter from <slice>/memory.events (a multi-line
# "key value" file).
# Args:   $1 = slice directory
# Stdout: the value of the first line whose key is exactly `oom_kill`, or 0
#         when the file is unreadable or has no such line.
_read_oom_kill() {
    local events_file="${1}/memory.events"
    local kills=""
    if [[ -r "${events_file}" ]]; then
        kills="$(awk '$1=="oom_kill"{print $2; exit}' "${events_file}" 2>/dev/null)"
    fi
    printf '%s' "${kills:-0}"
}

# ─────────────────────────────────────────────────────────────────────────────
# Assemble the metric snapshot of one slice directory as compact JSON.
# Args:   $1 = slice directory (absolute)
# Stdout: a single-line JSON object with the ten snapshot fields, or the bare
#         word `null` (no newline) when the directory does not exist.
# Every metric comes from one of the _read_* helpers; each value is handed to
# jq via --argjson, so it must be a valid JSON number.
# ─────────────────────────────────────────────────────────────────────────────
build_snapshot_json() {
    local slice="${1}"
    [[ -d "${slice}" ]] || { printf 'null'; return; }

    # Memory
    local m_now m_limit m_peak
    m_now="$(_read_int_file "${slice}/memory.current" 0)"
    m_limit="$(_read_int_file "${slice}/memory.max" 0)"
    m_peak="$(_read_int_file "${slice}/memory.peak" 0)"

    # CPU
    local c_usec c_psi
    c_usec="$(_read_cpu_usage_usec "${slice}")"
    c_psi="$(_read_cpu_pressure_avg10 "${slice}")"

    # I/O — the helper emits "<read> <write>"; split on the single space.
    local io_pair
    io_pair="$(_read_io_aggregate "${slice}")"
    local bytes_in="${io_pair% *}"
    local bytes_out="${io_pair#* }"

    # Processes and OOM kills
    local p_now p_limit oom_kills
    p_now="$(_read_int_file "${slice}/pids.current" 0)"
    p_limit="$(_read_int_file "${slice}/pids.max" 0)"
    oom_kills="$(_read_oom_kill "${slice}")"

    jq -nc \
        --argjson mem_now   "${m_now:-0}" \
        --argjson mem_limit "${m_limit:-0}" \
        --argjson mem_peak  "${m_peak:-0}" \
        --argjson cpu_usec  "${c_usec:-0}" \
        --argjson cpu_psi   "${c_psi:-0}" \
        --argjson io_in     "${bytes_in:-0}" \
        --argjson io_out    "${bytes_out:-0}" \
        --argjson pid_now   "${p_now:-0}" \
        --argjson pid_limit "${p_limit:-0}" \
        --argjson ooms      "${oom_kills:-0}" \
        '{
            memory_current_bytes: $mem_now,
            memory_max_bytes:     $mem_limit,
            memory_peak_bytes:    $mem_peak,
            cpu_usage_usec:       $cpu_usec,
            cpu_pressure_avg10:   $cpu_psi,
            io_read_bytes:        $io_in,
            io_write_bytes:       $io_out,
            pids_current:         $pid_now,
            pids_max:             $pid_limit,
            oom_kill_count:       $ooms
        }'
}

# ── Mode dispatch ────────────────────────────────────────────────────────────

if [[ -n "${USERNAME}" ]]; then
    # ── SINGLE mode ──────────────────────────────────────────────────────────
    # Reject malformed usernames before they are used to build a path.
    onx_validate_username "${USERNAME}"

    SLICE_DIR="${CGROUP_ROOT}/customer-${USERNAME}.slice"

    if [[ -d "${SLICE_DIR}" ]]; then
        # Slice exists: collect its metrics and report it as active.
        SNAPSHOT="$(build_snapshot_json "${SLICE_DIR}")"
        onx_log "cgroup-usage-read: ${USERNAME} ok"

        jq -nc \
            --arg user "${USERNAME}" \
            --arg slice "${SLICE_DIR}" \
            --argjson snap "${SNAPSHOT}" \
            '{ ok: true, username: $user, active: true, snapshot: $snap, slice_path: $slice }'
    else
        # Graceful: slice not active (account just created, or not
        # provisioned) — still a successful answer, with a null snapshot.
        jq -nc \
            --arg user "${USERNAME}" \
            --arg slice "${SLICE_DIR}" \
            '{ ok: true, username: $user, active: false, snapshot: null, slice_path: $slice }'
    fi
    exit 0
fi

# ── BULK mode ────────────────────────────────────────────────────────────────
# Iterate ${CGROUP_ROOT}/customer-*.slice  (graceful: root may not exist yet)

if [[ ! -d "${CGROUP_ROOT}" ]]; then
    jq -nc --arg root "${CGROUP_ROOT}" \
        '{ ok: true, users: {}, count: 0, note: "onoxsoft.slice not bootstrapped", slice_root: $root }'
    exit 0
fi

# Accumulator object { "<user>": <snapshot>, ... } and number of entries in it.
USERS_JSON='{}'
COUNT=0

# The glob is expanded directly in the `for` list instead of going through an
# intermediate array: expanding an EMPTY array under `set -u` is a fatal
# "unbound variable" error on bash < 4.4, which would turn "no customers yet"
# into a crash. No nullglob is needed either — when nothing matches, the
# pattern stays literal and is discarded by the -d test below.
for SLICE_DIR in "${CGROUP_ROOT}"/customer-*.slice; do
    [[ -d "${SLICE_DIR}" ]] || continue

    # Extract username from customer-<user>.slice (parameter expansion only,
    # no fork per slice).
    BASE="${SLICE_DIR##*/}"
    USER_PART="${BASE#customer-}"
    USER_PART="${USER_PART%.slice}"

    # Skip non-onx_ entries (defensive — top-level customer slices should
    # always be onx_*, but we don't want a malformed dir to crash the run).
    [[ "${USER_PART}" =~ ^onx_[a-z0-9]{4,12}$ ]] || continue

    SNAP="$(build_snapshot_json "${SLICE_DIR}")"

    # build_snapshot_json prints `null` when the directory is gone, i.e. the
    # slice was removed between the glob and the read. Leave such users out
    # rather than emitting a null entry and counting it.
    if [[ "${SNAP}" == "null" ]]; then
        continue
    fi

    # Merge { "<user>": snap } into accumulator
    USERS_JSON="$(jq -c --arg u "${USER_PART}" --argjson s "${SNAP}" '. + { ($u): $s }' <<<"${USERS_JSON}")"
    COUNT=$(( COUNT + 1 ))
done

onx_log "cgroup-usage-read: bulk count=${COUNT}"

jq -nc \
    --argjson users "${USERS_JSON}" \
    --argjson count "${COUNT}" \
    --arg root "${CGROUP_ROOT}" \
    '{ ok: true, users: $users, count: $count, slice_root: $root }'

exit 0
