#!/usr/bin/env bash
# onx-db-slow-queries — MariaDB slow_query_log dosyasini parse + normalize edip
# hash bazli gruplari (cPanel/WHM'de YOK olan analyzer) JSON dondurur.
#
# Input:  {"limit":100,"range":"24h|7d|30d","database":"","user":"","min_count":0,"min_avg_time":0}
# Output: {"queries":[{...}],"log_path":"...","log_size_bytes":N,"enabled":true,"total_distinct":N}

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/_lib/common.sh"

require_root
require_cmd mysql
onx_json_input

LIMIT="$(onx_json_field limit '100')"
RANGE="$(onx_json_field range '24h')"
DB_FILTER="$(onx_json_field database '')"
USER_FILTER="$(onx_json_field user '')"
MIN_COUNT="$(onx_json_field min_count '0')"
MIN_AVG_TIME="$(onx_json_field min_avg_time '0')"

# Validation
[[ "$LIMIT" =~ ^[0-9]+$ ]] || onx_die 1 "Gecersiz limit"
[[ "$RANGE" =~ ^(24h|7d|30d|all)$ ]] || onx_die 1 "Gecersiz range (24h|7d|30d|all)"
[[ "$DB_FILTER" =~ ^[a-z0-9_]*$ ]] || onx_die 1 "Gecersiz database filtresi"
[[ "$USER_FILTER" =~ ^[a-z0-9_]*$ ]] || onx_die 1 "Gecersiz user filtresi"

onx_log "db-slow-queries: limit=${LIMIT} range=${RANGE} db_filter=${DB_FILTER} user_filter=${USER_FILTER}"

# slow_query_log_file ve enabled değerlerini MySQL'den oku
ENABLED_RAW="$(mysql_exec_root "" "SHOW GLOBAL VARIABLES LIKE 'slow_query_log'" 2>/dev/null | tail -1 | awk '{print $2}')"
LOG_PATH="$(mysql_exec_root "" "SHOW GLOBAL VARIABLES LIKE 'slow_query_log_file'" 2>/dev/null | tail -1 | awk '{print $2}')"

ENABLED="false"
if [[ "${ENABLED_RAW^^}" == "ON" ]] || [[ "$ENABLED_RAW" == "1" ]]; then
    ENABLED="true"
fi

[[ -z "$LOG_PATH" ]] && LOG_PATH="/var/lib/mysql/slow.log"

# Relative path -> datadir + filename (MariaDB default davranis)
if [[ "$LOG_PATH" != /* ]]; then
    DATADIR="$(mysql_exec_root "" "SHOW GLOBAL VARIABLES LIKE 'datadir'" 2>/dev/null | tail -1 | awk '{print $2}')"
    [[ -z "$DATADIR" ]] && DATADIR="/var/lib/mysql"
    DATADIR="${DATADIR%/}"
    LOG_PATH="${DATADIR}/${LOG_PATH}"
fi

LOG_SIZE=0
if [[ -f "$LOG_PATH" ]] && [[ -r "$LOG_PATH" ]]; then
    LOG_SIZE="$(stat -c '%s' "$LOG_PATH" 2>/dev/null || echo 0)"
fi

# Range -> max age in seconds
case "$RANGE" in
    24h) MAX_AGE_SECONDS=86400 ;;
    7d)  MAX_AGE_SECONDS=604800 ;;
    30d) MAX_AGE_SECONDS=2592000 ;;
    all) MAX_AGE_SECONDS=0 ;;
esac
NOW_TS="$(date +%s)"
MIN_TS=$((NOW_TS - MAX_AGE_SECONDS))

# Slow log dosyasi yoksa veya bossa: bos result
if [[ ! -f "$LOG_PATH" ]] || [[ ! -r "$LOG_PATH" ]] || [[ "$LOG_SIZE" -eq 0 ]]; then
    json_ok "{\"queries\":[],\"log_path\":\"${LOG_PATH}\",\"log_size_bytes\":${LOG_SIZE},\"enabled\":${ENABLED},\"total_distinct\":0}"
    exit 0
fi

# AWK ile slow log parse + normalize hash bazli grupla.
# MariaDB slow log format:
# # Time: 250524 10:01:02
# # User@Host: user[user] @ host []
# # Thread_id: NN  Schema: dbname  QC_hit: No
# # Query_time: 2.5  Lock_time: 0.0  Rows_sent: 10  Rows_examined: 1000
# # Rows_affected: 0
# use dbname;
# SET timestamp=...;
# SELECT * FROM ...;
#
# Normalize: numeric literals → ?, string literals → ?, multiple spaces → single

PARSED_JSON="$(awk -v MIN_TS="$MIN_TS" -v DB_FILTER="$DB_FILTER" -v USER_FILTER="$USER_FILTER" \
    -v MIN_COUNT="$MIN_COUNT" -v MIN_AVG_TIME="$MIN_AVG_TIME" -v LIMIT="$LIMIT" '
function escape_json(s,    out) {
    gsub(/\\/, "\\\\", s)
    gsub(/"/, "\\\"", s)
    gsub(/\t/, "\\t", s)
    gsub(/\n/, "\\n", s)
    gsub(/\r/, "\\r", s)
    return s
}
function normalize(q,    n) {
    n = tolower(q)
    gsub(/[ \t\n\r]+/, " ", n)
    gsub(/'\''[^'\'']*'\''/, "?", n)
    gsub(/[0-9]+\.[0-9]+/, "?", n)
    gsub(/\b[0-9]+\b/, "?", n)
    gsub(/^ +/, "", n)
    gsub(/ +$/, "", n)
    return n
}
function djb2_hash(s,    i, h) {
    h = 5381
    for (i = 1; i <= length(s); i++) {
        h = ((h * 33) + ord[substr(s, i, 1)]) % 4294967295
    }
    return sprintf("%x", h)
}
function ts_to_epoch(t,    Y, M, D, h, m, s, cmd, ts) {
    # MariaDB format: 250524 10:01:02
    if (t !~ /^[0-9]{6} [0-9]{2}:[0-9]{2}:[0-9]{2}$/) return 0
    Y = "20" substr(t, 1, 2)
    M = substr(t, 3, 2)
    D = substr(t, 5, 2)
    h = substr(t, 8, 2)
    m = substr(t, 11, 2)
    s = substr(t, 14, 2)
    cmd = sprintf("date -d \"%s-%s-%s %s:%s:%s\" +%%s 2>/dev/null", Y, M, D, h, m, s)
    cmd | getline ts
    close(cmd)
    return ts + 0
}
BEGIN {
    for (i = 0; i < 256; i++) ord[sprintf("%c", i)] = i
    cur_ts = 0
    cur_user = ""
    cur_host = ""
    cur_db = ""
    cur_qtime = 0
    cur_rows_examined = 0
    cur_rows_sent = 0
    cur_sql = ""
    in_query = 0
}
/^# Time: / {
    if (in_query) process_query()
    sub(/^# Time: /, "")
    cur_ts = ts_to_epoch($0)
    cur_sql = ""
    in_query = 0
    next
}
/^# User@Host: / {
    if (in_query) process_query()
    sub(/^# User@Host: /, "")
    # user[user_name] @ host []
    n = split($0, parts, " ")
    cur_user = parts[1]
    sub(/\[.*\]/, "", cur_user)
    cur_host = parts[3]
    sub(/\[.*\]/, "", cur_host)
    cur_sql = ""
    in_query = 0
    next
}
/^# Thread_id:/ {
    # Schema: dbname
    if (match($0, /Schema: [^ ]+/)) {
        cur_db = substr($0, RSTART + 8, RLENGTH - 8)
    }
    next
}
/^# Query_time:/ {
    if (match($0, /Query_time: [0-9.]+/)) {
        cur_qtime = substr($0, RSTART + 12, RLENGTH - 12) + 0
    }
    if (match($0, /Rows_examined: [0-9]+/)) {
        cur_rows_examined = substr($0, RSTART + 15, RLENGTH - 15) + 0
    }
    if (match($0, /Rows_sent: [0-9]+/)) {
        cur_rows_sent = substr($0, RSTART + 11, RLENGTH - 11) + 0
    }
    cur_sql = ""
    in_query = 1
    next
}
/^use / { next }
/^SET timestamp=/ { next }
/^# / { next }
in_query && /[^[:space:]]/ {
    if (cur_sql == "") cur_sql = $0
    else cur_sql = cur_sql " " $0
}
END {
    if (in_query) process_query()
    # Output groups as JSON-friendly TSV: hash\tcount\ttotal_time\tmax_time\tmin_time\tavg_time\tsum_rows_examined\tsum_rows_sent\tlast_seen\tsample\tdatabases\tusers
    for (h in groups) {
        if (counts[h] < MIN_COUNT + 0) continue
        avg = totals[h] / counts[h]
        if (avg < MIN_AVG_TIME + 0) continue
        if (DB_FILTER != "" && !(h SUBSEP DB_FILTER in dbs_seen)) continue
        if (USER_FILTER != "" && !(h SUBSEP USER_FILTER in users_seen)) continue
        # collect unique databases/users
        dbs_str = ""
        for (k in dbs_seen) {
            split(k, parts, SUBSEP)
            if (parts[1] == h) {
                if (dbs_str != "") dbs_str = dbs_str "|"
                dbs_str = dbs_str parts[2]
            }
        }
        users_str = ""
        for (k in users_seen) {
            split(k, parts, SUBSEP)
            if (parts[1] == h) {
                if (users_str != "") users_str = users_str "|"
                users_str = users_str parts[2]
            }
        }
        printf "%s\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%d\t%d\t%d\t%s\t%s\t%s\n",
            h, counts[h], totals[h], maxes[h], mins[h], avg, rows_examined[h], rows_sent[h], last_seens[h],
            escape_json(samples[h]), dbs_str, users_str
    }
}
function process_query() {
    if (cur_sql == "") { in_query = 0; return }
    if (cur_ts > 0 && MIN_TS > 0 && cur_ts < MIN_TS) { in_query = 0; return }
    norm = normalize(cur_sql)
    h = djb2_hash(norm)
    groups[h] = 1
    counts[h]++
    totals[h] += cur_qtime
    if (maxes[h] == "" || cur_qtime > maxes[h]) maxes[h] = cur_qtime
    if (mins[h] == "" || cur_qtime < mins[h]) mins[h] = cur_qtime
    rows_examined[h] += cur_rows_examined
    rows_sent[h] += cur_rows_sent
    if (cur_ts > (last_seens[h] + 0)) last_seens[h] = cur_ts
    if (samples[h] == "") samples[h] = cur_sql
    if (cur_db != "") dbs_seen[h SUBSEP cur_db] = 1
    if (cur_user != "") users_seen[h SUBSEP cur_user] = 1
    in_query = 0
    cur_sql = ""
}
' "$LOG_PATH" 2>/dev/null | sort -t$'\t' -k3 -rn | head -n "$LIMIT")"

# TSV -> JSON array
TOTAL_DISTINCT=0
if [[ -n "$PARSED_JSON" ]]; then
    TOTAL_DISTINCT="$(echo "$PARSED_JSON" | wc -l)"
fi

QUERIES_JSON="["
sep=""
while IFS=$'\t' read -r h count total_time max_time min_time avg_time rows_ex rows_sent last_seen sample dbs users; do
    [[ -z "$h" ]] && continue
    # databases/users -> JSON array
    dbs_arr="[]"
    if [[ -n "$dbs" ]]; then
        dbs_arr="[$(echo "$dbs" | awk -F'|' 'BEGIN{first=1} {for(i=1;i<=NF;i++){if(!first)printf ","; printf "\"%s\"",$i; first=0}}')]"
    fi
    users_arr="[]"
    if [[ -n "$users" ]]; then
        users_arr="[$(echo "$users" | awk -F'|' 'BEGIN{first=1} {for(i=1;i<=NF;i++){if(!first)printf ","; printf "\"%s\"",$i; first=0}}')]"
    fi
    QUERIES_JSON+="${sep}{\"hash\":\"${h}\",\"count\":${count},\"total_time\":${total_time},\"max_time\":${max_time},\"min_time\":${min_time},\"avg_time\":${avg_time},\"rows_examined\":${rows_ex},\"rows_sent\":${rows_sent},\"last_seen\":${last_seen},\"sample\":\"${sample}\",\"databases\":${dbs_arr},\"users\":${users_arr}}"
    sep=","
done <<< "$PARSED_JSON"
QUERIES_JSON+="]"

json_ok "{\"queries\":${QUERIES_JSON},\"log_path\":\"${LOG_PATH}\",\"log_size_bytes\":${LOG_SIZE},\"enabled\":${ENABLED},\"total_distinct\":${TOTAL_DISTINCT}}"
