#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# onx-geoip-update — Ücretsiz GeoIP DB güncelleyici
# ─────────────────────────────────────────────────────────────────────────────
# 3 ücretsiz kaynak deneme sırası:
#   1. RIR (Regional Internet Registry) delegated files — public domain,
#      kayıt yok, lisans yok. Kanonik kaynak (ARIN/RIPE/APNIC/AFRINIC/LACNIC).
#   2. DB-IP Lite CSV — CC BY 4.0, sadece atıf gerekli, kayıt yok.
#   3. MaxMind GeoLite2 — lisans key gerekli (config'de varsa), fallback.
#      NOT: bu script'te henüz uygulanmadı — yalnızca rir|dbip|auto desteklenir.
#
# Çıktı: /var/lib/onox/geoip/cidrs/<CC>.txt — CIDR aralıklarını içerir.
# Cron: weekly (Pazar 04:00) — RIR dosyaları haftada bir güncellenir.
#
# Kullanım:
#   sudo onx-geoip-update            # otomatik kaynak seç
#   sudo onx-geoip-update --source rir
#   sudo onx-geoip-update --source dbip
#   sudo onx-geoip-update --force    # cache ignore
# ─────────────────────────────────────────────────────────────────────────────

# Unset variables are errors, and a pipeline fails when any of its stages fails.
set -u
set -o pipefail
# NOTE: errexit (set -e) was removed on purpose — a grep no-match (exit 1) or a
# jq parse error (exit 2) must not kill the whole script. Failures are handled
# explicitly instead (return-code check + log + status).

# Fixed locations for generated data, download cache, log and progress status.
declare -r GEOIP_DIR="/var/lib/onox/geoip"
declare -r CIDRS_DIR="${GEOIP_DIR}/cidrs"
declare -r CACHE_DIR="${GEOIP_DIR}/cache"
declare -r LOG_FILE="/var/log/onox/geoip-update.log"
declare -r STATUS_FILE="${GEOIP_DIR}/status.json"

# parse_args [--source <name>] [--source=<name>] [--force] [<name>]
#
# Sets the globals SOURCE (default "auto") and FORCE (default 0) from the
# command line. Options may appear in any order and may be combined, e.g.
# "--force --source rir". A bare positional word is still accepted as the
# source name for backward compatibility.
#
# Unrecognised tokens (and a "--source" with no value) are stored in SOURCE
# unchanged instead of aborting here, so the source dispatcher at the end of
# the script rejects them with its usual "unknown source" error.
parse_args() {
    SOURCE="auto"
    FORCE=0

    while (( $# > 0 )); do
        case "$1" in
            "")
                ;;  # ignore empty arguments
            --force)
                FORCE=1
                ;;
            --source=*)
                SOURCE="${1#--source=}"
                ;;
            --source)
                if [[ -n "${2:-}" ]]; then
                    SOURCE="$2"
                    shift
                else
                    SOURCE="$1"
                fi
                ;;
            *)
                SOURCE="$1"
                ;;
        esac
        shift
    done

    # "--source=" with an empty value means "use the default".
    [[ -n "$SOURCE" ]] || SOURCE="auto"
    return 0
}
parse_args "$@"

# Create the CIDR output, download cache and log directories if they are missing.
mkdir -p "$CIDRS_DIR" "$CACHE_DIR" "$(dirname "$LOG_FILE")"

# log <message...>
# Prefixes the message with a UTC ISO-8601 timestamp, appends it to $LOG_FILE
# and echoes it on stderr (stdout stays free for the script's JSON output).
log() {
    printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*" | tee -a "$LOG_FILE" >&2
}

# ─────────────────────────────────────────────────────────────────────────────
# Progress status writer — for the frontend polling endpoint
# ─────────────────────────────────────────────────────────────────────────────
# Start of this run, captured once: an ISO-8601 UTC string (reported as
# started_at) and epoch seconds (used to compute elapsed_sec).
STARTED_AT="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
STARTED_EPOCH="$(date '+%s')"

# status_write <phase> <step> <step_idx> <total> [message] [extra_json]
#
# Publishes a progress snapshot as a single JSON object in $STATUS_FILE.
# The file is written to "$STATUS_FILE.tmp" first and then renamed, so a
# reader never sees a half-written document.
#
# Globals read: STATUS_FILE, STARTED_AT, STARTED_EPOCH, SOURCE
# Arguments:
#   $1 phase       e.g. init | running | complete | failed (default: running)
#   $2 step        free-form step name
#   $3 step_idx    current step number (non-numeric values become 0)
#   $4 total       total number of steps (non-numeric values become 0)
#   $5 message     human-readable message (optional)
#   $6 extra_json  JSON object merged over the base fields (optional)
# Returns: always 0 — status reporting is best effort; if jq fails the previous
#          snapshot simply stays in place.
status_write() {
    local phase="${1:-running}" step="${2:-}" step_idx="${3:-0}" total="${4:-0}"
    local message="${5:-}" extra="${6:-}"
    local now_iso elapsed candidate
    local tmp_file="${STATUS_FILE}.tmp"

    # Do NOT write the default as "${6:-{}}": bash ends the expansion at the
    # first "}", so a supplied value would get a stray "}" appended and jq
    # would reject every caller-provided extra object.
    [[ -n "$extra" ]] || extra='{}'

    # --argjson only accepts valid JSON numbers (no text, no leading zeros).
    if [[ "$step_idx" =~ ^[0-9]+$ ]]; then step_idx=$((10#$step_idx)); else step_idx=0; fi
    if [[ "$total" =~ ^[0-9]+$ ]]; then total=$((10#$total)); else total=0; fi

    now_iso="$(date -u +%FT%TZ)"
    elapsed=$(( $(date +%s) - STARTED_EPOCH ))

    # Try with the caller's extra object first; if that is not a usable JSON
    # object, still publish the base snapshot rather than dropping the update.
    for candidate in "$extra" '{}'; do
        if jq -n \
            --arg phase "$phase" \
            --arg step "$step" \
            --argjson step_idx "$step_idx" \
            --argjson total "$total" \
            --arg started_at "$STARTED_AT" \
            --arg updated_at "$now_iso" \
            --argjson elapsed_sec "$elapsed" \
            --arg message "$message" \
            --argjson extra "$candidate" \
            --arg source "$SOURCE" \
            '{phase: $phase, step: $step, step_idx: $step_idx, total: $total, source: $source, started_at: $started_at, updated_at: $updated_at, elapsed_sec: $elapsed_sec, message: $message} * $extra' \
            > "$tmp_file" 2>/dev/null \
            && mv -f "$tmp_file" "$STATUS_FILE" 2>/dev/null; then
            break
        fi
    done
    # Remove whatever a failed attempt left behind (no-op after a successful rename).
    rm -f "$tmp_file" 2>/dev/null

    # Keep the file readable by the web server: mode 644 and group "apache".
    chmod 644 "$STATUS_FILE" 2>/dev/null || true
    chgrp apache "$STATUS_FILE" 2>/dev/null || true
    return 0
}

# Initial status: write an "init" snapshot (step 0 of 0) before any update work starts.
status_write "init" "starting" 0 0 "Initializing GeoIP update..."

# EXIT trap: record "failed" only when the script really ended in error.
# If the status file already says phase=complete, the update itself succeeded
# and only a later step (manifest write etc.) failed — in that case the
# successful status must not be overwritten.
on_exit() {
    local rc=$?
    if (( rc != 0 )); then
        # Overwrite the status unless a readable status file reports completion.
        if [[ ! -r "$STATUS_FILE" ]] || ! grep -q '"phase": *"complete"' "$STATUS_FILE" 2>/dev/null; then
            status_write "failed" "interrupted" 0 0 "Script exited with code $rc" || true
        fi
    fi
}
trap on_exit EXIT

# ─────────────────────────────────────────────────────────────────────────────
# Source 1: RIR delegated files (PUBLIC DOMAIN — no key, no license)
# ─────────────────────────────────────────────────────────────────────────────
# _onx_rir_range_to_cidrs <start_ip> <address_count>
#
# Splits the IPv4 range that starts at <start_ip> and spans <address_count>
# addresses into the minimal list of properly aligned CIDR blocks. A delegation
# is given as start + count; the count need not be a power of two and the start
# need not sit on a prefix boundary, so one record may yield several CIDRs.
#
# Results are handed back through globals so that no subshell is forked per
# input record:
#   _ONX_RIR_CIDRS       newline-terminated CIDR list
#   _ONX_RIR_CIDR_COUNT  number of CIDRs in that list
# Returns: 0 on success, 1 when the input is not a valid IPv4 range.
_onx_rir_range_to_cidrs() {
    local -r ipv4_re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
    local ip="${1:-}" count="${2:-}"
    local o1 o2 o3 o4 start remaining block prefix

    _ONX_RIR_CIDRS=""
    _ONX_RIR_CIDR_COUNT=0

    [[ "$ip" =~ $ipv4_re ]] || return 1
    # 10# forces base 10 so an octet such as "010" is not read as octal.
    o1=$((10#${BASH_REMATCH[1]}))
    o2=$((10#${BASH_REMATCH[2]}))
    o3=$((10#${BASH_REMATCH[3]}))
    o4=$((10#${BASH_REMATCH[4]}))
    (( o1 <= 255 && o2 <= 255 && o3 <= 255 && o4 <= 255 )) || return 1

    [[ "$count" =~ ^[0-9]{1,10}$ ]] || return 1
    remaining=$((10#$count))
    start=$(( (o1 << 24) | (o2 << 16) | (o3 << 8) | o4 ))
    (( remaining >= 1 && start + remaining <= 4294967296 )) || return 1

    while (( remaining > 0 )); do
        # Grow the block while it stays aligned on `start` and still fits into
        # the addresses that are left.
        block=1
        prefix=32
        while (( prefix > 0 && block * 2 <= remaining && start % (block * 2) == 0 )); do
            block=$(( block * 2 ))
            prefix=$(( prefix - 1 ))
        done
        _ONX_RIR_CIDRS+="$(( (start >> 24) & 255 )).$(( (start >> 16) & 255 )).$(( (start >> 8) & 255 )).$(( start & 255 ))/${prefix}"$'\n'
        _ONX_RIR_CIDR_COUNT=$(( _ONX_RIR_CIDR_COUNT + 1 ))
        start=$(( start + block ))
        remaining=$(( remaining - block ))
    done
    return 0
}

# update_from_rir
#
# Builds the per-country CIDR files from the five RIR "delegated-extended"
# statistics files (ARIN, RIPE, APNIC, AFRINIC, LACNIC).
#
# Globals read: CACHE_DIR, CIDRS_DIR, GEOIP_DIR, STATUS_FILE, FORCE, SOURCE,
#               STARTED_AT, STARTED_EPOCH
# Side effects: refreshes $CACHE_DIR/<rir>.txt, rewrites $CIDRS_DIR/<CC>.txt,
#               writes $STATUS_FILE and $GEOIP_DIR/manifest.json.
# Outputs:      the manifest JSON on stdout, log lines on stderr.
# Returns:      0 when at least one CIDR was parsed, 1 otherwise — so that the
#               "auto" mode can fall back to another source.
update_from_rir() {
    log "Updating from RIR delegated files (public domain, free)…"
    status_write "running" "rir-init" 0 7 "RIR sourcelarini hazirliyor..."

    # Associative arrays have no defined iteration order, so the processing
    # order is kept in a separate indexed array.
    local rir_order=(arin ripe apnic afrinic lacnic)
    declare -A RIRS=(
        [arin]="https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
        [ripe]="https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
        [apnic]="https://ftp.apnic.net/pub/stats/apnic/delegated-apnic-extended-latest"
        [afrinic]="https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
        [lacnic]="https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
    )

    declare -A CC_CIDRS=()  # country code → newline-terminated CIDR list
    local step_idx=0
    local total_steps=7     # 5 RIR downloads + parse + write
    local parsed_total=0
    local rir url cache_file part_file cached age cache_size dl_size count_in_file
    local registry cc type ip value date status _rest

    for rir in "${rir_order[@]}"; do
        step_idx=$((step_idx + 1))
        url="${RIRS[$rir]}"
        cache_file="$CACHE_DIR/$rir.txt"
        part_file="$CACHE_DIR/.$rir.txt.part"
        cached=false

        # Cache hit check (24 h). An empty file never counts as a hit.
        if [[ $FORCE -eq 0 && -s "$cache_file" ]]; then
            age=$(( $(date +%s) - $(stat -c %Y "$cache_file" 2>/dev/null || echo 0) ))
            if (( age < 86400 )); then
                log "  • $rir: cached (age ${age}s)"
                cache_size=$(stat -c %s "$cache_file" 2>/dev/null || echo 0)
                status_write "running" "$rir" "$step_idx" "$total_steps" \
                    "$rir: cache hit (${cache_size} bytes, age ${age}s)" \
                    "{\"file_size\":$cache_size,\"cached\":true}"
                cached=true
                # Short pause so every step is visible in the UI (cache hits are very fast).
                sleep 0.6
            fi
        fi

        if [[ "$cached" == false ]]; then
            log "  • $rir: downloading $url…"
            status_write "running" "$rir" "$step_idx" "$total_steps" \
                "Downloading $rir from $(echo "$url" | awk -F/ '{print $3}')..." \
                '{"phase_action":"download"}'

            # Download to a side file and rename only on success, so a failed or
            # truncated transfer can never be mistaken for a fresh cache entry.
            if curl -fsSL --max-time 60 -o "$part_file" "$url" \
                && [[ -s "$part_file" ]] \
                && mv -f "$part_file" "$cache_file"; then
                dl_size=$(stat -c %s "$cache_file" 2>/dev/null || echo 0)
                status_write "running" "$rir" "$step_idx" "$total_steps" \
                    "$rir: downloaded ${dl_size} bytes" \
                    "{\"file_size\":$dl_size,\"cached\":false}"
            else
                rm -f "$part_file" 2>/dev/null
                log "  ✗ $rir download failed"
                status_write "running" "$rir" "$step_idx" "$total_steps" \
                    "$rir download failed" '{"download_failed":true}'
                if [[ -s "$cache_file" ]]; then
                    # An older copy is better than dropping the whole registry.
                    log "  • $rir: using stale cached copy"
                else
                    continue
                fi
            fi
        fi

        # Parse phase — short pause for the UI animation.
        status_write "running" "$rir-parse" "$step_idx" "$total_steps" \
            "$rir: parsing CIDR ranges..." '{"phase_action":"parse"}'
        sleep 0.4

        # Record format: arin|US|ipv4|192.168.0.0|256|20020101|allocated[|opaque-id]
        # Only ipv4 records with status allocated/assigned are used; for ipv4
        # the 5th field is an address count, converted to CIDRs by the helper.
        count_in_file=0
        while IFS='|' read -r registry cc type ip value date status _rest || [[ -n "${registry:-}" ]]; do
            [[ "${type:-}" == "ipv4" ]] || continue
            status="${status%$'\r'}"
            [[ "$status" == "allocated" || "$status" == "assigned" ]] || continue
            # cc becomes part of a file name: accept exactly two letters only.
            [[ "${cc:-}" =~ ^[A-Za-z]{2}$ ]] || continue
            _onx_rir_range_to_cidrs "${ip:-}" "${value:-}" || continue

            CC_CIDRS[$cc]+="$_ONX_RIR_CIDRS"
            count_in_file=$((count_in_file + _ONX_RIR_CIDR_COUNT))
        done < "$cache_file"

        parsed_total=$((parsed_total + count_in_file))
        log "  ✓ $rir: $count_in_file CIDR ranges parsed"
    done

    # Nothing usable from any registry: report failure instead of publishing an
    # empty "complete" result, so the caller can try another source.
    if (( parsed_total == 0 )); then
        log "✗ RIR update failed: no CIDR ranges could be parsed from any registry"
        return 1
    fi

    # Write one file per country.
    status_write "running" "writing-cidrs" 7 7 "Per-country CIDR dosyalari yaziliyor..."
    local total_countries=0
    local total_cidrs=0
    local output count
    for cc in "${!CC_CIDRS[@]}"; do
        output="$CIDRS_DIR/${cc}.txt"
        # Write next to the target and rename, so readers never see a partial file.
        if printf '%s' "${CC_CIDRS[$cc]}" | sort -u > "${output}.tmp" 2>/dev/null \
            && mv -f "${output}.tmp" "$output" 2>/dev/null; then
            # Keep the file readable by the web server.
            chmod 644 "$output" 2>/dev/null || true
        else
            rm -f "${output}.tmp" 2>/dev/null
            log "  ⚠ $cc: could not write $output"
            continue
        fi
        count=$(wc -l < "$output" 2>/dev/null | tr -d '[:space:]')
        count=${count:-0}
        total_cidrs=$((total_cidrs + count))
        total_countries=$((total_countries + 1))
    done

    log "✓ RIR update complete: $total_countries countries, $total_cidrs total CIDRs"

    # CRITICAL: the final status is written with a plain here-document so that
    # reporting completion does not depend on jq working.
    local final_elapsed final_updated_at
    final_elapsed=$(( $(date +%s) - STARTED_EPOCH ))
    final_updated_at="$(date -u +%FT%TZ)"
    cat > "${STATUS_FILE}.tmp" 2>/dev/null <<EOF
{
  "phase": "complete",
  "step": "done",
  "step_idx": 7,
  "total": 7,
  "source": "$SOURCE",
  "started_at": "$STARTED_AT",
  "updated_at": "$final_updated_at",
  "elapsed_sec": $final_elapsed,
  "message": "Tamamlandi: $total_countries ulke, $total_cidrs CIDR",
  "countries": $total_countries,
  "total_cidrs": $total_cidrs,
  "license": "public domain (RIR delegated files)"
}
EOF
    mv -f "${STATUS_FILE}.tmp" "$STATUS_FILE" 2>/dev/null || true
    chmod 644 "$STATUS_FILE" 2>/dev/null || true
    chgrp apache "$STATUS_FILE" 2>/dev/null || true

    # Manifest. The cache entries are counted with a glob rather than by
    # parsing ls output; the result is a plain integer, safe for --argjson.
    local cache_files_count=0
    local entry
    for entry in "$CACHE_DIR"/*; do
        if [[ -e "$entry" ]]; then
            cache_files_count=$((cache_files_count + 1))
        fi
    done

    if ! jq -n \
        --arg source "rir" \
        --arg updated_at "$(date -u +%FT%TZ)" \
        --argjson countries "${total_countries:-0}" \
        --argjson cidrs "${total_cidrs:-0}" \
        --argjson cache_dir_files "${cache_files_count:-0}" \
        '{source: $source, updated_at: $updated_at, countries: $countries, total_cidrs: $cidrs, cache_files: $cache_dir_files, license: "public domain (RIR delegated files)"}' \
        > "$GEOIP_DIR/manifest.json"; then
        log "  ⚠ manifest.json write failed (non-critical, CIDR files written)"
        # Still a success — the CIDR files are on disk.
    fi

    cat "$GEOIP_DIR/manifest.json" 2>/dev/null || echo '{"source":"rir","note":"manifest missing but CIDRs written"}'
    return 0
}

# ─────────────────────────────────────────────────────────────────────────────
# Source 2: DB-IP Lite (CC BY 4.0 — attribution only, no key)
# ─────────────────────────────────────────────────────────────────────────────
# _onx_dbip_range_to_cidrs <first_ip> <last_ip>
#
# Converts the inclusive IPv4 range <first_ip>..<last_ip> into the minimal list
# of aligned CIDR blocks. The result is returned in the global _ONX_DBIP_CIDRS
# (newline-terminated list) so that no subshell is forked per CSV row.
# Returns: 0 on success, 1 when either address is not IPv4 or the range is
#          reversed (this is how IPv6 rows get skipped).
_onx_dbip_range_to_cidrs() {
    local -r ipv4_re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
    local addr n first="" last="" remaining block prefix

    _ONX_DBIP_CIDRS=""

    for addr in "${1:-}" "${2:-}"; do
        [[ "$addr" =~ $ipv4_re ]] || return 1
        # 10# forces base 10 so an octet such as "010" is not read as octal.
        (( 10#${BASH_REMATCH[1]} <= 255 && 10#${BASH_REMATCH[2]} <= 255 \
            && 10#${BASH_REMATCH[3]} <= 255 && 10#${BASH_REMATCH[4]} <= 255 )) || return 1
        n=$(( (10#${BASH_REMATCH[1]} << 24) | (10#${BASH_REMATCH[2]} << 16) \
            | (10#${BASH_REMATCH[3]} << 8) | 10#${BASH_REMATCH[4]} ))
        if [[ -z "$first" ]]; then first=$n; else last=$n; fi
    done

    (( last >= first )) || return 1
    remaining=$(( last - first + 1 ))

    while (( remaining > 0 )); do
        # Grow the block while it stays aligned on `first` and still fits.
        block=1
        prefix=32
        while (( prefix > 0 && block * 2 <= remaining && first % (block * 2) == 0 )); do
            block=$(( block * 2 ))
            prefix=$(( prefix - 1 ))
        done
        _ONX_DBIP_CIDRS+="$(( (first >> 24) & 255 )).$(( (first >> 16) & 255 )).$(( (first >> 8) & 255 )).$(( first & 255 ))/${prefix}"$'\n'
        first=$(( first + block ))
        remaining=$(( remaining - block ))
    done
    return 0
}

# update_from_dbip
#
# Builds the per-country CIDR files from the DB-IP Lite country CSV
# (rows of the form ip_start,ip_end,country_code). Only IPv4 rows are used.
#
# Globals read: CACHE_DIR, CIDRS_DIR, GEOIP_DIR, FORCE
# Side effects: refreshes the cached .csv.gz when it is missing, older than
#               30 days or FORCE=1; rewrites $CIDRS_DIR/<CC>.txt; writes
#               $GEOIP_DIR/manifest.json; publishes progress via status_write.
# Outputs:      the manifest JSON on stdout, log lines on stderr.
# Returns:      0 on success; 1 when no database is available or no IPv4 range
#               could be parsed.
update_from_dbip() {
    log "Updating from DB-IP Lite (CC BY 4.0, no key required)…"

    # DB-IP publishes one file per month; the URL carries the year and month.
    local year_month
    year_month=$(date +%Y-%m)
    local url="https://download.db-ip.com/free/dbip-country-lite-${year_month}.csv.gz"
    local cache_file="$CACHE_DIR/dbip-country.csv.gz"
    local part_file="$CACHE_DIR/.dbip-country.csv.gz.part"
    local cache_mtime refresh_before

    cache_mtime=$(stat -c %Y "$cache_file" 2>/dev/null || echo 0)
    refresh_before=$(date -d '30 days ago' +%s)

    if [[ $FORCE -eq 1 || ! -s "$cache_file" ]] || (( cache_mtime < refresh_before )); then
        log "  Downloading $url…"
        status_write "running" "dbip" 1 2 "Downloading DB-IP Lite country database..."
        # Download to a side file and rename only on success, so a truncated
        # transfer never replaces (or poses as) a valid cache entry.
        if curl -fsSL --max-time 120 -o "$part_file" "$url" \
            && [[ -s "$part_file" ]] \
            && mv -f "$part_file" "$cache_file"; then
            :
        else
            rm -f "$part_file" 2>/dev/null
            log "  ✗ DB-IP download failed"
            if [[ -s "$cache_file" ]]; then
                log "  • DB-IP: using stale cached copy"
            else
                return 1
            fi
        fi
    fi

    status_write "running" "dbip-parse" 2 2 "DB-IP: parsing CIDR ranges..."

    declare -A CC_CIDRS=()
    local ip_start ip_end cc _rest
    local parsed_rows=0

    # The loop reads from a process substitution, NOT from a pipe: a piped
    # `while` runs in a subshell and every CC_CIDRS update would be lost.
    while IFS=',' read -r ip_start ip_end cc _rest || [[ -n "${ip_start:-}" ]]; do
        # Tolerate quoted fields and CRLF line endings.
        ip_start="${ip_start//\"/}"
        ip_end="${ip_end//\"/}"
        cc="${cc%$'\r'}"
        cc="${cc//\"/}"
        # cc becomes part of a file name: accept exactly two letters only.
        [[ "$cc" =~ ^[A-Za-z]{2}$ ]] || continue
        _onx_dbip_range_to_cidrs "$ip_start" "$ip_end" || continue

        CC_CIDRS[$cc]+="$_ONX_DBIP_CIDRS"
        parsed_rows=$((parsed_rows + 1))
    done < <(zcat "$cache_file")

    if (( parsed_rows == 0 )); then
        log "  ✗ DB-IP: no IPv4 ranges could be parsed from $cache_file"
        return 1
    fi

    local total=0 countries=0
    local output count
    for cc in "${!CC_CIDRS[@]}"; do
        output="$CIDRS_DIR/${cc}.txt"
        # Write next to the target and rename, so readers never see a partial file.
        if printf '%s' "${CC_CIDRS[$cc]}" | sort -u > "${output}.tmp" 2>/dev/null \
            && mv -f "${output}.tmp" "$output" 2>/dev/null; then
            # Keep the file readable by the web server (same as the RIR source).
            chmod 644 "$output" 2>/dev/null || true
        else
            rm -f "${output}.tmp" 2>/dev/null
            log "  ⚠ $cc: could not write $output"
            continue
        fi
        count=$(wc -l < "$output" 2>/dev/null | tr -d '[:space:]')
        total=$((total + ${count:-0}))
        countries=$((countries + 1))
    done

    log "✓ DB-IP update complete: $countries countries, $total total CIDRs"
    status_write "complete" "done" 2 2 "Tamamlandi: $countries ulke, $total CIDR"

    jq -n \
        --arg source "dbip" \
        --arg updated_at "$(date -u +%FT%TZ)" \
        --argjson cidrs "${total:-0}" \
        '{source: $source, updated_at: $updated_at, total_cidrs: $cidrs, license: "CC BY 4.0 — DB-IP"}' \
        > "$GEOIP_DIR/manifest.json" 2>/dev/null || \
        log "  ⚠ dbip manifest write failed"

    cat "$GEOIP_DIR/manifest.json" 2>/dev/null || echo '{"source":"dbip","note":"manifest missing"}'
    return 0
}

# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────
# Run the selected source. The script's exit status is that of the updater
# that ran last; an unknown source name is reported on stderr and exits 1.
if [[ "$SOURCE" == "rir" ]]; then
    update_from_rir
elif [[ "$SOURCE" == "dbip" ]]; then
    update_from_dbip
elif [[ "$SOURCE" == "auto" || -z "$SOURCE" ]]; then
    # RIR is preferred (canonical, public domain); DB-IP is tried only if it fails.
    update_from_rir || {
        log "RIR failed, trying DB-IP fallback…"
        update_from_dbip
    }
else
    echo "{\"error\":\"unknown source: $SOURCE — use rir|dbip|auto\"}" >&2
    exit 1
fi
