quietlight/skraak_mcp: shell_scripts/test_calls_from

#!/bin/bash
# Test script for: skraak calls from-preds
# Compares output against reference JSON files (verified with Julia)
#
# Usage: ./test_calls_from_preds.sh
#
# Tests:
# 1. predsST_opensoundscape-kiwi-1.2_2025-11-12.csv (single species: Kiwi)
# 2. preds1_opensoundscape-multi-1.0_2025-07-22.csv (multi-species)
#
# The calls array is compared as a SET (order-independent), matching
# the Julia issetequal() verification used by the author.

set -euo pipefail

# Setup paths: resolved relative to this script so it can be launched from
# any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
DATA_DIR="$SCRIPT_DIR/data"

# Colors (backslash escapes are expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Test counters (incremented by run_test, reported by print_summary)
TESTS_RUN=0
TESTS_PASSED=0
TESTS_FAILED=0

# Check binary exists. Diagnostics go to stderr so stdout carries only the
# test report.
if [ ! -f "$PROJECT_DIR/skraak" ]; then
    echo -e "${RED}Error: skraak binary not found. Run 'go build' first.${NC}" >&2
    exit 1
fi

# Every comparison in this script is done with jq. Fail early with a clear
# message instead of letting each test report a misleading JSON error.
if ! command -v jq >/dev/null 2>&1; then
    echo -e "${RED}Error: jq not found. Install jq to run this test.${NC}" >&2
    exit 1
fi

# Compare the .calls arrays of two JSON files independent of element order.
# A missing or null .calls is treated as an empty array.
# Returns 0 if equal, 1 if different, 2 if jq could not process either file
# (an unreadable input is never reported as a match).
# Usage: compare_calls_as_set <actual.json> <expected.json>
compare_calls_as_set() {
    local actual="$1"
    local expected="$2"

    # Canonical form: jq's `sort` orders whole values, so ties cannot be left
    # in input order, and -S (--sort-keys) makes the serialized text
    # independent of object key order.
    local canonical_filter='(.calls // []) | sort'
    local actual_sorted
    local expected_sorted

    # jq's own stderr stays suppressed, but its exit status is honoured:
    # otherwise two failed extractions both yield "" and compare as equal.
    if ! actual_sorted=$(jq -S "$canonical_filter" "$actual" 2>/dev/null); then
        echo "    Error: jq could not read calls from: $actual" >&2
        return 2
    fi
    if ! expected_sorted=$(jq -S "$canonical_filter" "$expected" 2>/dev/null); then
        echo "    Error: jq could not read calls from: $expected" >&2
        return 2
    fi

    # Exit status of the string test is the function's result (0 or 1).
    [ "$actual_sorted" = "$expected_sorted" ]
}

# Compare metadata fields (clip_duration, gap_threshold, total_calls, species_count)
# Prints one indented line to stdout for every field that differs.
# Returns 0 if all match, 1 if any differ or a value could not be extracted
# Usage: compare_metadata <actual.json> <expected.json>
compare_metadata() {
    local actual="$1"
    local expected="$2"

    local all_match=true
    local field act_val exp_val

    # Scalar fields: compared as jq's raw text, with a missing/null value
    # rendered as the string "null". A failed extraction marks a mismatch so
    # that two unreadable files are not reported as matching.
    for field in clip_duration gap_threshold total_calls; do
        act_val=$(jq -r --arg f "$field" '.[$f] // "null"' "$actual") || all_match=false
        exp_val=$(jq -r --arg f "$field" '.[$f] // "null"' "$expected") || all_match=false

        if [ "$act_val" != "$exp_val" ]; then
            echo "    $field: expected=$exp_val, actual=$act_val"
            all_match=false
        fi
    done

    # species_count is compared in canonical form: -S sorts object keys and
    # -c removes layout, so key order and whitespace cannot cause a false
    # mismatch.
    act_val=$(jq -S -c '.species_count' "$actual") || all_match=false
    exp_val=$(jq -S -c '.species_count' "$expected") || all_match=false

    if [ "$act_val" != "$exp_val" ]; then
        echo "    species_count differs"
        all_match=false
    fi

    [ "$all_match" = true ]
}

# Run a single test case: execute `skraak calls from-preds` on a CSV and
# compare its JSON output against a reference file.
# Globals:  PROJECT_DIR, RED, GREEN, YELLOW, NC (read);
#           TESTS_RUN, TESTS_PASSED, TESTS_FAILED (updated)
# Outputs:  progress and verdict on stdout
# Returns:  always 0; the verdict is recorded in the counters so one failing
#           case does not stop the remaining cases under `set -e`
# Usage: run_test <csv_name> <csv_path> <expected_json_path>
run_test() {
    local name="$1"
    local csv_path="$2"
    local expected_json="$3"

    TESTS_RUN=$((TESTS_RUN + 1))

    echo ""
    echo "Testing: $name"
    echo "  CSV: $(basename "$csv_path")"
    echo "  Expected: $(basename "$expected_json")"

    # Without the reference file there is nothing to compare against; record
    # a failure here rather than letting a later jq call abort the script.
    if [ ! -f "$expected_json" ]; then
        printf '  %b✗ Expected JSON not found: %s%b\n' "$RED" "$expected_json" "$NC"
        TESTS_FAILED=$((TESTS_FAILED + 1))
        return 0
    fi

    # Create temp files for actual output
    local actual_json stderr_output
    actual_json=$(mktemp --suffix=.json)
    stderr_output=$(mktemp --suffix=.txt)

    # Run the command (stdout and stderr are captured in separate temp files)
    echo "  Running: skraak calls from-preds --csv ..."

    if ! "$PROJECT_DIR/skraak" calls from-preds --csv "$csv_path" --dot-data=false > "$actual_json" 2>"$stderr_output"; then
        echo -e "  ${RED}✗ Command failed${NC}"
        cat "$stderr_output"
        rm -f "$actual_json" "$stderr_output"
        TESTS_FAILED=$((TESTS_FAILED + 1))
        return 0
    fi

    # Show progress from stderr. head reads the file directly: a
    # `cat | head` pipeline can fail with SIGPIPE under pipefail and abort
    # the script when the output is large.
    head -n 3 "$stderr_output"
    rm -f "$stderr_output"

    # Check if output is valid JSON
    if ! jq empty "$actual_json" 2>/dev/null; then
        echo -e "  ${RED}✗ Output is not valid JSON${NC}"
        rm -f "$actual_json"
        TESTS_FAILED=$((TESTS_FAILED + 1))
        return 0
    fi

    # Compare calls array as set (PRIMARY CHECK)
    local calls_match=false
    if compare_calls_as_set "$actual_json" "$expected_json"; then
        calls_match=true
    fi

    # Compare metadata once, inside a condition so its non-zero status cannot
    # trip `set -e`, and keep the diff lines so they are printed below the
    # verdict instead of above it.
    local metadata_match=false
    local metadata_diff=""
    if metadata_diff=$(compare_metadata "$actual_json" "$expected_json"); then
        metadata_match=true
    fi

    # Report results
    if [ "$calls_match" = true ]; then
        echo -e "  ${GREEN}✓ Calls array matches (set comparison)${NC}"

        # Show summary stats; "?" is shown if jq cannot compute a count.
        local call_count species_count
        call_count=$(jq '(.calls // []) | length' "$actual_json") || call_count="?"
        species_count=$(jq '(.species_count // {}) | keys | length' "$actual_json") || species_count="?"
        echo "    $call_count calls across $species_count species"

        if [ "$metadata_match" = true ]; then
            echo -e "  ${GREEN}✓ Metadata matches${NC}"
        else
            echo -e "  ${YELLOW}⚠ Metadata differs (calls array is primary)${NC}"
            if [ -n "$metadata_diff" ]; then
                printf '%s\n' "$metadata_diff"
            fi
        fi
        # Metadata is advisory: the test passes whenever the calls match.
        TESTS_PASSED=$((TESTS_PASSED + 1))
    else
        echo -e "  ${RED}✗ Calls array differs${NC}"

        # Show diff stats
        local actual_count expected_count
        actual_count=$(jq '(.calls // []) | length' "$actual_json") || actual_count="?"
        expected_count=$(jq '(.calls // []) | length' "$expected_json") || expected_count="?"
        echo "    Actual calls: $actual_count, Expected calls: $expected_count"

        # Count calls present on one side only. `// []` matches the
        # null-tolerant handling of .calls in the primary comparison.
        local missing extra
        missing=$(jq -n --slurpfile exp "$expected_json" --slurpfile act "$actual_json" \
            '[($exp[0].calls // [])[] | {file, start_time, end_time, ebird_code, segments}] - [($act[0].calls // [])[] | {file, start_time, end_time, ebird_code, segments}] | length') || missing="?"
        extra=$(jq -n --slurpfile exp "$expected_json" --slurpfile act "$actual_json" \
            '[($act[0].calls // [])[] | {file, start_time, end_time, ebird_code, segments}] - [($exp[0].calls // [])[] | {file, start_time, end_time, ebird_code, segments}] | length') || extra="?"

        echo "    Missing from actual: $missing calls"
        echo "    Extra in actual: $extra calls"

        if [ -n "$metadata_diff" ]; then
            printf '%s\n' "$metadata_diff"
        fi

        TESTS_FAILED=$((TESTS_FAILED + 1))
    fi

    # Cleanup temp files
    rm -f "$actual_json" "$stderr_output"
    return 0
}

# Print the run/passed/failed totals.
# Returns 1 when at least one test failed, 0 otherwise.
print_summary() {
    local status=0
    local failed_text="$TESTS_FAILED"

    # Only a non-zero failure count is coloured and turns the status to 1.
    if [[ "$TESTS_FAILED" -gt 0 ]]; then
        failed_text="${RED}${TESTS_FAILED}${NC}"
        status=1
    fi

    echo ""
    echo "=== Summary ==="
    echo "Tests run: $TESTS_RUN"
    echo -e "Passed: ${GREEN}${TESTS_PASSED}${NC}"
    echo -e "Failed: ${failed_text}"

    return "$status"
}

# Main: run both fixture cases, then report the totals.
echo "=== Testing: skraak calls from-preds ==="
echo "Comparing calls arrays as SETS (order-independent)"

# Each fixture is a predictions CSV plus a reference JSON with the same stem.
single_species_fixture="$DATA_DIR/predsST_opensoundscape-kiwi-1.2_2025-11-12"
multi_species_fixture="$DATA_DIR/preds1_opensoundscape-multi-1.0_2025-07-22"

# Test 1: predsST (kiwi single species)
run_test "predsST (single species: Kiwi)" "${single_species_fixture}.csv" "${single_species_fixture}.json"

# Test 2: preds1 (multi-species)
run_test "preds1 (multi-species)" "${multi_species_fixture}.csv" "${multi_species_fixture}.json"

# Must stay last: its return status becomes the script's exit status.
print_summary