#!/bin/bash
# Test script for: skraak calls from-preds
# Compares output against reference JSON files (verified with Julia)
#
# Usage: ./test_calls_from_preds.sh
#
# Tests:
# 1. predsST_opensoundscape-kiwi-1.2_2025-11-12.csv (single species: Kiwi)
# 2. preds1_opensoundscape-multi-1.0_2025-07-22.csv (multi-species)
#
# The calls array is compared as a SET (order-independent), matching
# the Julia issetequal() verification used by the author.
# Strict mode: abort on unhandled errors, unset variables, and failures
# anywhere in a pipeline.
set -euo pipefail

# Setup paths: this script's own directory, the directory above it (where the
# skraak binary is looked up), and the data directory holding the CSV inputs
# and reference JSON files.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
DATA_DIR="$SCRIPT_DIR/data"

# Colors (escape sequences, interpreted by `echo -e` when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Test counters
TESTS_RUN=0
TESTS_PASSED=0
TESTS_FAILED=0

# Check jq is available: every comparison below shells out to it, and without
# this check a missing jq would be misreported as invalid JSON or a mismatch.
if ! command -v jq >/dev/null 2>&1; then
  echo -e "${RED}Error: jq not found. Install jq to run this test script.${NC}" >&2
  exit 1
fi

# Check binary exists
if [[ ! -f "$PROJECT_DIR/skraak" ]]; then
  echo -e "${RED}Error: skraak binary not found. Run 'go build' first.${NC}" >&2
  exit 1
fi
# Compare the .calls arrays of two JSON files as sets (order-independent).
#
# Both arrays are put into a canonical order with jq's `sort` (which orders
# whole objects, not just selected fields) and then compared as JSON values
# inside jq. Comparing values rather than printed text means the order of
# keys inside each call object does not cause a false mismatch.
# A missing .calls key is treated as an empty array.
#
# Usage: compare_calls_as_set <actual.json> <expected.json>
# Returns: 0 if the arrays are equal, 1 if they differ or if either file
#          cannot be read as exactly one JSON document.
compare_calls_as_set() {
  local actual="$1"
  local expected="$2"
  local status=0

  # jq -e exits 0 when the program's result is true, 1 when it is false,
  # and with a higher code on errors (unreadable file, invalid JSON, .calls
  # not an array, ...).
  jq -e -n --slurpfile act "$actual" --slurpfile exp "$expected" '
    ($act | length) == 1 and ($exp | length) == 1
    and (($act[0].calls // []) | sort) == (($exp[0].calls // []) | sort)
  ' >/dev/null 2>&1 || status=$?

  if (( status == 0 )); then
    return 0
  fi

  if (( status > 1 )); then
    # A jq failure must never look like a match: report it and fail.
    echo "compare_calls_as_set: jq could not compare '$actual' with '$expected'" >&2
  fi
  return 1
}
# Compare metadata fields (clip_duration, gap_threshold, total_calls,
# species_count) of two JSON files.
#
# The comparison is done on JSON values inside a single jq call, so the key
# order inside species_count does not produce a false difference. For each
# differing scalar field one line "<field>: expected=..., actual=..." is
# written to stdout; a differing species_count is reported without values.
# A null or false value is displayed as "null".
#
# Usage: compare_metadata <actual.json> <expected.json>
# Returns: 0 if all fields match, 1 if any differ or if the files cannot be
#          compared (unreadable, invalid JSON, not a single JSON document).
compare_metadata() {
  local actual="$1"
  local expected="$2"
  local report=""
  local status=0

  report=$(jq -n -r --slurpfile act "$actual" --slurpfile exp "$expected" '
    def shown: if . == null or . == false then "null" else tostring end;
    if ($act | length) != 1 or ($exp | length) != 1 then
      " metadata: expected exactly one JSON document in each file"
    else
      $act[0] as $a | $exp[0] as $e
      | ( ("clip_duration", "gap_threshold", "total_calls") as $field
          | select($a[$field] != $e[$field])
          | " \($field): expected=\($e[$field] | shown), actual=\($a[$field] | shown)" ),
        ( select($a.species_count != $e.species_count)
          | " species_count differs" )
    end
  ' 2>/dev/null) || status=$?

  if (( status != 0 )); then
    # jq itself failed; never let that pass as "metadata matches".
    echo " metadata: could not compare '$actual' with '$expected'"
    return 1
  fi

  if [[ -n "$report" ]]; then
    printf '%s\n' "$report"
    return 1
  fi
  return 0
}
# Run a single test case: execute `skraak calls from-preds --csv <csv_path>`,
# then compare its JSON output with the reference file.
#
# The calls array is the primary check (via compare_calls_as_set); metadata
# differences (via compare_metadata) are only reported as a warning.
# Updates the global counters TESTS_RUN, TESTS_PASSED and TESTS_FAILED.
#
# Usage: run_test <csv_name> <csv_path> <expected_json_path>
# Returns: always 0; the outcome is recorded in the counters.
run_test() {
  local name="$1"
  local csv_path="$2"
  local expected_json="$3"

  ((TESTS_RUN++)) || true

  echo ""
  echo "Testing: $name"
  echo " CSV: $(basename "$csv_path")"
  echo " Expected: $(basename "$expected_json")"

  # All scratch files live in one private directory so that a single rm at
  # the end cleans up on every path through the test body.
  local work_dir
  if ! work_dir=$(mktemp -d "${TMPDIR:-/tmp}/skraak_test.XXXXXX"); then
    echo -e " ${RED}✗ Could not create temporary directory${NC}"
    ((TESTS_FAILED++)) || true
    return 0
  fi

  _run_test_in_dir "$work_dir" "$csv_path" "$expected_json" || true

  rm -rf -- "$work_dir"
  return 0
}

# Body of run_test: runs the command and reports/counts the result, keeping
# all temporary files inside <work_dir> (removed by the caller).
# Usage: _run_test_in_dir <work_dir> <csv_path> <expected_json_path>
_run_test_in_dir() {
  local work_dir="$1"
  local csv_path="$2"
  local expected_json="$3"
  local actual_json="$work_dir/actual.json"
  local stderr_output="$work_dir/stderr.txt"

  # Without a readable reference nothing can be compared; fail this test
  # instead of letting a later jq call abort the whole script under set -e.
  if ! jq empty "$expected_json" 2>/dev/null; then
    echo -e " ${RED}✗ Expected JSON is missing or invalid: $expected_json${NC}"
    ((TESTS_FAILED++)) || true
    return 0
  fi

  # Run the command (stdout and stderr each captured to a file)
  echo " Running: skraak calls from-preds --csv ..."
  if ! "$PROJECT_DIR/skraak" calls from-preds --csv "$csv_path" >"$actual_json" 2>"$stderr_output"; then
    echo -e " ${RED}✗ Command failed${NC}"
    cat "$stderr_output"
    ((TESTS_FAILED++)) || true
    return 0
  fi

  # Show progress from stderr. head reads the file directly: a `cat | head`
  # pipeline can fail with SIGPIPE under pipefail when the output is large.
  head -n 3 "$stderr_output" || true

  # Check if output is valid JSON
  if ! jq empty "$actual_json" 2>/dev/null; then
    echo -e " ${RED}✗ Output is not valid JSON${NC}"
    ((TESTS_FAILED++)) || true
    return 0
  fi

  # Compare metadata once, capturing the difference report so it can be
  # printed in the right place below. (A bare second call would return
  # non-zero and terminate the script under set -e.)
  local metadata_match=false
  local metadata_diff=""
  if metadata_diff=$(compare_metadata "$actual_json" "$expected_json"); then
    metadata_match=true
  fi

  # Compare calls array as set (PRIMARY CHECK) and report results
  if compare_calls_as_set "$actual_json" "$expected_json"; then
    echo -e " ${GREEN}✓ Calls array matches (set comparison)${NC}"

    # Show summary stats; "?" is shown if a value cannot be computed.
    local call_count species_count
    call_count=$(jq '(.calls // []) | length' "$actual_json" 2>/dev/null) || call_count="?"
    species_count=$(jq '(.species_count // {}) | keys | length' "$actual_json" 2>/dev/null) || species_count="?"
    echo " $call_count calls across $species_count species"

    if [[ "$metadata_match" == true ]]; then
      echo -e " ${GREEN}✓ Metadata matches${NC}"
    else
      echo -e " ${YELLOW}⚠ Metadata differs (calls array is primary)${NC}"
      if [[ -n "$metadata_diff" ]]; then
        printf '%s\n' "$metadata_diff"
      fi
    fi
    # Counted as passed either way since the calls match
    ((TESTS_PASSED++)) || true
  else
    echo -e " ${RED}✗ Calls array differs${NC}"

    # Show diff stats
    local actual_count expected_count
    actual_count=$(jq '(.calls // []) | length' "$actual_json" 2>/dev/null) || actual_count="?"
    expected_count=$(jq '(.calls // []) | length' "$expected_json" 2>/dev/null) || expected_count="?"
    echo " Actual calls: $actual_count, Expected calls: $expected_count"

    # Count calls present on one side only, identified by their key fields.
    # jq array subtraction removes every element of the right-hand array.
    local projection='{file, start_time, end_time, ebird_code, segments}'
    local only_in_from="[(\$from[0].calls // [])[] | $projection] - [(\$other[0].calls // [])[] | $projection] | length"
    local missing extra
    missing=$(jq -n --slurpfile from "$expected_json" --slurpfile other "$actual_json" \
      "$only_in_from" 2>/dev/null) || missing="?"
    extra=$(jq -n --slurpfile from "$actual_json" --slurpfile other "$expected_json" \
      "$only_in_from" 2>/dev/null) || extra="?"
    echo " Missing from actual: $missing calls"
    echo " Extra in actual: $extra calls"

    if [[ -n "$metadata_diff" ]]; then
      printf '%s\n' "$metadata_diff"
    fi
    ((TESTS_FAILED++)) || true
  fi

  return 0
}
# Print the final tally of the test run.
# Reads the global counters TESTS_RUN, TESTS_PASSED and TESTS_FAILED.
# Returns: 0 when no test failed, 1 otherwise.
print_summary() {
  printf '\n%s\n' "=== Summary ==="
  printf '%s\n' "Tests run: $TESTS_RUN"
  # %b expands the backslash escapes stored in the color variables.
  printf '%b\n' "Passed: ${GREEN}$TESTS_PASSED${NC}"

  if (( TESTS_FAILED <= 0 )); then
    printf '%b\n' "Failed: $TESTS_FAILED"
    return 0
  fi

  # Only a non-zero failure count is highlighted in red.
  printf '%b\n' "Failed: ${RED}$TESTS_FAILED${NC}"
  return 1
}
# Main: announce the run, execute each test case, then print the summary.
# Each case pairs a predictions CSV with a reference JSON of the same stem
# in the data directory.
printf '%s\n' \
  "=== Testing: skraak calls from-preds ===" \
  "Comparing calls arrays as SETS (order-independent)"

# Test 1: predsST (kiwi single species)
kiwi_stem="$DATA_DIR/predsST_opensoundscape-kiwi-1.2_2025-11-12"
run_test "predsST (single species: Kiwi)" "${kiwi_stem}.csv" "${kiwi_stem}.json"

# Test 2: preds1 (multi-species)
multi_stem="$DATA_DIR/preds1_opensoundscape-multi-1.0_2025-07-22"
run_test "preds1 (multi-species)" "${multi_stem}.csv" "${multi_stem}.json"

# Last command: its return status becomes the script's exit status.
print_summary