#!/bin/bash
# test_clip_labels.sh
# Test skraak calls clip-labels
# Compares output against reference CSVs in clip-labels_test_data/
#
# Two test cases:
# 1. Normal (OPSO-equivalent): output matches clip_labels_opso.csv
# 2. __IGNORE__ mapping: D03 clips overlapping the ignored segment are excluded,
# but the file is not dropped entirely
#
# Note: removes clip_labels.csv and clip_labels_ignore.csv before each run
# because the command appends and checks for duplicates.
# Pull in the shared test helpers. The names used below that this script does
# not define itself (SCRIPT_DIR, PROJECT_DIR, check_binary, print_summary, the
# colour variables and the TESTS_* counters) presumably come from this file.
source "$(dirname "$0")/test_lib.sh"

TEST_DIR="$SCRIPT_DIR/clip-labels_test_data"

echo "=== Testing skraak calls clip-labels ==="
echo ""

check_binary

# Every path used by the tests is relative to the fixture directory, and each
# test begins with `rm -f` on a relative path, so abort instead of carrying on
# in the wrong working directory if the cd fails.
cd "$TEST_DIR" || {
  echo "Cannot cd to test data directory: $TEST_DIR" >&2
  exit 1
}
# ── Test 1: OPSO-equivalent output ──────────────────────────────────────
echo "Test 1: OPSO-equivalent output"

# Remove any previous output first so this run starts from an empty file.
rm -f ./clip_labels.csv

opso_args=(
  --folder . --mapping ./mapping.json
  --clip-duration 5 --clip-overlap 0 --min-label-overlap 0.25 --final-clip full
  --output ./clip_labels.csv
)
# stderr of the tool is discarded; only the CSV it writes is inspected.
"$PROJECT_DIR/skraak" calls clip-labels "${opso_args[@]}" 2>/dev/null

# Row order is not significant: drop the first (header) line of each file,
# sort what is left, and diff the reference against the fresh output.
opso_mismatch=$(diff <(tail -n +2 clip_labels_opso.csv | sort) \
  <(tail -n +2 clip_labels.csv | sort))

if [ -n "$opso_mismatch" ]; then
  echo -e " ${RED}✗${NC} clip_labels.csv differs from clip_labels_opso.csv"
  # Cap the diff shown so a large mismatch does not flood the log.
  echo "$opso_mismatch" | head -20
  TESTS_FAILED=$((TESTS_FAILED + 1))
else
  echo -e " ${GREEN}✓${NC} clip_labels.csv matches clip_labels_opso.csv (sorted, prefix-normalised)"
  TESTS_PASSED=$((TESTS_PASSED + 1))
fi
TESTS_RUN=$((TESTS_RUN + 1))
# ── Test 2: __IGNORE__ mapping ──────────────────────────────────────────
echo "Test 2: __IGNORE__ mapping (D03 segment skipped, file kept)"

# Remove any previous output first so this run starts from an empty file.
rm -f ./clip_labels_ignore.csv

ignore_args=(
  --folder . --mapping ./mapping_ignore.json
  --clip-duration 5 --clip-overlap 0 --min-label-overlap 0.25 --final-clip full
  --output ./clip_labels_ignore.csv
)
# stderr of the tool is discarded; only the CSV it writes is inspected.
"$PROJECT_DIR/skraak" calls clip-labels "${ignore_args[@]}" 2>/dev/null

# Only D03 is expected to change under the __IGNORE__ mapping: clips that
# overlap its Don't Know segment (777-860s) are excluded while its other clips
# are still emitted. Every line that does not mention D03 must therefore be
# identical to the OPSO reference (compared order-independently).
non_d03_mismatch=$(diff <(grep -v "D03" clip_labels_opso.csv | sort) \
  <(grep -v "D03" clip_labels_ignore.csv | sort))

if [ -n "$non_d03_mismatch" ]; then
  echo -e " ${RED}✗${NC} non-D03 rows differ between ignore and opso"
  # Cap the diff shown so a large mismatch does not flood the log.
  echo "$non_d03_mismatch" | head -20
  TESTS_FAILED=$((TESTS_FAILED + 1))
else
  echo -e " ${GREEN}✓${NC} non-D03 rows match between ignore and opso"
  TESTS_PASSED=$((TESTS_PASSED + 1))
fi
TESTS_RUN=$((TESTS_RUN + 1))
# The D03 file must survive the __IGNORE__ mapping: at least one line of the
# ignore output has to mention it.
if ! grep -q "D03" clip_labels_ignore.csv; then
  echo -e " ${RED}✗${NC} D03 rows missing from clip_labels_ignore.csv (file should be kept)"
  TESTS_FAILED=$((TESTS_FAILED + 1))
else
  echo -e " ${GREEN}✓${NC} D03 rows present in clip_labels_ignore.csv (file not dropped)"
  TESTS_PASSED=$((TESTS_PASSED + 1))
fi
TESTS_RUN=$((TESTS_RUN + 1))
# Verify D03 clips overlapping the __IGNORE__ segment (775-860s) are excluded:
# the ignore run must emit fewer D03 rows than the OPSO reference.
if [ ! -r clip_labels_ignore.csv ]; then
  # A missing output file would otherwise be counted as 0 rows; report it
  # explicitly instead of comparing counts that mean nothing.
  echo -e " ${RED}✗${NC} clip_labels_ignore.csv missing or unreadable; cannot compare D03 clip counts"
  ((TESTS_FAILED++)) || true
else
  # grep -c counts matching lines itself (no `grep | wc -l`, and no
  # whitespace-padded number from BSD wc in the messages). It exits 1 while
  # still printing 0 when nothing matches, hence the `|| true`; the :-0
  # defaults cover an unreadable reference file, for which it prints nothing.
  d03_ignore=$(grep -c "D03" clip_labels_ignore.csv) || true
  d03_opso=$(grep -c "D03" clip_labels_opso.csv) || true
  d03_ignore=${d03_ignore:-0}
  d03_opso=${d03_opso:-0}

  if [ "$d03_ignore" -lt "$d03_opso" ]; then
    echo -e " ${GREEN}✓${NC} D03 clips reduced: $d03_ignore in ignore vs $d03_opso in opso (overlapping clips excluded)"
    ((TESTS_PASSED++)) || true
  else
    echo -e " ${RED}✗${NC} D03 clips not reduced: $d03_ignore in ignore vs $d03_opso in opso"
    ((TESTS_FAILED++)) || true
  fi
fi
((TESTS_RUN++)) || true
# Verify no D03 clips in the 775-860s range appear in ignore output
if [ ! -r clip_labels_ignore.csv ]; then
  # Without this guard a missing output file yields a count of 0 and the
  # check would pass without having inspected anything.
  echo -e " ${RED}✗${NC} clip_labels_ignore.csv missing or unreadable; cannot check 775-860s range"
  ((TESTS_FAILED++)) || true
else
  # One awk pass: select lines mentioning D03 whose second comma-separated
  # field lies in [775, 860) and print how many were found. `$2+0` forces a
  # numeric comparison; `n+0` prints 0 when nothing matched.
  # NOTE(review): field 2 is presumably the clip start time in seconds —
  # confirm against the CSV header.
  d03_in_range=$(awk -F, '/D03/ && $2+0 >= 775 && $2+0 < 860 { n++ } END { print n+0 }' clip_labels_ignore.csv)

  if [ "$d03_in_range" -eq 0 ]; then
    echo -e " ${GREEN}✓${NC} No D03 clips in 775-860s range (correctly excluded)"
    ((TESTS_PASSED++)) || true
  else
    echo -e " ${RED}✗${NC} Found $d03_in_range D03 clips in 775-860s range (should be excluded)"
    ((TESTS_FAILED++)) || true
  fi
fi
((TESTS_RUN++)) || true
# Blank separator line, then hand over to the summary helper. print_summary is
# not defined in this script; presumably it is provided by test_lib.sh.
echo
print_summary