test_mapping_validation.sh
#!/bin/bash
# Test ValidateMappingAgainstDB via the `import segments` CLI command
# Creates temporary .data files and mapping.json, then validates mapping errors
# Uses fresh copy of production DB in /tmp (auto-cleaned)
# Load the shared test helpers. The rest of this script calls helpers it does
# not define itself (check_binary, fresh_test_db, cleanup_test_db, ...), which
# are presumably provided by test_lib.sh, so stop early if it cannot be loaded.
source "$(dirname "$0")/test_lib.sh" || {
  echo "Failed to load test_lib.sh" >&2
  exit 1
}
echo "=== Testing Mapping Validation ==="
echo ""
check_binary

# Create fresh test database
DB_PATH=$(fresh_test_db)
if [ -z "$DB_PATH" ]; then
  echo -e "${RED}✗ Failed to create fresh test database${NC}"
  exit 1
fi

# Start from an empty value so the exit handler below can never act on a
# WAV_DIR inherited from the caller's environment; the script assigns the real
# fixture directory later on.
WAV_DIR=""

# Exit handler: remove the temporary database and, if it has been created by
# then, the WAV fixture directory. Using a function (instead of a trap string
# with embedded quotes) keeps paths containing quote characters safe and also
# covers interrupted runs.
_mapping_test_cleanup() {
  cleanup_test_db "$DB_PATH"
  if [ -n "${WAV_DIR:-}" ]; then
    rm -rf -- "$WAV_DIR"
  fi
}
trap _mapping_test_cleanup EXIT
echo "Using fresh test database: $DB_PATH"
echo ""
SKRAAK="$PROJECT_DIR/skraak"
# Create test entities
# Each `create` call is expected to print a JSON document on stdout; the new
# record's id is pulled out with jq. `// empty` turns a missing id into an
# empty string so the checks below can detect a failed creation.
# "$SKRAAK" is quoted so a project path containing whitespace still works.
echo "Setup: Creating test dataset, location, cluster"
DATASET_RESULT=$("$SKRAAK" create dataset --db "$DB_PATH" --name "Mapping Validation Test" --type structured 2>/dev/null)
DATASET_ID=$(jq -r '.dataset.id // empty' <<<"$DATASET_RESULT")
if [ -z "$DATASET_ID" ]; then
  echo -e "${RED}✗ Failed to create test dataset${NC}"
  exit 1
fi

# The location belongs to the dataset created above.
LOCATION_RESULT=$("$SKRAAK" create location --db "$DB_PATH" --dataset "$DATASET_ID" --name "MapTest Site" --lat -41.2865 --lon 174.7762 --timezone "Pacific/Auckland" 2>/dev/null)
LOCATION_ID=$(jq -r '.location.id // empty' <<<"$LOCATION_RESULT")
if [ -z "$LOCATION_ID" ]; then
  echo -e "${RED}✗ Failed to create test location${NC}"
  exit 1
fi

# The cluster is attached to both the dataset and the location.
CLUSTER_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "MapTest Cluster" --sample-rate 16000 2>/dev/null)
CLUSTER_ID=$(jq -r '.cluster.id // empty' <<<"$CLUSTER_RESULT")
if [ -z "$CLUSTER_ID" ]; then
  echo -e "${RED}✗ Failed to create test cluster${NC}"
  exit 1
fi

echo " Dataset: $DATASET_ID"
echo " Location: $LOCATION_ID"
echo " Cluster: $CLUSTER_ID"
echo ""
# Import WAV files into the cluster first (segments require existing file records)
# The fixture directory is created with mktemp so its name is not predictable.
WAV_DIR=$(mktemp -d /tmp/skraak_map_test_XXXXXX)
if [ -z "$WAV_DIR" ] || [ ! -d "$WAV_DIR" ]; then
  echo -e "${RED}✗ Failed to create temporary WAV directory${NC}"
  exit 1
fi
generate_wav "$WAV_DIR/test_recording.wav" 1 16000

# Capture stdout and stderr together instead of discarding them, so the
# importer's own diagnostics can be shown if the verification below fails.
# `|| true`: success is judged by the row count, not by the exit status.
IMPORT_OUTPUT=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER_ID" --folder "$WAV_DIR" --recursive=false 2>&1) || true

# Verify the import by counting active file rows for the test cluster.
IMPORT_COUNT=$("$SKRAAK" sql --db "$DB_PATH" "SELECT COUNT(*) as cnt FROM file WHERE cluster_id = '$CLUSTER_ID' AND active = true" 2>/dev/null | jq -r '.rows[0].cnt')
if [ "$IMPORT_COUNT" != "1" ]; then
  echo -e "${RED}✗ Failed to import test WAV file (found $IMPORT_COUNT files)${NC}"
  if [ -n "$IMPORT_OUTPUT" ]; then
    printf '%s\n' "$IMPORT_OUTPUT" | head -20
  fi
  rm -rf -- "$WAV_DIR"
  exit 1
fi
echo -e "${GREEN}✓${NC} Imported 1 WAV file for segment testing"
echo ""
# Helper: write the annotation sidecar "<wav path>.data" for a test WAV.
# File layout: [meta, [start, end, freqLow, freqHigh, [labels]]]
# The single label always carries species, certainty and filter; a calltype
# field is appended only when a non-empty calltype is passed.
# Arguments:
#   $1 - path of the WAV file (".data" is appended to form the output path)
#   $2 - species label
#   $3 - calltype label (may be empty)
#   $4 - filter name
create_data_file() {
  local wav_path="$1"
  local species="$2"
  local calltype="$3"
  local filter="$4"
  # Fields common to both label variants.
  local fields="\"species\":\"$species\",\"certainty\":100,\"filter\":\"$filter\""
  if [ -n "$calltype" ]; then
    fields="$fields,\"calltype\":\"$calltype\""
  fi
  printf '%s\n' "[[\"Operator\",\"None\",1],[0,0.5,100,7900,[{$fields}]]]" > "${wav_path}.data"
}
# Helper: run `import segments` for the test dataset/location/cluster against
# the WAV fixture folder, using the given mapping file.
# Globals:   SKRAAK, DB_PATH, DATASET_ID, LOCATION_ID, CLUSTER_ID, WAV_DIR (read)
# Arguments: $1 - path to the mapping JSON file
# Outputs:   the command's stdout and stderr, merged, on stdout
# Returns:   always 0 (`|| true`), so callers judge the outcome from the output
run_import_segments() {
  local mapping_path="$1"
  # "$SKRAAK" is quoted so a binary path containing whitespace is not split.
  "$SKRAAK" import segments --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER_ID" --folder "$WAV_DIR" --mapping "$mapping_path" 2>&1 || true
}
# -------------------------------------------------------
# Test 1: Valid mapping - species exists in DB
# -------------------------------------------------------
# The mapping points the .data species "Roroa" and calltype "Male" at values
# the test expects to find in the database, so none of the three
# mapping-validation messages should appear in the output.
echo "Test 1: Valid mapping (species exists in DB)"
VALID_MAPPING="/tmp/skraak_valid_mapping_$$.json"
cat << 'EOF' > "$VALID_MAPPING"
{
"Roroa": {"species": "Roroa", "calltypes": {"Male": "Male - Solo"}}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Roroa" "Male" "Manual"
# Only mapping validation is under test: the import may still fail later for
# unrelated reasons, which this check deliberately ignores.
RESULT=$(run_import_segments "$VALID_MAPPING")
TESTS_RUN=$((TESTS_RUN + 1))
if grep -qi "species in .data but not in mapping\|mapped species not found in DB\|calltypes not found in DB" <<<"$RESULT"; then
  echo -e "${RED}✗${NC} Unexpected mapping validation error:"
  grep -i "mapping\|species\|calltype" <<<"$RESULT" | head -5
  TESTS_FAILED=$((TESTS_FAILED + 1))
else
  echo -e "${GREEN}✓${NC} No mapping validation errors for valid mapping"
  TESTS_PASSED=$((TESTS_PASSED + 1))
fi
# Remove this test's fixtures so the next test starts clean.
rm -f "$VALID_MAPPING" "$WAV_DIR/test_recording.wav.data"
# -------------------------------------------------------
# Test 2: Species in .data but not in mapping
# -------------------------------------------------------
# The .data file labels the segment "Kiwi" while the mapping only has a key
# for "SomeOtherSpecies", so the import is expected to report the unmapped
# species.
echo ""
echo "Test 2: Species in .data but not in mapping"
INCOMPLETE_MAPPING="/tmp/skraak_incomplete_mapping_$$.json"
cat << 'EOF' > "$INCOMPLETE_MAPPING"
{
"SomeOtherSpecies": {"species": "Roroa"}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Kiwi" "" "Manual"
RESULT=$(run_import_segments "$INCOMPLETE_MAPPING")
TESTS_RUN=$((TESTS_RUN + 1))
if grep -qi "species in .data but not in mapping" <<<"$RESULT"; then
  echo -e "${GREEN}✓${NC} Correctly detected unmapped species in .data"
  TESTS_PASSED=$((TESTS_PASSED + 1))
else
  echo -e "${RED}✗${NC} Should have detected unmapped species 'Kiwi'"
  head -5 <<<"$RESULT"
  TESTS_FAILED=$((TESTS_FAILED + 1))
fi
# Remove this test's fixtures so the next test starts clean.
rm -f "$INCOMPLETE_MAPPING" "$WAV_DIR/test_recording.wav.data"
# -------------------------------------------------------
# Test 3: Mapped species not found in DB
# -------------------------------------------------------
# "Kiwi" is present in the mapping, but it maps to "PhantomSpecies", which the
# test expects to be absent from the database.
echo ""
echo "Test 3: Mapped species not found in DB"
PHANTOM_MAPPING="/tmp/skraak_phantom_mapping_$$.json"
cat << 'EOF' > "$PHANTOM_MAPPING"
{
"Kiwi": {"species": "PhantomSpecies"}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Kiwi" "" "Manual"
RESULT=$(run_import_segments "$PHANTOM_MAPPING")
TESTS_RUN=$((TESTS_RUN + 1))
# NOTE(review): the second alternative is a substring of the first, so in
# effect any "not found in DB" text satisfies this check.
if grep -qi "mapped species not found in DB\|not found in DB" <<<"$RESULT"; then
  echo -e "${GREEN}✓${NC} Correctly detected species not in DB"
  TESTS_PASSED=$((TESTS_PASSED + 1))
else
  echo -e "${RED}✗${NC} Should have detected 'PhantomSpecies' not in DB"
  head -5 <<<"$RESULT"
  TESTS_FAILED=$((TESTS_FAILED + 1))
fi
# Remove this test's fixtures so the next test starts clean.
rm -f "$PHANTOM_MAPPING" "$WAV_DIR/test_recording.wav.data"
# -------------------------------------------------------
# Test 4: Calltype not found in DB
# -------------------------------------------------------
# The species mapping is the same as in the valid case, but calltype "Male" is
# mapped to "NonexistentCall", which the test expects to be absent from the
# database.
echo ""
echo "Test 4: Calltype not found in DB"
BAD_CT_MAPPING="/tmp/skraak_bad_ct_mapping_$$.json"
cat << 'EOF' > "$BAD_CT_MAPPING"
{
"Roroa": {"species": "Roroa", "calltypes": {"Male": "NonexistentCall"}}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Roroa" "Male" "Manual"
RESULT=$(run_import_segments "$BAD_CT_MAPPING")
TESTS_RUN=$((TESTS_RUN + 1))
if grep -qi "calltypes not found in DB" <<<"$RESULT"; then
  echo -e "${GREEN}✓${NC} Correctly detected calltype not in DB"
  TESTS_PASSED=$((TESTS_PASSED + 1))
else
  echo -e "${RED}✗${NC} Should have detected 'NonexistentCall' calltype not in DB"
  head -5 <<<"$RESULT"
  TESTS_FAILED=$((TESTS_FAILED + 1))
fi
# Remove this test's fixtures so the next test starts clean.
rm -f "$BAD_CT_MAPPING" "$WAV_DIR/test_recording.wav.data"
# -------------------------------------------------------
# Test 5: __NEGATIVE__ sentinel - should not error
# -------------------------------------------------------
# "Noise" is mapped to the __NEGATIVE__ sentinel. The test passes as long as
# the output does not report __NEGATIVE__ as a species missing from the DB.
echo ""
echo "Test 5: __NEGATIVE__ sentinel (no DB lookup)"
NEG_MAPPING="/tmp/skraak_neg_mapping_$$.json"
cat > "$NEG_MAPPING" << 'EOF'
{
"Noise": {"species": "__NEGATIVE__"}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Noise" "" "Manual"
RESULT=$(run_import_segments "$NEG_MAPPING")
# __NEGATIVE__ species are NOT looked up in DB, so no "mapped species not found" error.
# The pattern mirrors the __IGNORE__ check in Test 6; the earlier stray
# "Phantom" alternative had nothing to do with this test's fixtures.
if ! echo "$RESULT" | grep -qi "mapped species not found in DB.*__NEGATIVE__"; then
  echo -e "${GREEN}✓${NC} __NEGATIVE__ sentinel not looked up in DB"
  ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
  echo -e "${RED}✗${NC} __NEGATIVE__ should not trigger DB species lookup"
  echo "$RESULT" | head -5
  ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi
# Remove this test's fixtures so the next test starts clean.
rm -f "$NEG_MAPPING" "$WAV_DIR/test_recording.wav.data"
# -------------------------------------------------------
# Test 6: __IGNORE__ sentinel - should not error
# -------------------------------------------------------
# "Skip" is mapped to the __IGNORE__ sentinel. The test passes as long as the
# output does not report __IGNORE__ as a species missing from the DB.
echo ""
echo "Test 6: __IGNORE__ sentinel (no DB lookup)"
IGNORE_MAPPING="/tmp/skraak_ignore_mapping_$$.json"
cat << 'EOF' > "$IGNORE_MAPPING"
{
"Skip": {"species": "__IGNORE__"}
}
EOF
create_data_file "$WAV_DIR/test_recording.wav" "Skip" "" "Manual"
RESULT=$(run_import_segments "$IGNORE_MAPPING")
TESTS_RUN=$((TESTS_RUN + 1))
if grep -qi "mapped species not found in DB.*__IGNORE__" <<<"$RESULT"; then
  echo -e "${RED}✗${NC} __IGNORE__ should not trigger DB species lookup"
  head -5 <<<"$RESULT"
  TESTS_FAILED=$((TESTS_FAILED + 1))
else
  echo -e "${GREEN}✓${NC} __IGNORE__ sentinel not looked up in DB"
  TESTS_PASSED=$((TESTS_PASSED + 1))
fi
# Remove this test's fixtures so nothing is left behind.
rm -f "$IGNORE_MAPPING" "$WAV_DIR/test_recording.wav.data"
# Cleanup: delete the WAV fixture directory, then print the summary via the
# print_summary helper (not defined in this script).
rm -rf -- "$WAV_DIR"
echo
print_summary