test_db_state.sh
#!/bin/bash
# Verify database state - check table counts and referential integrity
# Usage: ./test_db_state.sh [db_path]
# Default: uses test.duckdb
source "$(dirname "$0")/test_lib.sh"
DB_PATH="${1:-$DEFAULT_TEST_DB}"
if [ ! -f "$DB_PATH" ]; then
echo -e "${RED}Error: Database not found at $DB_PATH${NC}"
exit 1
fi
echo "=== Database State Verification ==="
echo "Database: $DB_PATH"
echo ""
check_binary
sql() {
"$PROJECT_DIR/skraak" sql --db "$DB_PATH" "$1" 2>/dev/null
}
cnt() {
sql "$1" | jq -r '.rows[0].cnt // "error"'
}
# Assert a query returns 0 rows (integrity violation check)
check_zero() {
local name="$1"
local query="$2"
local count
count=$(cnt "$query")
((TESTS_RUN++)) || true
if [ "$count" = "0" ]; then
echo -e " ${GREEN}✓${NC} $name"
((TESTS_PASSED++)) || true
else
echo -e " ${RED}✗${NC} $name: $count violation(s)"
((TESTS_FAILED++)) || true
fi
}
# ── Counts ────────────────────────────────────────────────────────────────────
echo "Table Counts:"
echo " Datasets: $(cnt 'SELECT COUNT(*) AS cnt FROM dataset WHERE active = true')"
echo " Locations: $(cnt 'SELECT COUNT(*) AS cnt FROM location WHERE active = true')"
echo " Clusters: $(cnt 'SELECT COUNT(*) AS cnt FROM cluster WHERE active = true')"
echo " Files: $(cnt 'SELECT COUNT(*) AS cnt FROM file WHERE active = true')"
echo " File-Dataset: $(cnt 'SELECT COUNT(*) AS cnt FROM file_dataset')"
echo " Segments: $(cnt 'SELECT COUNT(*) AS cnt FROM segment WHERE active = true')"
echo " Labels: $(cnt 'SELECT COUNT(*) AS cnt FROM label WHERE active = true')"
echo " Label subtypes: $(cnt 'SELECT COUNT(*) AS cnt FROM label_subtype WHERE active = true')"
echo " Moth metadata: $(cnt 'SELECT COUNT(*) AS cnt FROM moth_metadata WHERE active = true')"
echo " File metadata: $(cnt 'SELECT COUNT(*) AS cnt FROM file_metadata WHERE active = true')"
echo " Label metadata: $(cnt 'SELECT COUNT(*) AS cnt FROM label_metadata WHERE active = true')"
echo ""
# ── Location hierarchy ────────────────────────────────────────────────────────
echo "Location hierarchy:"
check_zero "location.dataset_id → dataset" \
"SELECT COUNT(*) AS cnt FROM location l LEFT JOIN dataset d ON l.dataset_id = d.id WHERE d.id IS NULL"
check_zero "cluster.location_id → location" \
"SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN location l ON c.location_id = l.id WHERE l.id IS NULL"
check_zero "cluster.dataset_id → dataset" \
"SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN dataset d ON c.dataset_id = d.id WHERE d.id IS NULL"
check_zero "cluster.cyclic_recording_pattern_id → cyclic_recording_pattern" \
"SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN cyclic_recording_pattern p ON c.cyclic_recording_pattern_id = p.id WHERE c.cyclic_recording_pattern_id IS NOT NULL AND p.id IS NULL"
echo ""
# ── File linkage ──────────────────────────────────────────────────────────────
echo "File linkage:"
check_zero "file.location_id → location" \
"SELECT COUNT(*) AS cnt FROM file f LEFT JOIN location l ON f.location_id = l.id WHERE f.location_id IS NOT NULL AND l.id IS NULL"
check_zero "file.cluster_id → cluster" \
"SELECT COUNT(*) AS cnt FROM file f LEFT JOIN cluster c ON f.cluster_id = c.id WHERE f.cluster_id IS NOT NULL AND c.id IS NULL"
check_zero "file_dataset.file_id → file" \
"SELECT COUNT(*) AS cnt FROM file_dataset fd LEFT JOIN file f ON fd.file_id = f.id WHERE f.id IS NULL"
check_zero "file_dataset.dataset_id → dataset" \
"SELECT COUNT(*) AS cnt FROM file_dataset fd LEFT JOIN dataset d ON fd.dataset_id = d.id WHERE d.id IS NULL"
check_zero "active files have file_dataset entry" \
"SELECT COUNT(*) AS cnt FROM file f LEFT JOIN file_dataset fd ON f.id = fd.file_id WHERE f.active = true AND fd.file_id IS NULL"
check_zero "file_dataset count >= active file count" \
"SELECT CASE WHEN (SELECT COUNT(*) FROM file_dataset) >= (SELECT COUNT(*) FROM file WHERE active = true) THEN 0 ELSE 1 END AS cnt"
check_zero "file_metadata.file_id → file" \
"SELECT COUNT(*) AS cnt FROM file_metadata fm LEFT JOIN file f ON fm.file_id = f.id WHERE f.id IS NULL"
echo ""
# ── Segment integrity ─────────────────────────────────────────────────────────
echo "Segment integrity:"
check_zero "segment.file_id → file" \
"SELECT COUNT(*) AS cnt FROM segment s LEFT JOIN file f ON s.file_id = f.id WHERE f.id IS NULL"
check_zero "segment.dataset_id → dataset" \
"SELECT COUNT(*) AS cnt FROM segment s LEFT JOIN dataset d ON s.dataset_id = d.id WHERE d.id IS NULL"
check_zero "active segments on inactive files" \
"SELECT COUNT(*) AS cnt FROM segment s JOIN file f ON s.file_id = f.id WHERE s.active = true AND f.active = false"
echo ""
# ── Label integrity ───────────────────────────────────────────────────────────
echo "Label integrity:"
check_zero "label.segment_id → segment" \
"SELECT COUNT(*) AS cnt FROM label l LEFT JOIN segment s ON l.segment_id = s.id WHERE s.id IS NULL"
check_zero "label.species_id → species" \
"SELECT COUNT(*) AS cnt FROM label l LEFT JOIN species sp ON l.species_id = sp.id WHERE sp.id IS NULL"
check_zero "label.filter_id → filter" \
"SELECT COUNT(*) AS cnt FROM label l LEFT JOIN filter f ON l.filter_id = f.id WHERE f.id IS NULL"
check_zero "active labels on inactive segments" \
"SELECT COUNT(*) AS cnt FROM label l JOIN segment s ON l.segment_id = s.id WHERE l.active = true AND s.active = false"
check_zero "label_metadata.label_id → label" \
"SELECT COUNT(*) AS cnt FROM label_metadata lm LEFT JOIN label l ON lm.label_id = l.id WHERE l.id IS NULL"
echo ""
# ── Label subtype integrity ───────────────────────────────────────────────────
echo "Label subtype integrity:"
check_zero "label_subtype.label_id → label" \
"SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN label l ON ls.label_id = l.id WHERE l.id IS NULL"
check_zero "label_subtype.calltype_id → call_type" \
"SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN call_type ct ON ls.calltype_id = ct.id WHERE ct.id IS NULL"
check_zero "label_subtype.filter_id → filter" \
"SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN filter f ON ls.filter_id = f.id WHERE ls.filter_id IS NOT NULL AND f.id IS NULL"
echo ""
# ── Reference table integrity ─────────────────────────────────────────────────
echo "Reference table integrity:"
check_zero "call_type.species_id → species" \
"SELECT COUNT(*) AS cnt FROM call_type ct LEFT JOIN species sp ON ct.species_id = sp.id WHERE sp.id IS NULL"
echo ""
# ── Summary ───────────────────────────────────────────────────────────────────
echo "Summary: $TESTS_PASSED/$TESTS_RUN checks passed"
if [ "$TESTS_FAILED" -gt 0 ]; then
exit 1
fi