VU3KBTQ6AFJV36WVQ4A7BM7Q3MLJQX4DBCGMIZJXNPMEKLIBGHZAC package utilsimport ("database/sql""strings""testing"_ "github.com/duckdb/duckdb-go/v2")// setupMappingTestDB creates an in-memory DB with schema + test species/calltypes.// Species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000)// Calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002), Roroa/brrr (ct_roroa00001)func setupMappingTestDB(t *testing.T) *sql.DB {t.Helper()db, err := sql.Open("duckdb", ":memory:")if err != nil {t.Fatalf("open: %v", err)}// Create minimal tables needed by mapping validation queriesmustExecMapping(t, db, `CREATE TABLE species (id VARCHAR(12) PRIMARY KEY,label VARCHAR(100) UNIQUE NOT NULL,active BOOLEAN DEFAULT TRUE)`)mustExecMapping(t, db, `CREATE TABLE call_type (id VARCHAR(12) PRIMARY KEY,species_id VARCHAR(12) NOT NULL,label VARCHAR(100) NOT NULL,active BOOLEAN DEFAULT TRUE)`)// Insert test speciesmustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_kiwi000000', 'Kiwi', true)")mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_roroa00000', 'Roroa', true)")mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_tui0000000', 'Tui', false)") // inactive// Insert test calltypesmustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000001', 'sp_kiwi000000', 'song', true)")mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000002', 'sp_kiwi000000', 'duet', true)")mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_roroa00001', 'sp_roroa00000', 'brrr', true)")return db}func mustExecMapping(t *testing.T, db *sql.DB, query string) {t.Helper()if _, err := db.Exec(query); err != nil {t.Fatalf("exec: %v", err)}}// --- collectMappedLabels ---func TestCollectMappedLabels(t *testing.T) {mapping := MappingFile{"GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},"K-M": {Species: 
"Kiwi"},"noise": {Species: MappingNegative},}dataCalltypes := map[string]map[string]bool{"GSK": {"brrr": true},"K-M": {"song": true, "duet": true},}speciesSet, calltypes := collectMappedLabels(mapping, dataCalltypes)if !speciesSet["Roroa"] || !speciesSet["Kiwi"] {t.Errorf("speciesSet=%v, want Kiwi and Roroa", speciesSet)}if speciesSet[MappingNegative] {t.Error("sentinel species should be excluded")}// Roroa has explicit calltype mappingif calltypes["Roroa"]["brrr"] != "brrr" {t.Errorf("Roroa calltypes=%v", calltypes["Roroa"])}// Kiwi has no calltype mapping, so data calltypes pass throughif calltypes["Kiwi"]["song"] != "song" || calltypes["Kiwi"]["duet"] != "duet" {t.Errorf("Kiwi calltypes=%v", calltypes["Kiwi"])}}// --- collectUnmappedCalltypes ---func TestCollectUnmappedCalltypes(t *testing.T) {mapping := MappingFile{"GSK": {Species: "Roroa", Calltypes: map[string]string{"Male": "brrr"}},}dataCalltypes := map[string]map[string]bool{"GSK": {"Male": true, "Female": true},}mappedCalltypes := make(map[string]map[string]string)collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)// Male maps to brrrif mappedCalltypes["Roroa"]["brrr"] != "Male" {t.Errorf("mapped Male->brrr: %v", mappedCalltypes["Roroa"])}// Female has no mapping entry, passes through as-isif mappedCalltypes["Roroa"]["Female"] != "Female" {t.Errorf("unmapped Female passthrough: %v", mappedCalltypes["Roroa"])}}// --- validateMappedSpecies ---func TestValidateMappedSpecies(t *testing.T) {db := setupMappingTestDB(t)defer db.Close()t.Run("all species exist in DB", func(t *testing.T) {result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}err := validateMappedSpecies(db, map[string]bool{"Kiwi": true, "Roroa": true}, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) > 0 {t.Errorf("missing species: %v", result.MissingDBSpecies)}})t.Run("species not in DB reported", func(t *testing.T) {result := &MappingValidationResult{MissingDBSpecies: 
make([]string, 0)}err := validateMappedSpecies(db, map[string]bool{"Phantom": true}, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) != 1 || result.MissingDBSpecies[0] != "Phantom" {t.Errorf("expected [Phantom], got %v", result.MissingDBSpecies)}})t.Run("inactive species not found", func(t *testing.T) {result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}err := validateMappedSpecies(db, map[string]bool{"Tui": true}, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) != 1 {t.Errorf("inactive species should be missing, got %v", result.MissingDBSpecies)}})t.Run("empty set is no-op", func(t *testing.T) {result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}err := validateMappedSpecies(db, map[string]bool{}, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) != 0 {t.Errorf("expected no missing, got %v", result.MissingDBSpecies)}})}// --- validateMappedCalltypes ---func TestValidateMappedCalltypes(t *testing.T) {db := setupMappingTestDB(t)defer db.Close()t.Run("all calltypes exist", func(t *testing.T) {result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}ctMap := map[string]map[string]string{"Kiwi": {"song": "data-song", "duet": "data-duet"},}err := validateMappedCalltypes(db, ctMap, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingCalltypes) > 0 {t.Errorf("missing calltypes: %v", result.MissingCalltypes)}})t.Run("missing calltype reported", func(t *testing.T) {result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}ctMap := map[string]map[string]string{"Kiwi": {"phantom": "data-phantom"},}err := validateMappedCalltypes(db, ctMap, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingCalltypes) != 1 {t.Errorf("expected 1 missing, got %v", result.MissingCalltypes)}})t.Run("empty calltype map skips species", 
func(t *testing.T) {result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}ctMap := map[string]map[string]string{"Kiwi": {},}err := validateMappedCalltypes(db, ctMap, result)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingCalltypes) != 0 {t.Errorf("expected none missing, got %v", result.MissingCalltypes)}})}// --- ValidateMappingAgainstDB (integration of all above) ---func TestValidateMappingAgainstDB(t *testing.T) {db := setupMappingTestDB(t)defer db.Close()t.Run("valid mapping - no errors", func(t *testing.T) {mapping := MappingFile{"GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},"K-M": {Species: "Kiwi"},}dataSpecies := map[string]bool{"GSK": true, "K-M": true}dataCT := map[string]map[string]bool{"GSK": {"brrr": true},"K-M": {"song": true},}result, err := ValidateMappingAgainstDB(db, mapping, dataSpecies, dataCT)if err != nil {t.Fatalf("unexpected error: %v", err)}if result.HasErrors() {t.Errorf("expected no errors, got: %v", result)}})t.Run("missing species in mapping", func(t *testing.T) {mapping := MappingFile{"GSK": {Species: "Roroa"},}dataSpecies := map[string]bool{"GSK": true, "K-M": true}result, err := ValidateMappingAgainstDB(db, mapping, dataSpecies, nil)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingSpecies) != 1 || result.MissingSpecies[0] != "K-M" {t.Errorf("expected [K-M] missing, got %v", result.MissingSpecies)}})t.Run("mapped species not in DB", func(t *testing.T) {mapping := MappingFile{"PHANTOM": {Species: "Phantom"},}dataSpecies := map[string]bool{"PHANTOM": true}result, err := ValidateMappingAgainstDB(db, mapping, dataSpecies, nil)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) != 1 || result.MissingDBSpecies[0] != "Phantom" {t.Errorf("expected [Phantom] missing from DB, got %v", result.MissingDBSpecies)}})t.Run("sentinel species excluded from DB check", func(t *testing.T) {mapping := MappingFile{"noise": 
{Species: MappingNegative},"ignore": {Species: MappingIgnore},}dataSpecies := map[string]bool{"noise": true, "ignore": true}result, err := ValidateMappingAgainstDB(db, mapping, dataSpecies, nil)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingDBSpecies) > 0 {t.Errorf("sentinels should not be checked against DB, got: %v", result.MissingDBSpecies)}})t.Run("missing calltype in DB", func(t *testing.T) {mapping := MappingFile{"K-M": {Species: "Kiwi", Calltypes: map[string]string{"song": "song", "phantom": "phantom"}},}dataSpecies := map[string]bool{"K-M": true}dataCT := map[string]map[string]bool{"K-M": {"song": true, "phantom": true},}result, err := ValidateMappingAgainstDB(db, mapping, dataSpecies, dataCT)if err != nil {t.Fatalf("unexpected error: %v", err)}if len(result.MissingCalltypes) == 0 {t.Error("expected missing calltype for phantom")}if !strings.Contains(result.Error(), "phantom") {t.Errorf("error should mention phantom: %s", result.Error())}})}
package tools

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// --- datasetTables manifest completeness ---

// TestDatasetTablesManifest verifies that every table name in the expected
// list appears in the datasetTables manifest.
func TestDatasetTablesManifest(t *testing.T) {
	tableNames := make(map[string]bool)
	for _, tr := range datasetTables {
		tableNames[tr.Table] = true
	}

	expected := []string{
		"dataset", "location", "cluster", "segment", "file_dataset",
		"file", "moth_metadata", "file_metadata", "label_metadata",
		"label", "label_subtype",
		"ebird_taxonomy", "species", "call_type", "cyclic_recording_pattern", "filter",
	}
	for _, name := range expected {
		if !tableNames[name] {
			t.Errorf("manifest missing table: %s", name)
		}
	}
}

// TestDatasetTablesRelations verifies that each manifest entry uses a known
// relation and carries the fields that relation requires.
func TestDatasetTablesRelations(t *testing.T) {
	validRelations := map[string]bool{"owned": true, "owned-via": true, "copy": true}
	for _, tr := range datasetTables {
		if !validRelations[tr.Relation] {
			t.Errorf("table %s has invalid relation: %s", tr.Table, tr.Relation)
		}
		if tr.Relation == "owned-via" && tr.ViaTable == "" {
			t.Errorf("table %s is owned-via but has no ViaTable", tr.Table)
		}
		if tr.Relation == "owned" && tr.FilterCol == "" {
			t.Errorf("table %s is owned but has no FilterCol", tr.Table)
		}
		if tr.Relation == "copy" && (tr.FilterCol != "" || tr.ViaTable != "") {
			t.Errorf("copy table %s should not have FilterCol/ViaTable", tr.Table)
		}
	}
}

// --- orderByFKDependency ---

// TestOrderByFKDependency verifies that orderByFKDependency returns the
// tables in the order given by the FK order list.
func TestOrderByFKDependency(t *testing.T) {
	tables := []TableRelationship{
		{Table: "label", Relation: "owned-via"},
		{Table: "dataset", Relation: "owned"},
		{Table: "file", Relation: "owned-via"},
		{Table: "location", Relation: "owned"},
		{Table: "species", Relation: "copy"},
	}
	// FK order: dataset, location, species, file, label
	fkOrder := []string{"dataset", "location", "species", "file", "label"}

	sorted := orderByFKDependency(tables, fkOrder)

	// Verify sorted order matches FK order
	expectedOrder := []string{"dataset", "location", "species", "file", "label"}

	// Compare lengths first: a longer result would index past expectedOrder
	// and panic, a shorter one would let the loop below pass vacuously.
	if len(sorted) != len(expectedOrder) {
		t.Fatalf("got %d tables, want %d: %v", len(sorted), len(expectedOrder), sorted)
	}
	for i, tr := range sorted {
		if tr.Table != expectedOrder[i] {
			t.Errorf("position %d: got %s, want %s", i, tr.Table, expectedOrder[i])
		}
	}
}

// TestOrderByFKDependency_UnknownTable verifies that a table absent from the
// FK order list is placed first.
func TestOrderByFKDependency_UnknownTable(t *testing.T) {
	tables := []TableRelationship{
		{Table: "unknown_table", Relation: "copy"},
		{Table: "dataset", Relation: "owned"},
	}

	// Unknown table gets order 0 (default int), so it sorts before known tables
	sorted := orderByFKDependency(tables, []string{"dataset"})

	// Guard the index below so an empty result fails cleanly instead of panicking.
	if len(sorted) == 0 {
		t.Fatal("orderByFKDependency returned no tables")
	}
	if sorted[0].Table != "unknown_table" {
		t.Errorf("unknown table should sort first (order=0), got %s", sorted[0].Table)
	}
}

// --- buildOwnedViaQuery ---

// TestBuildOwnedViaQuery verifies that the generated query mentions the
// table, the via table and a dataset_id placeholder.
func TestBuildOwnedViaQuery(t *testing.T) {
	tests := []struct {
		name    string
		tr      TableRelationship
		wantVia string // should contain the ViaTable name in the query
	}{
		{"via cluster", TableRelationship{Table: "file", FilterCol: "cluster_id", ViaTable: "cluster"}, "cluster"},
		{"via file", TableRelationship{Table: "moth_metadata", FilterCol: "file_id", ViaTable: "file"}, "file"},
		{"via segment", TableRelationship{Table: "label", FilterCol: "segment_id", ViaTable: "segment"}, "segment"},
		{"via label", TableRelationship{Table: "label_metadata", FilterCol: "label_id", ViaTable: "label"}, "label"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			query := buildOwnedViaQuery(tt.tr, "ds_test")
			if !strings.Contains(query, tt.tr.Table) {
				t.Errorf("query doesn't contain table name %s: %s", tt.tr.Table, query)
			}
			if !strings.Contains(query, tt.wantVia) {
				t.Errorf("query doesn't contain via table %s: %s", tt.wantVia, query)
			}
			if !strings.Contains(query, "dataset_id = ?") {
				t.Errorf("query doesn't contain dataset_id parameter: %s", query)
			}
		})
	}
}

// --- buildCountOwnedViaQuery ---

// TestBuildCountOwnedViaQuery verifies that the generated count query
// contains COUNT(*), the table name and a dataset_id placeholder.
// The wantVia field is recorded per case but is not asserted here.
func TestBuildCountOwnedViaQuery(t *testing.T) {
	tests := []struct {
		name    string
		tr      TableRelationship
		wantVia string
	}{
		{"via cluster", TableRelationship{Table: "file", FilterCol: "cluster_id", ViaTable: "cluster"}, "cluster"},
		{"via file", TableRelationship{Table: "moth_metadata", FilterCol: "file_id", ViaTable: "file"}, "file"},
		{"via segment", TableRelationship{Table: "label", FilterCol: "segment_id", ViaTable: "segment"}, "segment"},
		{"via label", TableRelationship{Table: "label_subtype", FilterCol: "label_id", ViaTable: "label"}, "label"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			query := buildCountOwnedViaQuery(tt.tr)
			if !strings.Contains(query, "COUNT(*)") {
				t.Errorf("count query doesn't contain COUNT(*): %s", query)
			}
			if !strings.Contains(query, tt.tr.Table) {
				t.Errorf("count query doesn't contain table %s: %s", tt.tr.Table, query)
			}
			if !strings.Contains(query, "dataset_id = ?") {
				t.Errorf("count query doesn't contain dataset_id param: %s", query)
			}
		})
	}
}

// --- checkOutputFile ---

// TestCheckOutputFile covers checkOutputFile for dry runs, a missing output
// path, and an existing output file with and without Force.
func TestCheckOutputFile(t *testing.T) {
	t.Run("dry run always passes", func(t *testing.T) {
		err := checkOutputFile(ExportDatasetInput{DryRun: true})
		if err != nil {
			t.Errorf("dry run should not error: %v", err)
		}
	})

	t.Run("nonexistent output passes", func(t *testing.T) {
		err := checkOutputFile(ExportDatasetInput{
			Output: filepath.Join(t.TempDir(), "nonexistent.duckdb"),
		})
		if err != nil {
			t.Errorf("nonexistent output should pass: %v", err)
		}
	})

	t.Run("existing output without force fails", func(t *testing.T) {
		dir := t.TempDir()
		existing := filepath.Join(dir, "existing.duckdb")
		if err := os.WriteFile(existing, []byte("test"), 0644); err != nil {
			t.Fatalf("setup: %v", err)
		}

		err := checkOutputFile(ExportDatasetInput{Output: existing})
		if err == nil || !strings.Contains(err.Error(), "exists") {
			t.Errorf("expected exists error, got: %v", err)
		}
	})

	t.Run("existing output with force passes", func(t *testing.T) {
		dir := t.TempDir()
		existing := filepath.Join(dir, "existing.duckdb")
		if err := os.WriteFile(existing, []byte("test"), 0644); err != nil {
			t.Fatalf("setup: %v", err)
		}

		err := checkOutputFile(ExportDatasetInput{Output: existing, Force: true})
		if err != nil {
			t.Errorf("force should overwrite: %v", err)
		}
	})
}
package calls

import (
	"testing"

	"skraak/utils"
)

// --- filterSegments ---

// TestFilterSegments counts the segments filterSegments returns for a fixed
// four-segment fixture under each filter argument, plus a nil input slice.
func TestFilterSegments(t *testing.T) {
	kiwi := &utils.Label{Filter: "kiwi.txt", Species: "Kiwi", CallType: "song", Certainty: 100}
	tomtit := &utils.Label{Filter: "tomtit.txt", Species: "Tomtit", Certainty: 80}

	// Fixture: Kiwi only, Tomtit only, both labels, and an empty label list.
	fixture := []*utils.Segment{
		{Labels: []*utils.Label{kiwi}},
		{Labels: []*utils.Label{tomtit}},
		{Labels: []*utils.Label{kiwi, tomtit}},
		{Labels: []*utils.Label{}},
	}

	// Each case wraps its own filterSegments call so the literal arguments
	// stay exactly as written; only the resulting length is compared.
	cases := []struct {
		name    string
		count   func() int
		want    int
		failFmt string
	}{
		{
			name:    "no filters returns all",
			count:   func() int { return len(filterSegments(fixture, "", "", "", -1)) },
			want:    4,
			failFmt: "expected 4 segments (no filters), got %d",
		},
		{
			name:    "filter by species",
			count:   func() int { return len(filterSegments(fixture, "", "Kiwi", "", -1)) },
			want:    2,
			failFmt: "expected 2 segments with Kiwi, got %d",
		},
		{
			name:    "filter by filter name",
			count:   func() int { return len(filterSegments(fixture, "kiwi.txt", "", "", -1)) },
			want:    2,
			failFmt: "expected 2 segments with kiwi.txt filter, got %d",
		},
		{
			name:    "filter by certainty",
			count:   func() int { return len(filterSegments(fixture, "", "", "", 100)) },
			want:    2,
			failFmt: "expected 2 segments with certainty=100, got %d",
		},
		{
			name:    "no matching segments",
			count:   func() int { return len(filterSegments(fixture, "", "Phantom", "", -1)) },
			want:    0,
			failFmt: "expected 0 segments, got %d",
		},
		{
			name:    "nil segments slice",
			count:   func() int { return len(filterSegments(nil, "", "", "", -1)) },
			want:    0,
			failFmt: "expected 0, got %d",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if n := tc.count(); n != tc.want {
				t.Errorf(tc.failFmt, n)
			}
		})
	}
}

// --- validateClipInput ---

// TestValidateClipInput checks which CallsClipInput field combinations
// validateClipInput rejects and which it accepts.
func TestValidateClipInput(t *testing.T) {
	cases := []struct {
		name         string
		in           CallsClipInput
		wantErr      bool
		missingMsg   string // reported when an expected error did not occur
		checkOutErrs bool   // also require exactly one entry in out.Errors
	}{
		{
			name:         "missing file and folder",
			in:           CallsClipInput{},
			wantErr:      true,
			missingMsg:   "expected error",
			checkOutErrs: true,
		},
		{
			name:       "missing output",
			in:         CallsClipInput{File: "test.data"},
			wantErr:    true,
			missingMsg: "expected error for missing output",
		},
		{
			name:       "missing prefix",
			in:         CallsClipInput{File: "test.data", Output: "/tmp"},
			wantErr:    true,
			missingMsg: "expected error for missing prefix",
		},
		{
			name: "all required fields present via file",
			in:   CallsClipInput{File: "test.data", Output: "/tmp", Prefix: "clip"},
		},
		{
			name: "all required fields present via folder",
			in:   CallsClipInput{Folder: "/data", Output: "/tmp", Prefix: "clip"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var out CallsClipOutput
			err := validateClipInput(&out, tc.in)

			switch {
			case tc.wantErr && err == nil:
				t.Error(tc.missingMsg)
			case !tc.wantErr && err != nil:
				t.Errorf("unexpected error: %v", err)
			}

			if tc.checkOutErrs && len(out.Errors) != 1 {
				t.Errorf("expected 1 output error, got %d", len(out.Errors))
			}
		})
	}
}

// --- accumulateFileResult ---

// TestAccumulateFileResult checks the CallsClipOutput counters after a single
// accumulateFileResult call for each combination of clips, skips and errors.
func TestAccumulateFileResult(t *testing.T) {
	t.Run("accumulates clips and errors", func(t *testing.T) {
		var out CallsClipOutput
		accumulateFileResult(&out, []string{"a.wav", "a.png"}, 0, []string{"warn1"}, false)

		if got := out.SegmentsClipped; got != 2 {
			t.Errorf("SegmentsClipped=%d want 2", got)
		}
		if got := out.FilesProcessed; got != 1 {
			t.Errorf("FilesProcessed=%d want 1", got)
		}
		if got := len(out.OutputFiles); got != 2 {
			t.Errorf("OutputFiles=%d want 2", got)
		}
		if got := len(out.Errors); got != 1 {
			t.Errorf("Errors=%d want 1", got)
		}
	})

	t.Run("night skip goes to NightSkipped", func(t *testing.T) {
		var out CallsClipOutput
		accumulateFileResult(&out, []string{"a.wav"}, 3, nil, true)

		if got := out.NightSkipped; got != 3 {
			t.Errorf("NightSkipped=%d want 3", got)
		}
	})

	t.Run("day skip goes to DaySkipped", func(t *testing.T) {
		var out CallsClipOutput
		accumulateFileResult(&out, []string{"a.wav"}, 5, nil, false)

		if got := out.DaySkipped; got != 5 {
			t.Errorf("DaySkipped=%d want 5", got)
		}
	})

	t.Run("no clips and no errors counts as processed", func(t *testing.T) {
		var out CallsClipOutput
		accumulateFileResult(&out, nil, 0, nil, false)

		// len(clips)==0 but len(errs)==0, so file is considered processed
		if got := out.FilesProcessed; got != 1 {
			t.Errorf("FilesProcessed=%d want 1", got)
		}
	})
}
package db

import (
	"database/sql"
	"strings"
	"testing"
)

// setupValidationDB builds an in-memory DB populated with the hierarchy the
// validation tests below rely on.
//
//	dataset (structured):    ds_valtest00001
//	dataset (unstructured):  ds_valtest00002
//	dataset (inactive):      ds_valtest00003
//	location in ds1:         loc_valtest0001
//	location in ds2:         loc_valtest0002
//	location (inactive):     loc_valtest0003 (in ds1)
//	cluster in loc1:         cl_valtest00001
//	cluster (inactive):      cl_valtest00002 (in loc1)
//	pattern:                 pat_valtest0001
//	pattern (inactive):      pat_valtest0002
func setupValidationDB(t *testing.T) *sql.DB {
	t.Helper()
	conn := SetupTestDB(t)

	// Datasets: every row is inserted active, then the third is deactivated.
	InsertTestDatasetWithType(t, conn, "ds_valtest00001", "Val Structured", "structured")
	InsertTestDatasetWithType(t, conn, "ds_valtest00002", "Val Unstructured", "unstructured")
	InsertTestDatasetWithType(t, conn, "ds_valtest00003", "Val Inactive", "structured")
	mustExec(t, conn, "UPDATE dataset SET active = false WHERE id = 'ds_valtest00003'")

	// Locations.
	InsertTestLocation(t, conn, "loc_valtest0001", "ds_valtest00001", "Loc Active")
	InsertTestLocation(t, conn, "loc_valtest0002", "ds_valtest00002", "Loc Unstructured")
	InsertTestLocation(t, conn, "loc_valtest0003", "ds_valtest00001", "Loc Inactive")
	mustExec(t, conn, "UPDATE location SET active = false WHERE id = 'loc_valtest0003'")

	// Clusters.
	InsertTestCluster(t, conn, "cl_valtest00001", "ds_valtest00001", "loc_valtest0001", "Cl Active")
	InsertTestCluster(t, conn, "cl_valtest00002", "ds_valtest00001", "loc_valtest0001", "Cl Inactive")
	mustExec(t, conn, "UPDATE cluster SET active = false WHERE id = 'cl_valtest00002'")

	// Recording patterns.
	InsertTestPattern(t, conn, "pat_valtest0001", 300, 600)
	InsertTestPattern(t, conn, "pat_valtest0002", 60, 60)
	mustExec(t, conn, "UPDATE cyclic_recording_pattern SET active = false WHERE id = 'pat_valtest0002'")

	return conn
}

// mustExec executes query and aborts the test, quoting the statement, when
// execution fails.
func mustExec(t *testing.T, database *sql.DB, query string) {
	t.Helper()
	_, err := database.Exec(query)
	if err == nil {
		return
	}
	t.Fatalf("exec %q: %v", query, err)
}

// wantErrContaining records a test failure unless err is non-nil and its
// message contains substr. kind is the short label used in the failure
// message ("expected <kind> error, got: ...").
func wantErrContaining(t *testing.T, err error, substr, kind string) {
	t.Helper()
	if err != nil && strings.Contains(err.Error(), substr) {
		return
	}
	t.Errorf("expected %s error, got: %v", kind, err)
}

// --- GetDatasetType ---

// TestGetDatasetType checks the type and existence flag reported for a
// structured, an unstructured and an unknown dataset ID.
func TestGetDatasetType(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("existing structured dataset", func(t *testing.T) {
		dtype, found, err := GetDatasetType(conn, "ds_valtest00001")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if !found {
			t.Error("expected exists=true")
		}
		if dtype != "structured" {
			t.Errorf("got %q, want structured", dtype)
		}
	})

	t.Run("existing unstructured dataset", func(t *testing.T) {
		dtype, found, err := GetDatasetType(conn, "ds_valtest00002")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if !found {
			t.Error("expected exists=true")
		}
		if dtype != "unstructured" {
			t.Errorf("got %q, want unstructured", dtype)
		}
	})

	t.Run("nonexistent dataset", func(t *testing.T) {
		_, found, err := GetDatasetType(conn, "ds_nonexist00")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if found {
			t.Error("expected exists=false for nonexistent dataset")
		}
	})
}

// --- ValidateDatasetTypeForImport ---

// TestValidateDatasetTypeForImport expects success for the structured
// dataset and specific errors for the unstructured and unknown ones.
func TestValidateDatasetTypeForImport(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("structured dataset passes", func(t *testing.T) {
		err := ValidateDatasetTypeForImport(conn, "ds_valtest00001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("unstructured dataset fails", func(t *testing.T) {
		err := ValidateDatasetTypeForImport(conn, "ds_valtest00002")
		wantErrContaining(t, err, "only support 'structured'", "structured-only")
	})

	t.Run("nonexistent dataset fails", func(t *testing.T) {
		err := ValidateDatasetTypeForImport(conn, "ds_nonexist00")
		wantErrContaining(t, err, "not found", "not-found")
	})
}

// --- ValidateDatasetTypeUnstructured ---

// TestValidateDatasetTypeUnstructured expects success for the unstructured
// dataset and specific errors for the structured and unknown ones.
func TestValidateDatasetTypeUnstructured(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("unstructured dataset passes", func(t *testing.T) {
		err := ValidateDatasetTypeUnstructured(conn, "ds_valtest00002")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("structured dataset fails", func(t *testing.T) {
		err := ValidateDatasetTypeUnstructured(conn, "ds_valtest00001")
		wantErrContaining(t, err, "only supports 'unstructured'", "unstructured-only")
	})

	t.Run("nonexistent dataset fails", func(t *testing.T) {
		err := ValidateDatasetTypeUnstructured(conn, "ds_nonexist00")
		wantErrContaining(t, err, "not found", "not-found")
	})
}

// --- ValidateLocationBelongsToDataset ---

// TestValidateLocationBelongsToDataset covers a matching pair, a mismatched
// dataset, an inactive location and an unknown location.
func TestValidateLocationBelongsToDataset(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("location belongs to dataset", func(t *testing.T) {
		err := ValidateLocationBelongsToDataset(conn, "loc_valtest0001", "ds_valtest00001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("location belongs to different dataset", func(t *testing.T) {
		err := ValidateLocationBelongsToDataset(conn, "loc_valtest0001", "ds_valtest00002")
		wantErrContaining(t, err, "does not belong", "mismatch")
	})

	t.Run("inactive location", func(t *testing.T) {
		err := ValidateLocationBelongsToDataset(conn, "loc_valtest0003", "ds_valtest00001")
		wantErrContaining(t, err, "not found or inactive", "inactive")
	})

	t.Run("nonexistent location", func(t *testing.T) {
		err := ValidateLocationBelongsToDataset(conn, "loc_nonexist00", "ds_valtest00001")
		// Only the presence of an error is required here, not its wording.
		if err == nil {
			t.Error("expected error for nonexistent location")
		}
	})
}

// --- DatasetExistsAndActive ---

// TestDatasetExistsAndActive checks the returned name for an active dataset
// and the errors for inactive and unknown datasets.
func TestDatasetExistsAndActive(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("active dataset returns name", func(t *testing.T) {
		got, err := DatasetExistsAndActive(conn, "ds_valtest00001")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if got != "Val Structured" {
			t.Errorf("got %q, want Val Structured", got)
		}
	})

	t.Run("inactive dataset fails", func(t *testing.T) {
		_, err := DatasetExistsAndActive(conn, "ds_valtest00003")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent dataset fails", func(t *testing.T) {
		_, err := DatasetExistsAndActive(conn, "ds_nonexist00")
		wantErrContaining(t, err, "does not exist", "does-not-exist")
	})
}

// --- LocationBelongsToDataset ---

// TestLocationBelongsToDataset checks the returned name for a matching pair
// and the errors for wrong-dataset, inactive and unknown locations.
func TestLocationBelongsToDataset(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("active location in correct dataset", func(t *testing.T) {
		got, err := LocationBelongsToDataset(conn, "loc_valtest0001", "ds_valtest00001")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if got != "Loc Active" {
			t.Errorf("got %q, want Loc Active", got)
		}
	})

	t.Run("active location in wrong dataset", func(t *testing.T) {
		_, err := LocationBelongsToDataset(conn, "loc_valtest0001", "ds_valtest00002")
		wantErrContaining(t, err, "does not belong", "does-not-belong")
	})

	t.Run("inactive location", func(t *testing.T) {
		_, err := LocationBelongsToDataset(conn, "loc_valtest0003", "ds_valtest00001")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent location", func(t *testing.T) {
		_, err := LocationBelongsToDataset(conn, "loc_nonexist00", "ds_valtest00001")
		wantErrContaining(t, err, "does not exist", "does-not-exist")
	})
}

// --- ClusterExistsAndActive ---

// TestClusterExistsAndActive covers active, inactive and unknown clusters.
func TestClusterExistsAndActive(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("active cluster passes", func(t *testing.T) {
		err := ClusterExistsAndActive(conn, "cl_valtest00001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("inactive cluster fails", func(t *testing.T) {
		err := ClusterExistsAndActive(conn, "cl_valtest00002")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent cluster fails", func(t *testing.T) {
		err := ClusterExistsAndActive(conn, "cl_nonexist00")
		wantErrContaining(t, err, "not found", "not-found")
	})
}

// --- PatternExistsAndActive ---

// TestPatternExistsAndActive covers active, inactive and unknown patterns.
func TestPatternExistsAndActive(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("active pattern passes", func(t *testing.T) {
		err := PatternExistsAndActive(conn, "pat_valtest0001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("inactive pattern fails", func(t *testing.T) {
		err := PatternExistsAndActive(conn, "pat_valtest0002")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent pattern fails", func(t *testing.T) {
		err := PatternExistsAndActive(conn, "pat_nonexist0")
		wantErrContaining(t, err, "does not exist", "does-not-exist")
	})
}

// --- LocationExistsAndActive ---

// TestLocationExistsAndActive covers active, inactive and unknown locations.
func TestLocationExistsAndActive(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("active location passes", func(t *testing.T) {
		err := LocationExistsAndActive(conn, "loc_valtest0001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("inactive location fails", func(t *testing.T) {
		err := LocationExistsAndActive(conn, "loc_valtest0003")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent location fails", func(t *testing.T) {
		err := LocationExistsAndActive(conn, "loc_nonexist00")
		wantErrContaining(t, err, "not found", "not-found")
	})
}

// --- ValidateDatasetTypeForExport ---

// TestValidateDatasetTypeForExport checks the returned name for the active
// structured dataset and the errors for unstructured, inactive and unknown
// datasets.
func TestValidateDatasetTypeForExport(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("structured active dataset passes", func(t *testing.T) {
		got, err := ValidateDatasetTypeForExport(conn, "ds_valtest00001")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if got != "Val Structured" {
			t.Errorf("got %q, want Val Structured", got)
		}
	})

	t.Run("unstructured dataset fails type check", func(t *testing.T) {
		_, err := ValidateDatasetTypeForExport(conn, "ds_valtest00002")
		wantErrContaining(t, err, "only structured", "structured-only")
	})

	t.Run("inactive dataset fails", func(t *testing.T) {
		_, err := ValidateDatasetTypeForExport(conn, "ds_valtest00003")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent dataset fails", func(t *testing.T) {
		_, err := ValidateDatasetTypeForExport(conn, "ds_nonexist00")
		wantErrContaining(t, err, "does not exist", "does-not-exist")
	})
}

// --- ClusterBelongsToLocation ---

// TestClusterBelongsToLocation covers a matching pair, a wrong location, an
// inactive cluster and an unknown cluster.
func TestClusterBelongsToLocation(t *testing.T) {
	conn := setupValidationDB(t)
	defer conn.Close()

	t.Run("cluster in correct location passes", func(t *testing.T) {
		err := ClusterBelongsToLocation(conn, "cl_valtest00001", "loc_valtest0001")
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	})

	t.Run("cluster in wrong location fails", func(t *testing.T) {
		err := ClusterBelongsToLocation(conn, "cl_valtest00001", "loc_valtest0002")
		wantErrContaining(t, err, "does not belong", "does-not-belong")
	})

	t.Run("inactive cluster fails", func(t *testing.T) {
		err := ClusterBelongsToLocation(conn, "cl_valtest00002", "loc_valtest0001")
		wantErrContaining(t, err, "not active", "not-active")
	})

	t.Run("nonexistent cluster fails", func(t *testing.T) {
		err := ClusterBelongsToLocation(conn, "cl_nonexist00", "loc_valtest0001")
		wantErrContaining(t, err, "does not exist", "does-not-exist")
	})
}
package db

import (
	"database/sql"
	"testing"

	_ "github.com/duckdb/duckdb-go/v2"
)

// SetupTestDB opens an in-memory DuckDB database and applies the schema text
// returned by ReadSchemaSQL. Any failure aborts the test through t.Fatalf; on
// the schema-related failures the handle is closed first.
// The caller must defer db.Close() on the returned handle.
func SetupTestDB(t *testing.T) *sql.DB {
	t.Helper()

	conn, openErr := sql.Open("duckdb", ":memory:")
	if openErr != nil {
		t.Fatalf("failed to open database: %v", openErr)
	}

	schemaSQL, readErr := ReadSchemaSQL()
	if readErr != nil {
		conn.Close() // best effort: the test is already failing
		t.Fatalf("failed to read schema: %v", readErr)
	}

	_, execErr := conn.Exec(schemaSQL)
	if execErr != nil {
		conn.Close() // best effort: the test is already failing
		t.Fatalf("failed to create schema: %v", execErr)
	}

	return conn
}

// InsertTestDataset inserts an active dataset row of type 'structured' with
// the given id and name. It has no return value; an insert error fails the
// test immediately.
func InsertTestDataset(t *testing.T, database *sql.DB, id, name string) {
	t.Helper()

	const stmt = "INSERT INTO dataset (id, name, type, active) VALUES (?, ?, 'structured', true)"
	if _, err := database.Exec(stmt, id, name); err != nil {
		t.Fatalf("failed to insert dataset: %v", err)
	}
}

// InsertTestDatasetWithType inserts an active dataset row whose type column is
// set to datasetType. An insert error fails the test immediately.
func InsertTestDatasetWithType(t *testing.T, database *sql.DB, id, name, datasetType string) {
	t.Helper()

	const stmt = "INSERT INTO dataset (id, name, type, active) VALUES (?, ?, ?, true)"
	if _, err := database.Exec(stmt, id, name, datasetType); err != nil {
		t.Fatalf("failed to insert dataset: %v", err)
	}
}

// InsertTestLocation inserts an active location row under datasetID. The
// coordinates (-36.8485, 174.7633) and timezone 'Pacific/Auckland' are fixed
// test values. An insert error fails the test immediately.
func InsertTestLocation(t *testing.T, database *sql.DB, id, datasetID, name string) {
	t.Helper()

	const stmt = `INSERT INTO location (id, dataset_id, name, latitude, longitude, timezone_id, active)VALUES (?, ?, ?, -36.8485, 174.7633, 'Pacific/Auckland', true)`
	if _, err := database.Exec(stmt, id, datasetID, name); err != nil {
		t.Fatalf("failed to insert location: %v", err)
	}
}

// InsertTestCluster inserts an active cluster row referencing datasetID and
// locationID, with sample_rate fixed at 48000. An insert error fails the test
// immediately.
func InsertTestCluster(t *testing.T, database *sql.DB, id, datasetID, locationID, name string) {
	t.Helper()

	const stmt = `INSERT INTO cluster (id, dataset_id, location_id, name, sample_rate, active)VALUES (?, ?, ?, ?, 48000, true)`
	if _, err := database.Exec(stmt, id, datasetID, locationID, name); err != nil {
		t.Fatalf("failed to insert cluster: %v", err)
	}
}

// InsertTestFile inserts an active file row named 'test.wav' at locationID
// with the given hash. timestamp_local is CURRENT_TIMESTAMP, duration is 1.0
// and sample_rate is 48000. An insert error fails the test immediately.
func InsertTestFile(t *testing.T, database *sql.DB, id, hash, locationID string) {
	t.Helper()

	const stmt = `INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local, duration, sample_rate, active)VALUES (?, 'test.wav', ?, ?, CURRENT_TIMESTAMP, 1.0, 48000, true)`
	if _, err := database.Exec(stmt, id, hash, locationID); err != nil {
		t.Fatalf("failed to insert file: %v", err)
	}
}

// InsertTestSpecies inserts an active species row with the given id and label.
// An insert error fails the test immediately.
func InsertTestSpecies(t *testing.T, database *sql.DB, id, label string) {
	t.Helper()

	const stmt = "INSERT INTO species (id, label, active) VALUES (?, ?, true)"
	if _, err := database.Exec(stmt, id, label); err != nil {
		t.Fatalf("failed to insert species: %v", err)
	}
}

// InsertTestCallType inserts an active call_type row with the given label,
// linked to speciesID. An insert error fails the test immediately.
func InsertTestCallType(t *testing.T, database *sql.DB, id, speciesID, label string) {
	t.Helper()

	const stmt = "INSERT INTO call_type (id, species_id, label, active) VALUES (?, ?, ?, true)"
	if _, err := database.Exec(stmt, id, speciesID, label); err != nil {
		t.Fatalf("failed to insert call_type: %v", err)
	}
}

// InsertTestPattern inserts an active cyclic_recording_pattern row, storing
// recordS in record_s and sleepS in sleep_s. An insert error fails the test
// immediately.
func InsertTestPattern(t *testing.T, database *sql.DB, id string, recordS, sleepS int) {
	t.Helper()

	const stmt = "INSERT INTO cyclic_recording_pattern (id, record_s, sleep_s, active) VALUES (?, ?, ?, true)"
	if _, err := database.Exec(stmt, id, recordS, sleepS); err != nil {
		t.Fatalf("failed to insert pattern: %v", err)
	}
}
// setupInvariantsTestDB creates an in-memory database with the full schemafunc setupInvariantsTestDB(t *testing.T) *sql.DB {t.Helper()db, err := sql.Open("duckdb", ":memory:")if err != nil {t.Fatalf("failed to open database: %v", err)}schema, err := ReadSchemaSQL()if err != nil {t.Fatalf("failed to read schema: %v", err)}_, err = db.Exec(schema)if err != nil {t.Fatalf("failed to create schema: %v", err)}return db}
// insertDataset adds an active dataset row of type 'structured' with the given
// id and name. It returns nothing; an insert error fails the test immediately.
func insertDataset(t *testing.T, db *sql.DB, id, name string) {
	t.Helper()

	const stmt = "INSERT INTO dataset (id, name, type, active) VALUES (?, ?, 'structured', true)"
	if _, err := db.Exec(stmt, id, name); err != nil {
		t.Fatalf("failed to insert dataset: %v", err)
	}
}

// insertLocation adds an active location row under datasetID. The coordinates
// (-36.8485, 174.7633) and timezone 'Pacific/Auckland' are fixed test values.
// It returns nothing; an insert error fails the test immediately.
func insertLocation(t *testing.T, db *sql.DB, id, datasetID, name string) {
	t.Helper()

	const stmt = `INSERT INTO location (id, dataset_id, name, latitude, longitude, timezone_id, active)VALUES (?, ?, ?, -36.8485, 174.7633, 'Pacific/Auckland', true)`
	if _, err := db.Exec(stmt, id, datasetID, name); err != nil {
		t.Fatalf("failed to insert location: %v", err)
	}
}

// insertCluster adds an active cluster row referencing datasetID and
// locationID, with sample_rate fixed at 48000. An insert error fails the test
// immediately.
func insertCluster(t *testing.T, db *sql.DB, id, datasetID, locationID, name string) {
	t.Helper()

	const stmt = `INSERT INTO cluster (id, dataset_id, location_id, name, sample_rate, active)VALUES (?, ?, ?, ?, 48000, true)`
	if _, err := db.Exec(stmt, id, datasetID, locationID, name); err != nil {
		t.Fatalf("failed to insert cluster: %v", err)
	}
}

// insertFile adds an active file row named 'test.wav' at locationID with the
// given hash. timestamp_local is CURRENT_TIMESTAMP, duration is 1.0 and
// sample_rate is 48000. An insert error fails the test immediately.
func insertFile(t *testing.T, db *sql.DB, id, hash, locationID string) {
	t.Helper()

	const stmt = `INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local, duration, sample_rate, active)VALUES (?, 'test.wav', ?, ?, CURRENT_TIMESTAMP, 1.0, 48000, true)`
	if _, err := db.Exec(stmt, id, hash, locationID); err != nil {
		t.Fatalf("failed to insert file: %v", err)
	}
}
insertDataset(t, db, "ds_test12345", "Test Dataset")insertLocation(t, db, "loc_test1234", "ds_test12345", "Test Location")insertCluster(t, db, "clustest1234", "ds_test12345", "loc_test1234", "Test Cluster")
InsertTestDataset(t, db, "ds_test12345", "Test Dataset")InsertTestLocation(t, db, "loc_test1234", "ds_test12345", "Test Location")InsertTestCluster(t, db, "clustest1234", "ds_test12345", "loc_test1234", "Test Cluster")
})t.Run("location with deleted dataset rejected", func(t *testing.T) {// Create and then soft-delete a datasetinsertDataset(t, db, "ds_del_temp_01", "To Be Deleted")_, err := db.Exec("UPDATE dataset SET active = false WHERE id = 'ds_del_temp_01'")if err != nil {t.Fatalf("failed to deactivate dataset: %v", err)}// Try to create location pointing to inactive dataset_, err = db.Exec(`INSERT INTO location (id, dataset_id, name, latitude, longitude, timezone_id, active)VALUES ('loc_inact_ds01', 'ds_del_temp_01', 'Inactive DS Location', -36.8485, 174.7633, 'Pacific/Auckland', true)`,)// Note: FK constraint may still allow this depending on implementation// This test documents the current behaviort.Logf("Insert location to inactive dataset: err=%v", err)
// Setup: create two separate dataset hierarchiesinsertDataset(t, db, "ds_cluster_t01", "Cluster Test Dataset 1")insertDataset(t, db, "ds_cluster_t02", "Cluster Test Dataset 2")insertLocation(t, db, "loc_clust_t001", "ds_cluster_t01", "Location in DS1")insertLocation(t, db, "loc_clust_t002", "ds_cluster_t02", "Location in DS2")
InsertTestDataset(t, db, "ds_cluster_t01", "Cluster Test Dataset 1")InsertTestDataset(t, db, "ds_cluster_t02", "Cluster Test Dataset 2")InsertTestLocation(t, db, "loc_clust_t001", "ds_cluster_t01", "Location in DS1")InsertTestLocation(t, db, "loc_clust_t002", "ds_cluster_t02", "Location in DS2")
t.Run("cluster with mismatched dataset and location rejected", func(t *testing.T) {// Attempt: cluster.dataset_id = ds1, but cluster.location_id = location from ds2_, err := db.Exec(`INSERT INTO cluster (id, dataset_id, location_id, name, sample_rate, active)VALUES ('cl_mismatch001', 'ds_cluster_t01', 'loc_clust_t002', 'Mismatched Cluster', 48000, true)`,)// This tests the business logic invariant from the spec// The schema allows this via FKs, but the application should reject it// If the schema doesn't prevent this, the test documents the gapt.Logf("Mismatched dataset/location: err=%v", err)})
// Build complete hierarchyinsertDataset(t, db, "ds_hier_test01", "Hierarchy Test")insertLocation(t, db, "loc_hier_test1", "ds_hier_test01", "Hier Location")insertCluster(t, db, "cl_hier_test01", "ds_hier_test01", "loc_hier_test1", "Hier Cluster")
InsertTestDataset(t, db, "ds_hier_test01", "Hierarchy Test")InsertTestLocation(t, db, "loc_hier_test1", "ds_hier_test01", "Hier Location")InsertTestCluster(t, db, "cl_hier_test01", "ds_hier_test01", "loc_hier_test1", "Hier Cluster")
`INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)
`INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)
`INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)
`INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)
## [2026-05-13] Test coverage push: Priority 1 + 2 (37.9% → 41.8%)

### Priority 1: Pure function tests (no dependencies)

- `db/types_json_test.go`: 5× MarshalJSON + JSONTime + jt + DatasetType constants (0% → 100%).
- `utils/location_test.go`: `ParseLocation` — valid, invalid, whitespace, edge cases (0% → 100%).
- `utils/placeholders_test.go`: `Placeholders` (0% → 100%).
- `utils/validation_optional_test.go`: `ValidateOptionalShortID`, `ValidateOptionalStringLength` (0% → 100%).
- `db/utils_test.go`: `Placeholders` delegation (0% → 100%).
- `db/resolve_test.go`: `ResolveDBPath` (0% → 100%).
- `tools/calls/isnight_test.go`: `String()`, `sunTimeUTC()` (0% → 100%/89%, respectively).
- `tools/calls/calls_summarise_test.go`: `updateStatsFromLabels` delegation (0% → 100%).

### 3c: Shared test helpers

- `db/testdb.go`: Extracted `SetupTestDB`, `InsertTestDataset`, `InsertTestDatasetWithType`, `InsertTestLocation`, `InsertTestCluster`, `InsertTestFile`, `InsertTestSpecies`, `InsertTestCallType`, `InsertTestPattern` from `invariants_test.go`. Available across `db/` test files.
- Refactored `db/invariants_test.go` to use the shared helpers.

### 2a: db/validation (0% → ~88%)

- `db/validation_test.go`: All 11 validation functions are tested against an in-memory DuckDB with the full schema: `GetDatasetType`, `ValidateDatasetTypeForImport`, `ValidateDatasetTypeUnstructured`, `ValidateLocationBelongsToDataset`, `DatasetExistsAndActive`, `LocationBelongsToDataset`, `ClusterExistsAndActive`, `PatternExistsAndActive`, `LocationExistsAndActive`, `ValidateDatasetTypeForExport`, `ClusterBelongsToLocation`. Each is tested for valid, inactive, nonexistent, and (where applicable) mismatched cases.

### 2b: Mapping DB validation (0% → ~93%)

- `utils/mapping_db_test.go`: `collectMappedLabels`, `collectUnmappedCalltypes`, `validateMappedSpecies`, `validateMappedCalltypes`, `ValidateMappingAgainstDB` tested against an in-memory DuckDB with species/call_type tables.
Covers valid mapping, missing species, missing DB species, inactive species, sentinel exclusion, missing calltypes.

### 2d: Clip filter extraction (0% → 100% for tested functions)

- `tools/calls/calls_clip_test.go`: `filterSegments`, `validateClipInput`, `accumulateFileResult` — pure logic tests.

### 2c: Export extraction (0% → 83–100% for tested functions)

- `tools/export_test.go`: `orderByFKDependency`, `buildOwnedViaQuery`, `buildCountOwnedViaQuery`, `checkOutputFile`, `datasetTables` manifest completeness and relation validation.

### Coverage summary