NQPVZ3PPQG6EPTTAEHXOXXGK27HZCISHZCOZU6K6RKWTRTOHMY6QC HCOBJB6WGQ5VUJFRFTZKUE3IDGEONLENOMQ3LXZZ2YMPJ2BJRJVQC M34GDDTWJ5E2N2SMNO5BENI5T6IWZNDZ23ATHGBAXUGXOK2EQQ4AC KZKLAINJJWZ64T5MUZT34LJVQIKBTKZ6EJGD7C7TTSSDGCHEDPMAC ZKLAOPURUGKKG4KC7C5NEQ5WSZSFTZM7SCV7PIYJMWN4UKI7UI3QC VU3KBTQ6AFJV36WVQ4A7BM7Q3MLJQX4DBCGMIZJXNPMEKLIBGHZAC ZCCQ4P5T2AMJAPBDWZVHXIUKLI5U2E5GNDXRCWXEOJQRWPSJJFEQC FCCJNYCVGOW6WVHYUUQ3RHHKB4QU3S4LU4AINB4VKWBOXLCPXILQC Q4JPMGETIYLDXTXX3FCR6MYIZDHSZRL35UB2GJGAAAQN3CMRD2WAC LBWQJEDHCNUNMEJWXILGBGYZUKQI7CDAMH2BD44HULM77SVH5UYQC LQLC7S3ADBR4O2JYVUSQJD65U3HG4ADOQBGB4F7KQCXUMNKMNEKAC 2HAQZPV377VV26SMPSXSZR6CL7SS2GTNPR5COIAPN47NLJILRQGAC 2P27XV3DGJCRA4SNJENCJYZLPR2XWZMTY7CGYYSJOY4UMDVVO25AC JZRF7OBJNERB4NIB37RSAF3ZK2A4RBWSCFV5OCRXZYVGPSNOWKTAC WKQ7LFTPDGWTPJKRWB6DH5PUCX2HF34UCGJDIPYC5PTDX4MCZJXAC RUVJ3V4N5V4Z3HSH2YYESKQF5G7RIHBFB5TLV2IPDWXSGJDRD54AC QVIGQOQZIEXLFMMAA7RTL7MQWI4MC3CH22R6YO6J7LGLHWLCSD4AC AVQ66WO4R4KVXAVP4YPEF65CPHJJY55H7ZOVPZ2BHFMGEBTWRUQQC 3DVPQOKB6BX63XSBIYYCPWBL2RBG3LXZS3XPQBANJP2FWVRAOVZQC TSOJUMHVLPASHBAVCTUK6WSGZOSBDZIC47FYILGQ2QAU7Z4BUZMAC ZDZDASRTTRPJRIBAMNO3TB533XFELVYJQQGAMA3WOQYY35SVAQUQC SJN7IKIVTAZX3ACEWPLFVUT7P2TLB3RQBD4PKC6PEQQ33ECXFJRQC IFLKNMMP2NMJG46W3MKRLCUSPAW73N7QSDXQLJBAWPYNVEQP6KXQC VNFPBXF7OPUPHHDUE6I3VAOOQGSTVGMCZKWVG44ZGO6FN6JVFGIQC 2Y5U3QPUBMTMBF6VKUFIYE22FXWKCQN4ODTCQ7T5QXE5ZVP2Z7NQC JAT3DXOLENZZGXE2NYFF3TVQAQIXMMNYO234ETKQGC2CRHJVZERQC I4CMOMXFJ3Y4AY5LPA7MDLWVHJ674IRFYLXCEXCC5ZARLCWSKCAAC package utilsimport ("database/sql""testing")// TestMutator_InterfaceCompliance verifies that *sql.Tx satisfies Mutator.// The *db.LoggedTx check is in db/tx_logger_test.go.func TestMutator_InterfaceCompliance(t *testing.T) {// *sql.Tx must satisfy Mutator (compile-time check is in mutator.go)var _ Mutator = (*sql.Tx)(nil)}// TestMutator_InterfaceMethods verifies the Mutator interface has the expected method set.func TestMutator_InterfaceMethods(t *testing.T) {// Ensure the interface is non-empty and has the right methodsvar m Mutator 
= nil // will be nil, but confirms the type exists_ = m}
package utils

import (
	"context"
	"database/sql"
)

// Mutator is the small transaction-shaped surface needed by code that both
// reads and writes. According to the package notes, *sql.Tx and *db.LoggedTx
// both implement it.
//
// Accept a Mutator rather than a concrete *sql.Tx when the caller may want
// mutation logging; declaring the interface here keeps utils from importing
// db, which would create an import cycle.
type Mutator interface {
	// ExecContext executes a statement with the given arguments.
	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)

	// QueryRow runs a query expected to yield at most one row.
	QueryRow(query string, args ...any) *sql.Row
}

// Compile-time proof that *sql.Tx implements Mutator.
// The matching assertion for *db.LoggedTx lives in db/tx_logger.go so that
// this package does not have to import db.
var _ Mutator = (*sql.Tx)(nil)
package utils

import (
	"database/sql"
	"slices"
	"strings"
	"testing"

	_ "github.com/duckdb/duckdb-go/v2"
)

// setupMappingTestDB creates an in-memory DB with schema + test species/calltypes.
// Species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000), Tui (sp_tui0000000, inactive)
// Calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002), Roroa/brrr (ct_roroa00001)
// The caller owns the returned handle and is responsible for closing it.
func setupMappingTestDB(t *testing.T) *sql.DB {
	t.Helper()
	db, err := sql.Open("duckdb", ":memory:")
	if err != nil {
		t.Fatalf("open: %v", err)
	}

	// Create minimal tables needed by mapping validation queries
	mustExecMapping(t, db, `CREATE TABLE species (
		id VARCHAR(12) PRIMARY KEY,
		label VARCHAR(100) UNIQUE NOT NULL,
		active BOOLEAN DEFAULT TRUE
	)`)
	mustExecMapping(t, db, `CREATE TABLE call_type (
		id VARCHAR(12) PRIMARY KEY,
		species_id VARCHAR(12) NOT NULL,
		label VARCHAR(100) NOT NULL,
		active BOOLEAN DEFAULT TRUE
	)`)

	// Insert test species
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_kiwi000000', 'Kiwi', true)")
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_roroa00000', 'Roroa', true)")
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_tui0000000', 'Tui', false)") // inactive

	// Insert test calltypes
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000001', 'sp_kiwi000000', 'song', true)")
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000002', 'sp_kiwi000000', 'duet', true)")
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_roroa00001', 'sp_roroa00000', 'brrr', true)")

	return db
}

// mustExecMapping runs query against db and aborts the test on failure.
func mustExecMapping(t *testing.T, db *sql.DB, query string) {
	t.Helper()
	if _, err := db.Exec(query); err != nil {
		t.Fatalf("exec: %v", err)
	}
}

// --- collectMappedLabels ---

func TestCollectMappedLabels(t *testing.T) {
	mapping := MappingFile{
		"GSK":   {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
		"K-M":   {Species: "Kiwi"},
		"noise": {Species: MappingNegative},
	}
	dataCalltypes := map[string]map[string]bool{
		"GSK": {"brrr": true},
		"K-M": {"song": true, "duet": true},
	}

	speciesSet, calltypes := collectMappedLabels(mapping, dataCalltypes)

	if !speciesSet["Roroa"] || !speciesSet["Kiwi"] {
		t.Errorf("speciesSet=%v, want Kiwi and Roroa", speciesSet)
	}
	if speciesSet[MappingNegative] {
		t.Error("sentinel species should be excluded")
	}
	// Roroa has explicit calltype mapping
	if calltypes["Roroa"]["brrr"] != "brrr" {
		t.Errorf("Roroa calltypes=%v", calltypes["Roroa"])
	}
	// Kiwi has no calltype mapping, so data calltypes pass through
	if calltypes["Kiwi"]["song"] != "song" || calltypes["Kiwi"]["duet"] != "duet" {
		t.Errorf("Kiwi calltypes=%v", calltypes["Kiwi"])
	}
}

// --- collectUnmappedCalltypes ---

func TestCollectUnmappedCalltypes(t *testing.T) {
	mapping := MappingFile{
		"GSK": {Species: "Roroa", Calltypes: map[string]string{"Male": "brrr"}},
	}
	dataCalltypes := map[string]map[string]bool{
		"GSK": {"Male": true, "Female": true},
	}
	mappedCalltypes := make(map[string]map[string]string)

	collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)

	// Male maps to brrr
	if mappedCalltypes["Roroa"]["brrr"] != "Male" {
		t.Errorf("mapped Male->brrr: %v", mappedCalltypes["Roroa"])
	}
	// Female has no mapping entry, passes through as-is
	if mappedCalltypes["Roroa"]["Female"] != "Female" {
		t.Errorf("unmapped Female passthrough: %v", mappedCalltypes["Roroa"])
	}
}

// --- validateMappedSpecies ---

func TestValidateMappedSpecies(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	t.Run("all species exist in DB", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Kiwi": true, "Roroa": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) > 0 {
			t.Errorf("missing species: %v", result.MissingDBSpecies)
		}
	})

	t.Run("species not in DB reported", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Phantom": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 1 || result.MissingDBSpecies[0] != "Phantom" {
			t.Errorf("expected [Phantom], got %v", result.MissingDBSpecies)
		}
	})

	t.Run("inactive species not found", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Tui": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 1 {
			t.Errorf("inactive species should be missing, got %v", result.MissingDBSpecies)
		}
	})

	t.Run("empty set is no-op", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 0 {
			t.Errorf("expected no missing, got %v", result.MissingDBSpecies)
		}
	})
}

// --- validateMappedCalltypes ---

func TestValidateMappedCalltypes(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	t.Run("all calltypes exist", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {"song": "data-song", "duet": "data-duet"},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) > 0 {
			t.Errorf("missing calltypes: %v", result.MissingCalltypes)
		}
	})

	t.Run("missing calltype reported", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {"phantom": "data-phantom"},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) != 1 {
			t.Errorf("expected 1 missing, got %v", result.MissingCalltypes)
		}
	})

	t.Run("empty calltype map skips species", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) != 0 {
			t.Errorf("expected none missing, got %v", result.MissingCalltypes)
		}
	})
}

// --- ValidateMappingAgainstDB (integration of all above) ---

func TestValidateMappingAgainstDB(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	// The table must be declared before the loop that ranges over it.
	tests := []struct {
		name              string
		mapping           MappingFile
		dataSpecies       map[string]bool
		dataCT            map[string]map[string]bool
		hasErrors         bool
		missingSpecies    []string
		missingDBSpecies  []string
		missingCalltypeCT string // substring expected in MissingCalltypes key
		errorContains     string // substring expected in result.Error()
	}{
		{
			name: "valid mapping - no errors",
			mapping: MappingFile{
				"GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
				"K-M": {Species: "Kiwi"},
			},
			dataSpecies: map[string]bool{"GSK": true, "K-M": true},
			dataCT:      map[string]map[string]bool{"GSK": {"brrr": true}, "K-M": {"song": true}},
		},
		{
			name:           "missing species in mapping",
			mapping:        MappingFile{"GSK": {Species: "Roroa"}},
			dataSpecies:    map[string]bool{"GSK": true, "K-M": true},
			hasErrors:      true,
			missingSpecies: []string{"K-M"},
		},
		{
			name:             "mapped species not in DB",
			mapping:          MappingFile{"PHANTOM": {Species: "Phantom"}},
			dataSpecies:      map[string]bool{"PHANTOM": true},
			hasErrors:        true,
			missingDBSpecies: []string{"Phantom"},
		},
		{
			name:        "sentinel species excluded from DB check",
			mapping:     MappingFile{"noise": {Species: MappingNegative}, "ignore": {Species: MappingIgnore}},
			dataSpecies: map[string]bool{"noise": true, "ignore": true},
		},
		{
			name: "missing calltype in DB",
			mapping: MappingFile{
				"K-M": {Species: "Kiwi", Calltypes: map[string]string{"song": "song", "phantom": "phantom"}},
			},
			dataSpecies:       map[string]bool{"K-M": true},
			dataCT:            map[string]map[string]bool{"K-M": {"song": true, "phantom": true}},
			hasErrors:         true,
			missingCalltypeCT: "phantom",
			errorContains:     "phantom",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := ValidateMappingAgainstDB(db, tt.mapping, tt.dataSpecies, tt.dataCT)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			if result.HasErrors() != tt.hasErrors {
				t.Errorf("HasErrors()=%v, want %v", result.HasErrors(), tt.hasErrors)
			}

			assertStringSlice(t, "MissingSpecies", result.MissingSpecies, tt.missingSpecies)
			assertStringSlice(t, "MissingDBSpecies", result.MissingDBSpecies, tt.missingDBSpecies)

			// Check the key substring itself, not merely that some calltype
			// was reported missing.
			if tt.missingCalltypeCT != "" {
				matched := false
				for key := range result.MissingCalltypes {
					if strings.Contains(key, tt.missingCalltypeCT) {
						matched = true
						break
					}
				}
				if !matched {
					t.Errorf("expected missing calltype key containing %q, got %v",
						tt.missingCalltypeCT, result.MissingCalltypes)
				}
			}

			if tt.errorContains != "" && !strings.Contains(result.Error(), tt.errorContains) {
				t.Errorf("error should contain %q: %s", tt.errorContains, result.Error())
			}
		})
	}
}

// assertStringSlice checks that got matches want (order-insensitive).
// A nil slice and an empty slice are treated as equal.
func assertStringSlice(t *testing.T, label string, got, want []string) {
	t.Helper()
	if len(want) == 0 && len(got) == 0 {
		return
	}
	if len(got) != len(want) {
		t.Errorf("%s: got %v, want %v", label, got, want)
		return
	}
	for _, w := range want {
		if !slices.Contains(got, w) {
			t.Errorf("%s: missing %q in %v", label, w, got)
		}
	}
}
package utils

import (
	"database/sql"
	"testing"

	_ "github.com/duckdb/duckdb-go/v2"
)

// TestCheckDuplicateHash_NoRows verifies that an empty file table yields
// no duplicate and an empty ID.
func TestCheckDuplicateHash_NoRows(t *testing.T) {
	db := openTestDB(t)
	defer db.Close()

	// No rows exist — should return not-duplicate
	id, dup, err := CheckDuplicateHash(db, "abcdef0123456789")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if dup {
		t.Error("expected isDuplicate=false when no rows")
	}
	if id != "" {
		t.Errorf("expected empty id, got %q", id)
	}
}

// TestCheckDuplicateHash_FoundDuplicate verifies that an active file with a
// matching hash is reported as a duplicate along with its ID.
func TestCheckDuplicateHash_FoundDuplicate(t *testing.T) {
	db := openTestDB(t)
	defer db.Close()

	// Insert a file with known hash
	hash := "deadbeef12345678"
	fileID := "test_file_id_123"
	_, err := db.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES (?, '/test/file.wav', 'ds1', ?, true)`, fileID, hash)
	if err != nil {
		t.Fatalf("insert: %v", err)
	}

	id, dup, err := CheckDuplicateHash(db, hash)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !dup {
		t.Error("expected isDuplicate=true")
	}
	if id != fileID {
		t.Errorf("expected id=%q, got %q", fileID, id)
	}
}

// TestCheckDuplicateHash_InactiveNotDuplicate verifies that a matching hash on
// an inactive file does not count as a duplicate.
func TestCheckDuplicateHash_InactiveNotDuplicate(t *testing.T) {
	db := openTestDB(t)
	defer db.Close()

	// Insert an INACTIVE file with known hash
	hash := "cafebeef12345678"
	fileID := "inactive_file_id"
	_, err := db.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES (?, '/test/old.wav', 'ds1', ?, false)`, fileID, hash)
	if err != nil {
		t.Fatalf("insert: %v", err)
	}

	// Inactive files should NOT be considered duplicates
	id, dup, err := CheckDuplicateHash(db, hash)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if dup {
		t.Error("expected isDuplicate=false for inactive file")
	}
	if id != "" {
		t.Errorf("expected empty id, got %q", id)
	}
}

// TestCheckDuplicateHash_DifferentHashNoDuplicate verifies that a lookup for a
// hash that is not stored reports no duplicate even when other files exist.
func TestCheckDuplicateHash_DifferentHashNoDuplicate(t *testing.T) {
	db := openTestDB(t)
	defer db.Close()

	// Insert file with hash A
	_, err := db.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES ('id1', '/test/a.wav', 'ds1', 'hash_aaaa', true)`)
	if err != nil {
		t.Fatalf("insert: %v", err)
	}

	// Query for hash B — no duplicate
	id, dup, err := CheckDuplicateHash(db, "hash_bbbb")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if dup {
		t.Error("expected isDuplicate=false for different hash")
	}
	if id != "" {
		t.Errorf("expected empty id, got %q", id)
	}
}

// openTestDB creates a DuckDB in-memory database with the minimal schema
// needed for the file table. The caller is responsible for closing it.
func openTestDB(t *testing.T) *sql.DB {
	t.Helper()
	db, err := sql.Open("duckdb", "")
	if err != nil {
		t.Fatalf("open duckdb: %v", err)
	}
	_, err = db.Exec(`CREATE TABLE file (
		id VARCHAR PRIMARY KEY,
		path VARCHAR,
		dataset_id VARCHAR,
		xxh64_hash VARCHAR,
		active BOOLEAN DEFAULT true
	)`)
	if err != nil {
		// Release the handle before aborting; t.Fatalf does not return.
		db.Close()
		t.Fatalf("create table: %v", err)
	}
	return db
}
package utils

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestLoadMappingFile covers successful parsing and each rejection path of
// LoadMappingFile. Temp files live under t.TempDir(), so no explicit removal
// is needed.
func TestLoadMappingFile(t *testing.T) {
	t.Run("valid mapping", func(t *testing.T) {
		content := `{
			"GSK": {"species": "Roroa", "calltypes": {"Male": "Male - Solo"}},
			"Don't Know": {"species": "Don't Know"}
		}`
		path := createTempFile(t, content)

		mapping, err := LoadMappingFile(path)
		if err != nil {
			t.Fatalf("expected no error, got: %v", err)
		}

		if len(mapping) != 2 {
			t.Errorf("expected 2 entries, got %d", len(mapping))
		}
		if mapping["GSK"].Species != "Roroa" {
			t.Errorf("expected GSK -> Roroa, got %s", mapping["GSK"].Species)
		}
		if mapping["GSK"].Calltypes["Male"] != "Male - Solo" {
			t.Errorf("expected GSK Male -> Male - Solo, got %s", mapping["GSK"].Calltypes["Male"])
		}
	})

	// Every case below must be rejected by LoadMappingFile.
	rejected := []struct {
		name    string
		content string
		failMsg string
	}{
		{"invalid JSON", `{invalid json}`, "expected error for invalid JSON"},
		{"empty file", `{}`, "expected error for empty mapping"},
		{"missing species field", `{"GSK": {"calltypes": {"Male": "Male - Solo"}}}`, "expected error for missing species field"},
		{"empty species field", `{"GSK": {"species": ""}}`, "expected error for empty species field"},
	}
	for _, tc := range rejected {
		t.Run(tc.name, func(t *testing.T) {
			path := createTempFile(t, tc.content)
			if _, err := LoadMappingFile(path); err == nil {
				t.Fatal(tc.failMsg)
			}
		})
	}

	t.Run("nonexistent file", func(t *testing.T) {
		_, err := LoadMappingFile("/nonexistent/path/mapping.json")
		if err == nil {
			t.Fatal("expected error for nonexistent file")
		}
	})
}

// TestGetDBSpecies checks the hit and miss paths of MappingFile.GetDBSpecies.
func TestGetDBSpecies(t *testing.T) {
	mapping := MappingFile{
		"GSK": {Species: "Roroa"},
		"K-M": {Species: "Kiwi"},
	}

	t.Run("found", func(t *testing.T) {
		species, ok := mapping.GetDBSpecies("GSK")
		if !ok {
			t.Fatal("expected to find GSK")
		}
		if species != "Roroa" {
			t.Errorf("expected Roroa, got %s", species)
		}
	})

	t.Run("not found", func(t *testing.T) {
		_, ok := mapping.GetDBSpecies("UNKNOWN")
		if ok {
			t.Fatal("expected not to find UNKNOWN")
		}
	})
}

// TestGetDBCalltype checks the explicit mapping and every passthrough path of
// MappingFile.GetDBCalltype.
func TestGetDBCalltype(t *testing.T) {
	mapping := MappingFile{
		"GSK": {
			Species: "Roroa",
			Calltypes: map[string]string{
				"Male":   "Male - Solo",
				"Female": "Female - Solo",
			},
		},
		"K-M": {Species: "Kiwi"}, // no calltype mapping
	}

	t.Run("with mapping", func(t *testing.T) {
		ct := mapping.GetDBCalltype("GSK", "Male")
		if ct != "Male - Solo" {
			t.Errorf("expected 'Male - Solo', got %s", ct)
		}
	})

	t.Run("without mapping - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("GSK", "Unknown")
		if ct != "Unknown" {
			t.Errorf("expected passthrough 'Unknown', got %s", ct)
		}
	})

	t.Run("species not in mapping - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("UNKNOWN", "Male")
		if ct != "Male" {
			t.Errorf("expected passthrough 'Male', got %s", ct)
		}
	})

	t.Run("species without calltypes - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("K-M", "Male")
		if ct != "Male" {
			t.Errorf("expected passthrough 'Male', got %s", ct)
		}
	})
}

// TestMappingValidationResult checks HasErrors for each error category and
// that Error mentions every populated category.
func TestMappingValidationResult(t *testing.T) {
	t.Run("HasErrors - no errors", func(t *testing.T) {
		r := MappingValidationResult{}
		if r.HasErrors() {
			t.Error("expected no errors")
		}
	})

	t.Run("HasErrors - missing species", func(t *testing.T) {
		r := MappingValidationResult{MissingSpecies: []string{"UNKNOWN"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("HasErrors - missing DB species", func(t *testing.T) {
		r := MappingValidationResult{MissingDBSpecies: []string{"Phantom"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("HasErrors - missing calltypes", func(t *testing.T) {
		r := MappingValidationResult{MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("Error - all error types", func(t *testing.T) {
		r := MappingValidationResult{
			MissingSpecies:   []string{"UNKNOWN"},
			MissingDBSpecies: []string{"Phantom"},
			MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"},
		}
		errStr := r.Error()
		if errStr == "" {
			t.Error("expected non-empty error string")
		}
		// Check all parts are present
		if !strings.Contains(errStr, "UNKNOWN") {
			t.Error("error string should contain MISSING species")
		}
		if !strings.Contains(errStr, "Phantom") {
			t.Error("error string should contain missing DB species")
		}
		if !strings.Contains(errStr, "GSK/Male") {
			t.Error("error string should contain missing calltype")
		}
	})
}

// Helper functions

// createTempFile writes content to mapping.json inside a fresh t.TempDir()
// and returns its path. The directory is removed automatically when the test
// (or subtest) that owns t finishes.
func createTempFile(t *testing.T, content string) string {
	t.Helper()
	tmpDir := t.TempDir()
	path := filepath.Join(tmpDir, "mapping.json")
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatalf("failed to create temp file: %v", err)
	}
	return path
}

// containsSubstring reports whether substr occurs within s.
// Kept as a thin wrapper over strings.Contains in case other tests in the
// package still call it.
func containsSubstring(s, substr string) bool {
	return strings.Contains(s, substr)
}

// containsSubstringHelper reports whether substr occurs within s.
// Retained for the same reason as containsSubstring.
func containsSubstringHelper(s, substr string) bool {
	return strings.Contains(s, substr)
}

// TestMappingClassify checks that Classify distinguishes negative, ignored,
// real, and absent species.
func TestMappingClassify(t *testing.T) {
	m := MappingFile{
		"noise":  {Species: MappingNegative},
		"ignore": {Species: MappingIgnore},
		"kiwi":   {Species: "Kiwi"},
	}

	c, k, ok := m.Classify("noise")
	if !ok || k != MappingNeg || c != "" {
		t.Error("failed classify negative")
	}

	c, k, ok = m.Classify("ignore")
	if !ok || k != MappingIgn || c != "" {
		t.Error("failed classify ignore")
	}

	c, k, ok = m.Classify("kiwi")
	if !ok || k != MappingReal || c != "Kiwi" {
		t.Error("failed classify real")
	}

	_, _, ok = m.Classify("missing")
	if ok {
		t.Error("expected missing to be not ok")
	}
}

// TestMappingValidateCoversSpecies checks that species absent from the mapping
// are reported.
func TestMappingValidateCoversSpecies(t *testing.T) {
	m := MappingFile{"kiwi": {Species: "Kiwi"}}
	missing := m.ValidateCoversSpecies(map[string]bool{"kiwi": true, "tui": true})
	if len(missing) != 1 || missing[0] != "tui" {
		t.Errorf("expected [tui], got %v", missing)
	}
}

// TestMappingClasses checks that Classes returns sorted, de-duplicated,
// non-sentinel class names.
func TestMappingClasses(t *testing.T) {
	m := MappingFile{
		"noise":     {Species: MappingNegative},
		"kiwi":      {Species: "Kiwi"},
		"tui":       {Species: "Tui"},
		"duplicate": {Species: "Kiwi"},
	}
	classes := m.Classes()
	if len(classes) != 2 || classes[0] != "Kiwi" || classes[1] != "Tui" {
		t.Errorf("expected [Kiwi, Tui], got %v", classes)
	}
}
package utils

import (
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strings"
)

// SpeciesMapping maps .data species/calltype names to DB labels.
type SpeciesMapping struct {
	Species   string            `json:"species"`
	Calltypes map[string]string `json:"calltypes,omitempty"`
}

// MappingFile represents the complete mapping file structure.
// Key is the .data file species name.
type MappingFile map[string]SpeciesMapping

// LoadMappingFile loads and parses a mapping JSON file.
// It returns an error if the file cannot be read, is not valid JSON, contains
// no entries, or contains an entry whose species field is empty.
func LoadMappingFile(path string) (MappingFile, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read mapping file: %w", err)
	}

	var mapping MappingFile
	if err := json.Unmarshal(data, &mapping); err != nil {
		return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)
	}

	// Validate non-empty
	if len(mapping) == 0 {
		return nil, fmt.Errorf("mapping file is empty")
	}

	// Validate each entry has species
	for dataSpecies, sm := range mapping {
		if sm.Species == "" {
			return nil, fmt.Errorf("mapping entry '%s' has empty species field", dataSpecies)
		}
	}

	return mapping, nil
}

// MappingValidationResult contains validation errors for a mapping.
type MappingValidationResult struct {
	MissingSpecies   []string // .data species not in mapping
	MissingDBSpecies []string // mapped species not in DB
	// MissingCalltypes is documented as
	// "dataSpecies/dataCalltype" -> "dbSpecies/dbCalltype".
	// NOTE(review): validateMappedCalltypes builds the key from the DB species
	// label, not the .data species name — confirm which form consumers expect.
	MissingCalltypes map[string]string
}

// HasErrors returns true if any validation errors exist.
func (r MappingValidationResult) HasErrors() bool {
	return len(r.MissingSpecies) > 0 ||
		len(r.MissingDBSpecies) > 0 ||
		len(r.MissingCalltypes) > 0
}

// Error returns a formatted error message joining every populated category
// with "; ". Calltype entries are sorted so the output is deterministic.
func (r MappingValidationResult) Error() string {
	var parts []string

	if len(r.MissingSpecies) > 0 {
		parts = append(parts, fmt.Sprintf("species in .data but not in mapping: [%s]",
			strings.Join(r.MissingSpecies, ", ")))
	}

	if len(r.MissingDBSpecies) > 0 {
		parts = append(parts, fmt.Sprintf("mapped species not found in DB: [%s]",
			strings.Join(r.MissingDBSpecies, ", ")))
	}

	if len(r.MissingCalltypes) > 0 {
		ctErrors := make([]string, 0, len(r.MissingCalltypes))
		for k, v := range r.MissingCalltypes {
			ctErrors = append(ctErrors, fmt.Sprintf("%s->%s", k, v))
		}
		sort.Strings(ctErrors)
		parts = append(parts, fmt.Sprintf("calltypes not found in DB: [%s]",
			strings.Join(ctErrors, ", ")))
	}

	return strings.Join(parts, "; ")
}

// ValidateMappingAgainstDB validates that all mapped species and calltypes exist in the database.
// Also validates that the mapping covers all species found in .data files.
//
// The returned error reports query failures only; validation findings are
// carried in the result (see HasErrors). On a query failure the result holds
// whatever was collected up to that point.
func ValidateMappingAgainstDB(
	queryer DB,
	mapping MappingFile,
	dataSpeciesSet map[string]bool,
	dataCalltypes map[string]map[string]bool, // species -> calltype -> true
) (MappingValidationResult, error) {
	result := MappingValidationResult{
		MissingSpecies:   make([]string, 0),
		MissingDBSpecies: make([]string, 0),
		MissingCalltypes: make(map[string]string),
	}

	// Check all .data species are in mapping
	for species := range dataSpeciesSet {
		if _, exists := mapping[species]; !exists {
			result.MissingSpecies = append(result.MissingSpecies, species)
		}
	}
	sort.Strings(result.MissingSpecies)

	// Collect all mapped species and calltypes
	mappedSpeciesSet, mappedCalltypes := collectMappedLabels(mapping, dataCalltypes)

	// Validate species exist in DB
	if err := validateMappedSpecies(queryer, mappedSpeciesSet, &result); err != nil {
		return result, err
	}

	// Validate calltypes exist in DB
	if err := validateMappedCalltypes(queryer, mappedCalltypes, &result); err != nil {
		return result, err
	}

	return result, nil
}

// collectMappedLabels builds sets of mapped species and calltype labels.
// It returns the set of non-sentinel DB species labels and, per DB species,
// a map of DB calltype label -> .data calltype label.
func collectMappedLabels(mapping MappingFile, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
	mappedSpeciesSet := make(map[string]bool)
	mappedCalltypes := make(map[string]map[string]string)

	for _, sm := range mapping {
		// Sentinels are not DB labels and must not be validated against the DB.
		if sm.Species == MappingNegative || sm.Species == MappingIgnore {
			continue
		}
		mappedSpeciesSet[sm.Species] = true

		if len(sm.Calltypes) > 0 {
			if mappedCalltypes[sm.Species] == nil {
				mappedCalltypes[sm.Species] = make(map[string]string)
			}
			for dataCT, dbCT := range sm.Calltypes {
				mappedCalltypes[sm.Species][dbCT] = dataCT
			}
		}
	}

	collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)

	return mappedSpeciesSet, mappedCalltypes
}

// collectUnmappedCalltypes adds calltypes from .data files that have no explicit
// mapping entry (dataCT == dbCT by convention) to the mappedCalltypes set.
// Species absent from the mapping are skipped, as are sentinel species: they
// are not DB labels, so looking their calltypes up in the DB would only
// produce spurious "not found" results.
func collectUnmappedCalltypes(mapping MappingFile, dataCalltypes map[string]map[string]bool, mappedCalltypes map[string]map[string]string) {
	for dataSpecies, ctSet := range dataCalltypes {
		sm, exists := mapping[dataSpecies]
		if !exists {
			continue
		}
		if sm.Species == MappingNegative || sm.Species == MappingIgnore {
			continue
		}

		dbSpecies := sm.Species
		for dataCT := range ctSet {
			// Indexing a nil Calltypes map is safe and reports !ok.
			dbCT := dataCT
			if mapped, ok := sm.Calltypes[dataCT]; ok {
				dbCT = mapped
			}

			if mappedCalltypes[dbSpecies] == nil {
				mappedCalltypes[dbSpecies] = make(map[string]string)
			}
			mappedCalltypes[dbSpecies][dbCT] = dataCT
		}
	}
}

// queryLabelSet runs query and returns the set of values found in its single
// string column. The result set is closed before returning, and scan or
// iteration errors are reported instead of being dropped.
func queryLabelSet(queryer DB, query string, args ...any) (map[string]bool, error) {
	rows, err := queryer.Query(query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	found := make(map[string]bool)
	for rows.Next() {
		var label string
		if err := rows.Scan(&label); err != nil {
			return nil, err
		}
		found[label] = true
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return found, nil
}

// validateMappedSpecies checks that all mapped species exist in the database
// as active rows, appending any that do not to result.MissingDBSpecies in
// sorted order. An empty set issues no query.
func validateMappedSpecies(queryer DB, mappedSpeciesSet map[string]bool, result *MappingValidationResult) error {
	speciesLabels := make([]string, 0, len(mappedSpeciesSet))
	for s := range mappedSpeciesSet {
		speciesLabels = append(speciesLabels, s)
	}
	sort.Strings(speciesLabels)

	if len(speciesLabels) == 0 {
		return nil
	}

	query := `SELECT label FROM species WHERE label IN (` + Placeholders(len(speciesLabels)) + `) AND active = true`
	args := make([]any, len(speciesLabels))
	for i, s := range speciesLabels {
		args[i] = s
	}

	foundSpecies, err := queryLabelSet(queryer, query, args...)
	if err != nil {
		return fmt.Errorf("failed to query species: %w", err)
	}

	for _, s := range speciesLabels {
		if !foundSpecies[s] {
			result.MissingDBSpecies = append(result.MissingDBSpecies, s)
		}
	}

	return nil
}

// validateMappedCalltypes checks that all mapped calltypes exist in the database.
// mappedCalltypes is keyed by DB species label; each inner map is
// DB calltype label -> .data calltype label. Calltypes with no active row for
// their species are recorded in result.MissingCalltypes. Species with an empty
// calltype map are skipped.
func validateMappedCalltypes(queryer DB, mappedCalltypes map[string]map[string]string, result *MappingValidationResult) error {
	for dbSpecies, ctMap := range mappedCalltypes {
		if len(ctMap) == 0 {
			continue
		}

		ctLabels := make([]string, 0, len(ctMap))
		for dbCT := range ctMap {
			ctLabels = append(ctLabels, dbCT)
		}
		sort.Strings(ctLabels)

		query := `SELECT ct.label
			FROM call_type ct
			JOIN species s ON ct.species_id = s.id
			WHERE s.label = ? AND ct.label IN (` + Placeholders(len(ctLabels)) + `) AND ct.active = true`
		args := make([]any, 1+len(ctLabels))
		args[0] = dbSpecies
		for i, ct := range ctLabels {
			args[1+i] = ct
		}

		// queryLabelSet closes the rows before returning, so result sets do
		// not pile up across iterations of this loop.
		foundCT, err := queryLabelSet(queryer, query, args...)
		if err != nil {
			return fmt.Errorf("failed to query calltypes for species %s: %w", dbSpecies, err)
		}

		for dbCT, dataCT := range ctMap {
			if !foundCT[dbCT] {
				key := fmt.Sprintf("%s/%s", dbSpecies, dataCT)
				value := fmt.Sprintf("%s/%s", dbSpecies, dbCT)
				result.MissingCalltypes[key] = value
			}
		}
	}

	return nil
}

// GetDBSpecies returns the DB species label for a .data species.
// The boolean is false when dataSpecies is not in the mapping.
func (m MappingFile) GetDBSpecies(dataSpecies string) (string, bool) {
	sm, exists := m[dataSpecies]
	if !exists {
		return "", false
	}
	return sm.Species, true
}

// GetDBCalltype returns the DB calltype label for a .data species/calltype.
// Returns the dataCalltype unchanged if no mapping exists.
func (m MappingFile) GetDBCalltype(dataSpecies, dataCalltype string) string {
	sm, exists := m[dataSpecies]
	if !exists || sm.Calltypes == nil {
		return dataCalltype
	}
	if dbCT, ok := sm.Calltypes[dataCalltype]; ok {
		return dbCT
	}
	return dataCalltype
}

// Mapping sentinels: special values for the SpeciesMapping.Species field.
//
// MappingNegative marks a .data species as "confirmed empty" (Noise-equivalent):
// segments matching this name are treated as negative evidence — clips overlapping
// them emit an all-zero row when no positive species also overlaps.
//
// MappingIgnore marks a .data species as "ignored entirely": segments matching
// this name neither label clips nor block them.
const (
	MappingNegative = "__NEGATIVE__"
	MappingIgnore   = "__IGNORE__"
)

// MappingKind describes how a .data species should be treated.
type MappingKind int

const (
	MappingReal MappingKind = iota
	MappingNeg
	MappingIgn
)

// Classify returns the canonical class name and kind for a .data species.
// ok is false if dataSpecies is not present in the mapping.
// For MappingNeg and MappingIgn the canonical string is empty.
func (m MappingFile) Classify(dataSpecies string) (canonical string, kind MappingKind, ok bool) {
	sm, exists := m[dataSpecies]
	if !exists {
		return "", MappingReal, false
	}
	switch sm.Species {
	case MappingNegative:
		return "", MappingNeg, true
	case MappingIgnore:
		return "", MappingIgn, true
	default:
		return sm.Species, MappingReal, true
	}
}

// ValidateCoversSpecies returns the sorted list of species in speciesSet that
// are missing from the mapping. Empty result means full coverage.
func (m MappingFile) ValidateCoversSpecies(speciesSet map[string]bool) []string {
	missing := make([]string, 0)
	for s := range speciesSet {
		if _, exists := m[s]; !exists {
			missing = append(missing, s)
		}
	}
	sort.Strings(missing)
	return missing
}

// Classes returns the sorted unique non-sentinel canonical class names from the mapping.
// Used to build the CSV column header for clip-labels.
func (m MappingFile) Classes() []string {
	set := make(map[string]bool)
	for _, sm := range m {
		switch sm.Species {
		case MappingNegative, MappingIgnore, "":
			continue
		default:
			set[sm.Species] = true
		}
	}
	out := make([]string, 0, len(set))
	for s := range set {
		out = append(out, s)
	}
	sort.Strings(out)
	return out
}
package utilsimport ("context""database/sql""fmt""os""path/filepath""time")// ClusterImportInput defines parameters for importing one clustertype ClusterImportInput struct {FolderPath string // Absolute path to folder with WAV filesDatasetID string // 12-char dataset IDLocationID string // 12-char location IDClusterID string // 12-char cluster IDRecursive bool // Scan subfolders?}// ClusterImportOutput provides results and statisticstype ClusterImportOutput struct {TotalFiles intImportedFiles intSkippedFiles int // DuplicatesFailedFiles intAudioMothFiles intTotalDuration float64ProcessingTime stringErrors []FileImportError}// LocationData holds location information needed for processingtype LocationData struct {Latitude float64Longitude float64TimezoneID string}// FileProcessingResult is used for both single-file and cluster import pipelines.//// This is the canonical cluster import logic used by both:// - import_files.go (single cluster)// - bulk_file_import.go (multiple clusters)//// Steps:// 1. Validate folder exists// 2. Get location metadata (lat/lon/timezone) from database// 3. Scan folder for WAV files (recursive or not)// 4. Batch process all files:// - Parse WAV headers (includes file mod time)// - Batch parse filename timestamps (variance-based)// - Resolve timestamps (AudioMoth → filename → file mod time)// - Calculate hashes// - Calculate astronomical data// - Check duplicates// - INSERT INTO file// - INSERT INTO file_dataset (ALWAYS)// - INSERT INTO moth_metadata (if AudioMoth)// 6. 
Return summary statisticsfunc ImportCluster(input ClusterImportInput,) (*ClusterImportOutput, error) {startTime := time.Now()// Validate folder existsinfo, err := os.Stat(input.FolderPath)if err != nil {return nil, fmt.Errorf("folder not accessible: %w", err)}if !info.IsDir() {return nil, fmt.Errorf("path is not a directory: %s", input.FolderPath)}// Get location data for astronomical calculationslocationData, err := GetLocationData(database, input.LocationID)if err != nil {return nil, fmt.Errorf("failed to get location data: %w", err)}// Scan folder for WAV fileswavFiles, err := FindFiles(input.FolderPath, FindFilesOptions{Extension: ".wav",Recursive: input.Recursive,SkipPrefixes: []string{"Clips_"},SkipHidden: true, // Standard to ignore hiddenMinSize: 1, // Must have size > 0})if err != nil {return nil, fmt.Errorf("failed to scan folder: %w", err)}// If no files, return earlyif len(wavFiles) == 0 {return &ClusterImportOutput{TotalFiles: 0,ProcessingTime: time.Since(startTime).String(),Errors: []FileImportError{},}, nil}// Batch process all filesfilesData, processErrors := batchProcessFiles(wavFiles, locationData)imported, skipped, insertErrors, err := insertClusterFiles(filesData,input.DatasetID,input.ClusterID,input.LocationID,)if err != nil {return nil, fmt.Errorf("database insertion failed: %w", err)}// Combine all errorsallErrors := append(processErrors, insertErrors...)// Calculate summary statisticsaudiomothCount := 0totalDuration := 0.0for _, fd := range filesData {if fd.IsAudioMoth {audiomothCount++}totalDuration += fd.Duration}return &ClusterImportOutput{TotalFiles: len(wavFiles),ImportedFiles: imported,SkippedFiles: skipped,FailedFiles: len(allErrors),AudioMothFiles: audiomothCount,TotalDuration: totalDuration,ProcessingTime: time.Since(startTime).String(),Errors: allErrors,}, nil}// GetLocationData retrieves location coordinates and timezonevar loc LocationDataerr := database.QueryRow("SELECT latitude, longitude, timezone_id FROM location WHERE id = 
?",locationID,).Scan(&loc.Latitude, &loc.Longitude, &loc.TimezoneID)if err != nil {return nil, fmt.Errorf("failed to query location data: %w", err)}return &loc, nil}// Check if cluster already has a pathvar currentPath sql.NullStringerr := database.QueryRow("SELECT path FROM cluster WHERE id = ?", clusterID).Scan(¤tPath)if err != nil {return fmt.Errorf("failed to query cluster: %w", err)}// If path is already set, skipif currentPath.Valid && currentPath.String != "" {return nil}// Normalize folder pathnormalizedPath := NormalizeFolderPath(folderPath)// Update cluster with normalized path"UPDATE cluster SET path = ?, last_modified = now() WHERE id = ?",normalizedPath,clusterID,)if err != nil {return fmt.Errorf("failed to update cluster path: %w", err)}return nil}// batchProcessFiles extracts metadata and calculates hashes for all filesfunc batchProcessFiles(wavFiles []string, location *LocationData) ([]*FileProcessingResult, []FileImportError) {var filesData []*FileProcessingResultvar errors []FileImportError// Step 1: Extract WAV metadata and hash in single passwavInfos := make([]wavInfo, len(wavFiles))for i, path := range wavFiles {metadata, hash, err := ParseWAVHeaderWithHash(path)wavInfos[i] = wavInfo{path: path, metadata: metadata, hash: hash, err: err}}// Step 2: Collect filenames for batch timestamp parsingvar filenamesForParsing []stringvar filenameIndices []intfor i, info := range wavInfos {if info.err != nil {errors = append(errors, FileImportError{FileName: filepath.Base(info.path),Error: info.err.Error(),Stage: StageParse,})continue}if HasTimestampFilename(info.path) {filenamesForParsing = append(filenamesForParsing, filepath.Base(info.path))filenameIndices = append(filenameIndices, i)}}// Step 3: Parse filename timestamps in batch (if any)filenameTimestampMap := make(map[int]time.Time)if len(filenamesForParsing) > 0 {tsMap, tsErrors := parseFilenameTimestampsBatch(wavInfos, filenameIndices, filenamesForParsing, location.TimezoneID)errors = 
append(errors, tsErrors...)filenameTimestampMap = tsMap}// Step 4: Process each filefor i, info := range wavInfos {if info.err != nil {continue}var preParsedTime *time.Timeif ts, ok := filenameTimestampMap[i]; ok {preParsedTime = &ts}fd, err := resolveFileData(info, preParsedTime, location)if err != nil {errors = append(errors, FileImportError{FileName: filepath.Base(info.path),Error: err.Error(),Stage: StageParse,})continue}filesData = append(filesData, fd)}return filesData, errors}// insertSingleFile inserts one file's data into the database within an existing transaction.// Returns (imported=true, nil) on success, (imported=false, nil) if skipped, or (false, error) on failure.func insertSingleFile(ctx context.Context,fd *FileProcessingResult,datasetID, clusterID, locationID string,) (bool, error) {// Check for duplicate hash_, isDuplicate, err := CheckDuplicateHash(tx, fd.Hash)if err != nil {return false, fmt.Errorf("duplicate check failed: %w", err)}if isDuplicate {return false, nil // skipped}// Generate file IDfileID, err := GenerateLongID()if err != nil {return false, fmt.Errorf("ID generation failed: %w", err)}// Insert file recordfileID, fd.FileName, fd.Hash, locationID,fd.TimestampLocal, clusterID, fd.Duration, fd.SampleRate,fd.AstroData.SolarNight, fd.AstroData.CivilNight, fd.AstroData.MoonPhase,)if err != nil {return false, fmt.Errorf("file insert failed: %w", err)}// Insert file_dataset junction (ALWAYS)if err != nil {return false, fmt.Errorf("file_dataset insert failed: %w", err)}// If AudioMoth, insert moth_metadataif fd.IsAudioMoth && fd.MothData != nil {fileID,fd.MothData.Timestamp,&fd.MothData.RecorderID,&fd.MothData.Gain,&fd.MothData.BatteryV,&fd.MothData.TempC,)if err != nil {return false, fmt.Errorf("moth_metadata insert failed: %w", err)}}return true, nil}func insertClusterFiles(filesData []*FileProcessingResult,datasetID, clusterID, locationID string,) (imported, skipped int, errors []FileImportError, err error) {ctx := 
context.Background()for _, fd := range filesData {if insertErr != nil {errors = append(errors, FileImportError{FileName: fd.FileName,Error: insertErr.Error(),Stage: StageInsert,})continue}if wasImported {imported++} else {skipped++}}return imported, skipped, errors, nil}wasImported, insertErr := insertSingleFile(ctx, tx, fd, datasetID, clusterID, locationID)tx Mutator,// insertClusterFiles inserts all file data into database using the provided transaction.// The caller is responsible for committing or rolling back the transaction._, err = tx.ExecContext(ctx, `INSERT INTO moth_metadata (file_id, timestamp, recorder_id, gain, battery_v, temp_c,created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)`,_, err = tx.ExecContext(ctx, `INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)VALUES (?, ?, now(), now())`, fileID, datasetID)_, err = tx.ExecContext(ctx, `INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local,cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,moon_phase, created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)`,tx Mutator,adjustedTimestamps, err := ApplyTimezoneOffset(filenameTimestamps, timezoneID)if err != nil {for _, idx := range filenameIndices {errors = append(errors, FileImportError{FileName: filepath.Base(wavInfos[idx].path),Error: fmt.Sprintf("timezone offset failed: %v", err),Stage: StageParse,})}return result, errors}for j, idx := range filenameIndices {result[idx] = adjustedTimestamps[j]}return result, errors}// resolveFileData resolves timestamp and calculates astronomical data for a single WAV file.func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*FileProcessingResult, error) {tsResult, err := ResolveTimestamp(info.metadata, info.path, location.TimezoneID, true, preParsedTime)if err != nil {return nil, err}astroData := 
CalculateAstronomicalData(tsResult.Timestamp.UTC(),info.metadata.Duration,location.Latitude,location.Longitude,)return &FileProcessingResult{FileName: filepath.Base(info.path),Hash: info.hash,Duration: info.metadata.Duration,SampleRate: info.metadata.SampleRate,TimestampLocal: tsResult.Timestamp,IsAudioMoth: tsResult.IsAudioMoth,MothData: tsResult.MothData,AstroData: astroData,}, nil}// wavInfo holds WAV metadata and hash for a single file during batch processingtype wavInfo struct {path stringmetadata *WAVMetadatahash stringerr error}// parseFilenameTimestampsBatch parses filename timestamps and applies timezone offsets.// Returns a map from wavInfos index to adjusted timestamp, and any errors.func parseFilenameTimestampsBatch(wavInfos []wavInfo,filenameIndices []int,filenames []string,timezoneID string,) (map[int]time.Time, []FileImportError) {var errors []FileImportErrorresult := make(map[int]time.Time)filenameTimestamps, err := ParseFilenameTimestamps(filenames)if err != nil {for _, idx := range filenameIndices {errors = append(errors, FileImportError{FileName: filepath.Base(wavInfos[idx].path),Error: fmt.Sprintf("filename timestamp parsing failed: %v", err),Stage: StageParse,})}return result, errors}_, err = database.ExecContext(ctx,// EnsureClusterPath sets the cluster's path field if it's currently empty.// Accepts any type with QueryRow and ExecContext (e.g. *sql.DB, *sql.Tx, *db.LoggedTx).func EnsureClusterPath(database Mutator, clusterID, folderPath string) error {ctx := context.Background()func GetLocationData(database DB, locationID string) (*LocationData, error) {tx,// Batch insert into database using the provided transactiondatabase DB,tx Mutator,// 5. Batch insert using the provided transaction:// ImportCluster imports all WAV files from a folder into a cluster.// The caller must provide an open transaction via tx; this function does NOT// commit or rollback — the caller owns the transaction lifecycle.
// Import pipeline stage identifiers. A FileImportError carries one of these
// in its Stage field to say where in the pipeline the failure was recorded;
// the string value is what appears in the serialized "stage" JSON field.
const (
	StageScan       ImportStage = "scan"       // directory scanning
	StageHash       ImportStage = "hash"       // hash computation
	StageParse      ImportStage = "parse"      // WAV header / filename parsing
	StageProcess    ImportStage = "process"    // file processing
	StageValidation ImportStage = "validation" // validation checks
	StageInsert     ImportStage = "insert"     // database insertion
	StageImport     ImportStage = "import"     // database import (segment pipeline)
)

// FileImportError records errors encountered during file processing.
// It is a plain data record (all fields are strings or string-based), so it
// can be collected into slices and serialized to JSON as-is.
type FileImportError struct {
	FileName string      `json:"file_name"` // name of the file the error relates to
	Error    string      `json:"error"`     // error message text
	Stage    ImportStage `json:"stage"`     // pipeline stage where the error occurred
}
// DB is the minimal query surface shared by *sql.DB and *sql.Tx.
// Helpers in utils accept it so they can run either standalone or inside a
// transaction.
type DB interface {
	Query(query string, args ...any) (*sql.Rows, error)
	QueryRow(query string, args ...any) *sql.Row
	Exec(query string, args ...any) (sql.Result, error)
}

// CheckDuplicateHash looks up an active file row by its XXH64 hash.
//
// It returns (id, true, nil) when a matching row exists, ("", false, nil) when
// the lookup yields no rows, and ("", false, err) for any other failure, with
// the underlying error wrapped. Anything exposing QueryRow can be passed as q
// (for example *sql.DB or *sql.Tx).
func CheckDuplicateHash(q interface {
	QueryRow(query string, args ...any) *sql.Row
}, hash string) (existingID string, isDuplicate bool, err error) {
	const lookup = "SELECT id FROM file WHERE xxh64_hash = ? AND active = true"

	var id string
	switch scanErr := q.QueryRow(lookup, hash).Scan(&id); scanErr {
	case nil:
		// A row was scanned: the hash is already present.
		return id, true, nil
	case sql.ErrNoRows:
		// No match is the normal "not a duplicate" outcome, not an error.
		return "", false, nil
	default:
		return "", false, fmt.Errorf("duplicate check failed: %w", scanErr)
	}
}
package imp

import (
	"database/sql"
	"slices"
	"strings"
	"testing"

	_ "github.com/duckdb/duckdb-go/v2"
)

// setupMappingTestDB creates an in-memory DB with schema + test species/calltypes.
// Species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000), Tui (sp_tui0000000, inactive)
// Calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002), Roroa/brrr (ct_roroa00001)
// The caller owns the returned handle and is expected to close it.
func setupMappingTestDB(t *testing.T) *sql.DB {
	t.Helper()
	db, err := sql.Open("duckdb", ":memory:")
	if err != nil {
		t.Fatalf("open: %v", err)
	}

	// Create minimal tables needed by mapping validation queries
	mustExecMapping(t, db, `CREATE TABLE species (
		id VARCHAR(12) PRIMARY KEY,
		label VARCHAR(100) UNIQUE NOT NULL,
		active BOOLEAN DEFAULT TRUE
	)`)
	mustExecMapping(t, db, `CREATE TABLE call_type (
		id VARCHAR(12) PRIMARY KEY,
		species_id VARCHAR(12) NOT NULL,
		label VARCHAR(100) NOT NULL,
		active BOOLEAN DEFAULT TRUE
	)`)

	// Insert test species
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_kiwi000000', 'Kiwi', true)")
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_roroa00000', 'Roroa', true)")
	mustExecMapping(t, db, "INSERT INTO species (id, label, active) VALUES ('sp_tui0000000', 'Tui', false)") // inactive

	// Insert test calltypes
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000001', 'sp_kiwi000000', 'song', true)")
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000002', 'sp_kiwi000000', 'duet', true)")
	mustExecMapping(t, db, "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_roroa00001', 'sp_roroa00000', 'brrr', true)")

	return db
}

// assertStringSlice checks that got matches want (order-insensitive).
// Both slices are compared as sorted copies, so duplicates must match too and
// nil is treated the same as an empty slice.
func assertStringSlice(t *testing.T, label string, got, want []string) {
	t.Helper()
	sortedGot := slices.Clone(got)
	sortedWant := slices.Clone(want)
	slices.Sort(sortedGot)
	slices.Sort(sortedWant)
	if !slices.Equal(sortedGot, sortedWant) {
		t.Errorf("%s: got %v, want %v", label, got, want)
	}
}

// mustExecMapping runs a setup statement and aborts the test if it fails.
func mustExecMapping(t *testing.T, db *sql.DB, query string) {
	t.Helper()
	if _, err := db.Exec(query); err != nil {
		t.Fatalf("exec: %v", err)
	}
}

// --- collectMappedLabels ---

func TestCollectMappedLabels(t *testing.T) {
	mapping := MappingFile{
		"GSK":   {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
		"K-M":   {Species: "Kiwi"},
		"noise": {Species: MappingNegative},
	}
	dataCalltypes := map[string]map[string]bool{
		"GSK": {"brrr": true},
		"K-M": {"song": true, "duet": true},
	}

	speciesSet, calltypes := collectMappedLabels(mapping, dataCalltypes)

	if !speciesSet["Roroa"] || !speciesSet["Kiwi"] {
		t.Errorf("speciesSet=%v, want Kiwi and Roroa", speciesSet)
	}
	if speciesSet[MappingNegative] {
		t.Error("sentinel species should be excluded")
	}

	// Roroa has explicit calltype mapping
	if calltypes["Roroa"]["brrr"] != "brrr" {
		t.Errorf("Roroa calltypes=%v", calltypes["Roroa"])
	}
	// Kiwi has no calltype mapping, so data calltypes pass through
	if calltypes["Kiwi"]["song"] != "song" || calltypes["Kiwi"]["duet"] != "duet" {
		t.Errorf("Kiwi calltypes=%v", calltypes["Kiwi"])
	}
}

// --- collectUnmappedCalltypes ---

func TestCollectUnmappedCalltypes(t *testing.T) {
	mapping := MappingFile{
		"GSK": {Species: "Roroa", Calltypes: map[string]string{"Male": "brrr"}},
	}
	dataCalltypes := map[string]map[string]bool{
		"GSK": {"Male": true, "Female": true},
	}
	mappedCalltypes := make(map[string]map[string]string)

	collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)

	// Male maps to brrr
	if mappedCalltypes["Roroa"]["brrr"] != "Male" {
		t.Errorf("mapped Male->brrr: %v", mappedCalltypes["Roroa"])
	}
	// Female has no mapping entry, passes through as-is
	if mappedCalltypes["Roroa"]["Female"] != "Female" {
		t.Errorf("unmapped Female passthrough: %v", mappedCalltypes["Roroa"])
	}
}

// --- validateMappedSpecies ---

func TestValidateMappedSpecies(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	t.Run("all species exist in DB", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Kiwi": true, "Roroa": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) > 0 {
			t.Errorf("missing species: %v", result.MissingDBSpecies)
		}
	})

	t.Run("species not in DB reported", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Phantom": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 1 || result.MissingDBSpecies[0] != "Phantom" {
			t.Errorf("expected [Phantom], got %v", result.MissingDBSpecies)
		}
	})

	t.Run("inactive species not found", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{"Tui": true}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 1 {
			t.Errorf("inactive species should be missing, got %v", result.MissingDBSpecies)
		}
	})

	t.Run("empty set is no-op", func(t *testing.T) {
		result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
		err := validateMappedSpecies(db, map[string]bool{}, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingDBSpecies) != 0 {
			t.Errorf("expected no missing, got %v", result.MissingDBSpecies)
		}
	})
}

// --- validateMappedCalltypes ---

func TestValidateMappedCalltypes(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	t.Run("all calltypes exist", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {"song": "data-song", "duet": "data-duet"},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) > 0 {
			t.Errorf("missing calltypes: %v", result.MissingCalltypes)
		}
	})

	t.Run("missing calltype reported", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {"phantom": "data-phantom"},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) != 1 {
			t.Errorf("expected 1 missing, got %v", result.MissingCalltypes)
		}
	})

	t.Run("empty calltype map skips species", func(t *testing.T) {
		result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
		ctMap := map[string]map[string]string{
			"Kiwi": {},
		}
		err := validateMappedCalltypes(db, ctMap, result)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(result.MissingCalltypes) != 0 {
			t.Errorf("expected none missing, got %v", result.MissingCalltypes)
		}
	})
}

// --- ValidateMappingAgainstDB (integration of all above) ---

func TestValidateMappingAgainstDB(t *testing.T) {
	db := setupMappingTestDB(t)
	defer db.Close()

	tests := []struct {
		name              string
		mapping           MappingFile
		dataSpecies       map[string]bool
		dataCT            map[string]map[string]bool
		hasErrors         bool
		missingSpecies    []string
		missingDBSpecies  []string
		missingCalltypeCT string // substring expected in MissingCalltypes key
		errorContains     string // substring expected in result.Error()
	}{
		{
			name: "valid mapping - no errors",
			mapping: MappingFile{
				"GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
				"K-M": {Species: "Kiwi"},
			},
			dataSpecies: map[string]bool{"GSK": true, "K-M": true},
			dataCT:      map[string]map[string]bool{"GSK": {"brrr": true}, "K-M": {"song": true}},
		},
		{
			name:           "missing species in mapping",
			mapping:        MappingFile{"GSK": {Species: "Roroa"}},
			dataSpecies:    map[string]bool{"GSK": true, "K-M": true},
			hasErrors:      true,
			missingSpecies: []string{"K-M"},
		},
		{
			name:             "mapped species not in DB",
			mapping:          MappingFile{"PHANTOM": {Species: "Phantom"}},
			dataSpecies:      map[string]bool{"PHANTOM": true},
			hasErrors:        true,
			missingDBSpecies: []string{"Phantom"},
		},
		{
			name:        "sentinel species excluded from DB check",
			mapping:     MappingFile{"noise": {Species: MappingNegative}, "ignore": {Species: MappingIgnore}},
			dataSpecies: map[string]bool{"noise": true, "ignore": true},
		},
		{
			name: "missing calltype in DB",
			mapping: MappingFile{
				"K-M": {Species: "Kiwi", Calltypes: map[string]string{"song": "song", "phantom": "phantom"}},
			},
			dataSpecies:       map[string]bool{"K-M": true},
			dataCT:            map[string]map[string]bool{"K-M": {"song": true, "phantom": true}},
			hasErrors:         true,
			missingCalltypeCT: "phantom",
			errorContains:     "phantom",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := ValidateMappingAgainstDB(db, tt.mapping, tt.dataSpecies, tt.dataCT)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			if result.HasErrors() != tt.hasErrors {
				t.Errorf("HasErrors()=%v, want %v", result.HasErrors(), tt.hasErrors)
			}

			assertStringSlice(t, "MissingSpecies", result.MissingSpecies, tt.missingSpecies)
			assertStringSlice(t, "MissingDBSpecies", result.MissingDBSpecies, tt.missingDBSpecies)

			// The expected substring must actually appear in one of the
			// reported keys; a non-empty map alone is not enough.
			if tt.missingCalltypeCT != "" {
				found := false
				for key := range result.MissingCalltypes {
					if strings.Contains(key, tt.missingCalltypeCT) {
						found = true
						break
					}
				}
				if !found {
					t.Errorf("expected a MissingCalltypes key containing %q, got %v",
						tt.missingCalltypeCT, result.MissingCalltypes)
				}
			}

			if tt.errorContains != "" && !strings.Contains(result.Error(), tt.errorContains) {
				t.Errorf("error should contain %q: %s", tt.errorContains, result.Error())
			}
		})
	}
}
package imp

import (
	"context"
	"database/sql"
	"fmt"
	"sort"

	"skraak/mapping"
	"skraak/utils"
)

// Re-export mapping types for convenience within this package.
// External callers should use the mapping package directly.
type (
	MappingFile             = mapping.File
	SpeciesMapping          = mapping.SpeciesMapping
	MappingValidationResult = mapping.ValidationResult
)

const (
	MappingNegative = mapping.Negative
	MappingIgnore   = mapping.Ignore
)

var (
	LoadMappingFile = mapping.Load
)

type (
	MappingKind = mapping.Kind
)

const (
	MappingReal = mapping.Real
	MappingNeg  = mapping.Neg
	MappingIgn  = mapping.Ign
)

// MappingQuerier is the read-only interface needed for mapping validation.
// Satisfied by *sql.DB, *sql.Tx, and *db.LoggedTx.
type MappingQuerier interface {
	QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}

// ValidateMappingAgainstDB validates that all mapped species and calltypes
// exist in the database, and that the mapping covers every species found in
// the .data files.
//
// dataSpeciesSet holds the species names seen in .data files; dataCalltypes
// maps each of those species to the set of calltypes seen for it. Validation
// findings are collected in the returned result; the error return is reserved
// for database failures, in which case the result may be only partially filled.
func ValidateMappingAgainstDB(
	queryer MappingQuerier,
	m mapping.File,
	dataSpeciesSet map[string]bool,
	dataCalltypes map[string]map[string]bool, // species -> calltype -> true
) (mapping.ValidationResult, error) {
	result := mapping.ValidationResult{
		MissingSpecies:   make([]string, 0),
		MissingDBSpecies: make([]string, 0),
		MissingCalltypes: make(map[string]string),
	}

	// Check all .data species are in mapping
	for species := range dataSpeciesSet {
		if _, exists := m[species]; !exists {
			result.MissingSpecies = append(result.MissingSpecies, species)
		}
	}
	sort.Strings(result.MissingSpecies)

	// Collect all mapped species and calltypes
	mappedSpeciesSet, mappedCalltypes := collectMappedLabels(m, dataCalltypes)

	// Validate species exist in DB
	if err := validateMappedSpecies(queryer, mappedSpeciesSet, &result); err != nil {
		return result, err
	}

	// Validate calltypes exist in DB
	if err := validateMappedCalltypes(queryer, mappedCalltypes, &result); err != nil {
		return result, err
	}

	return result, nil
}

// collectUnmappedCalltypes adds calltypes from .data files that have no explicit
// mapping entry (dataCT == dbCT by convention) to the mappedCalltypes set.
//
// mappedCalltypes is keyed by DB species, then DB calltype, with the .data
// calltype as the value. Species absent from the mapping are skipped, as are
// species mapped to a sentinel (Negative / Ignore): those are not database
// species, so their calltypes must not be validated against the database.
func collectUnmappedCalltypes(m mapping.File, dataCalltypes map[string]map[string]bool, mappedCalltypes map[string]map[string]string) {
	for dataSpecies, ctSet := range dataCalltypes {
		sm, exists := m[dataSpecies]
		if !exists {
			continue
		}
		dbSpecies := sm.Species
		if dbSpecies == mapping.Negative || dbSpecies == mapping.Ignore {
			continue
		}
		for dataCT := range ctSet {
			// Indexing a nil map is safe, so no separate nil check is needed.
			dbCT := dataCT
			if mapped, ok := sm.Calltypes[dataCT]; ok {
				dbCT = mapped
			}
			if mappedCalltypes[dbSpecies] == nil {
				mappedCalltypes[dbSpecies] = make(map[string]string)
			}
			mappedCalltypes[dbSpecies][dbCT] = dataCT
		}
	}
}

// collectMappedLabels gathers the DB-side labels referenced by the mapping.
//
// It returns the set of non-sentinel DB species, and a map of
// DB species -> DB calltype -> .data calltype built from the explicit calltype
// mappings plus the calltypes observed in dataCalltypes.
func collectMappedLabels(m mapping.File, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
	mappedSpeciesSet := make(map[string]bool)
	mappedCalltypes := make(map[string]map[string]string)

	for _, sm := range m {
		if sm.Species == mapping.Negative || sm.Species == mapping.Ignore {
			continue
		}
		mappedSpeciesSet[sm.Species] = true

		if len(sm.Calltypes) > 0 {
			if mappedCalltypes[sm.Species] == nil {
				mappedCalltypes[sm.Species] = make(map[string]string)
			}
			for dataCT, dbCT := range sm.Calltypes {
				mappedCalltypes[sm.Species][dbCT] = dataCT
			}
		}
	}

	collectUnmappedCalltypes(m, dataCalltypes, mappedCalltypes)

	return mappedSpeciesSet, mappedCalltypes
}

// queryLabelSet runs a query that selects a single string column and returns
// the distinct values as a set. The result set is closed before returning, and
// scan or iteration failures are reported instead of being dropped.
func queryLabelSet(queryer MappingQuerier, query string, args []any) (map[string]bool, error) {
	rows, err := queryer.QueryContext(context.Background(), query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	labels := make(map[string]bool)
	for rows.Next() {
		var label string
		if err := rows.Scan(&label); err != nil {
			return nil, fmt.Errorf("scanning label: %w", err)
		}
		labels[label] = true
	}
	// rows.Next returns false on error as well as at the end of the result set.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating labels: %w", err)
	}
	return labels, nil
}

// validateMappedSpecies checks that all mapped species exist in the database
// as active rows. Labels that are not found are appended, in sorted order, to
// result.MissingDBSpecies. An empty set performs no query.
func validateMappedSpecies(queryer MappingQuerier, mappedSpeciesSet map[string]bool, result *mapping.ValidationResult) error {
	if len(mappedSpeciesSet) == 0 {
		return nil
	}

	speciesLabels := make([]string, 0, len(mappedSpeciesSet))
	for s := range mappedSpeciesSet {
		speciesLabels = append(speciesLabels, s)
	}
	sort.Strings(speciesLabels)

	query := `SELECT label FROM species WHERE label IN (` + utils.Placeholders(len(speciesLabels)) + `) AND active = true`
	args := make([]any, len(speciesLabels))
	for i, s := range speciesLabels {
		args[i] = s
	}

	foundSpecies, err := queryLabelSet(queryer, query, args)
	if err != nil {
		return fmt.Errorf("failed to query species: %w", err)
	}

	for _, s := range speciesLabels {
		if !foundSpecies[s] {
			result.MissingDBSpecies = append(result.MissingDBSpecies, s)
		}
	}
	return nil
}

// validateMappedCalltypes checks that all mapped calltypes exist in the
// database as active rows under their species.
//
// mappedCalltypes is DB species -> DB calltype -> .data calltype. Each missing
// calltype is recorded in result.MissingCalltypes as
// "dbSpecies/dataCT" -> "dbSpecies/dbCT". Species with no calltypes are
// skipped. Species are processed in sorted order so that a database failure is
// reported for the same species on every run.
func validateMappedCalltypes(queryer MappingQuerier, mappedCalltypes map[string]map[string]string, result *mapping.ValidationResult) error {
	speciesNames := make([]string, 0, len(mappedCalltypes))
	for dbSpecies := range mappedCalltypes {
		speciesNames = append(speciesNames, dbSpecies)
	}
	sort.Strings(speciesNames)

	for _, dbSpecies := range speciesNames {
		ctMap := mappedCalltypes[dbSpecies]
		if len(ctMap) == 0 {
			continue
		}

		ctLabels := make([]string, 0, len(ctMap))
		for dbCT := range ctMap {
			ctLabels = append(ctLabels, dbCT)
		}
		sort.Strings(ctLabels)

		query := `
			SELECT ct.label
			FROM call_type ct
			JOIN species s ON ct.species_id = s.id
			WHERE s.label = ? AND ct.label IN (` + utils.Placeholders(len(ctLabels)) + `) AND ct.active = true`
		args := make([]any, 1+len(ctLabels))
		args[0] = dbSpecies
		for i, ct := range ctLabels {
			args[1+i] = ct
		}

		// The helper closes its result set before returning, so rows are not
		// held open across loop iterations.
		foundCT, err := queryLabelSet(queryer, query, args)
		if err != nil {
			return fmt.Errorf("failed to query calltypes for species %s: %w", dbSpecies, err)
		}

		for dbCT, dataCT := range ctMap {
			if !foundCT[dbCT] {
				key := fmt.Sprintf("%s/%s", dbSpecies, dataCT)
				value := fmt.Sprintf("%s/%s", dbSpecies, dbCT)
				result.MissingCalltypes[key] = value
			}
		}
	}
	return nil
}
TotalFiles int `json:"total_files"`ImportedFiles int `json:"imported_files"`SkippedFiles int `json:"skipped_files"` // DuplicatesFailedFiles int `json:"failed_files"`TotalDuration float64 `json:"total_duration_seconds"`ProcessingTime string `json:"processing_time"`Errors []utils.FileImportError `json:"errors,omitempty"`
TotalFiles int `json:"total_files"`ImportedFiles int `json:"imported_files"`SkippedFiles int `json:"skipped_files"` // DuplicatesFailedFiles int `json:"failed_files"`TotalDuration float64 `json:"total_duration_seconds"`ProcessingTime string `json:"processing_time"`Errors []FileImportError `json:"errors,omitempty"`
File string `json:"file,omitempty"`Stage utils.ImportStage `json:"stage"`Message string `json:"message"`
File string `json:"file,omitempty"`Stage ImportStage `json:"stage"`Message string `json:"message"`
func loadCalltypeIDs(q db.Querier, mapping utils.MappingFile, uniqueCalltypes map[string]map[string]bool) (map[string]map[string]string, error) {
func loadCalltypeIDs(q db.Querier, mapping MappingFile, uniqueCalltypes map[string]map[string]bool) (map[string]map[string]string, error) {
Summary ImportSummary `json:"summary"`FileIDs []string `json:"file_ids"`Errors []utils.FileImportError `json:"errors,omitempty"`
Summary ImportSummary `json:"summary"`FileIDs []string `json:"file_ids"`Errors []FileImportError `json:"errors,omitempty"`
package imp

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
)

// ImportStage identifies the pipeline stage where an error occurred.
type ImportStage string

// Import pipeline stage identifiers, used as FileImportError.Stage values.
const (
	StageScan       ImportStage = "scan"       // directory scanning
	StageHash       ImportStage = "hash"       // hash computation
	StageParse      ImportStage = "parse"      // WAV header / filename parsing
	StageProcess    ImportStage = "process"    // file processing
	StageValidation ImportStage = "validation" // validation checks
	StageInsert     ImportStage = "insert"     // database insertion
	StageImport     ImportStage = "import"     // database import (segment pipeline)
)

// FileImportError records errors encountered during file processing.
type FileImportError struct {
	FileName string      `json:"file_name"` // name of the file the error relates to
	Error    string      `json:"error"`     // error message text
	Stage    ImportStage `json:"stage"`     // pipeline stage where the error occurred
}

// CheckDuplicateHash checks if an active file with the given XXH64 hash
// already exists.
//
// It returns (id, true, nil) when a matching row is found, ("", false, nil)
// when there is none, and ("", false, err) with the cause wrapped for any
// other failure. Works with *sql.DB, *sql.Tx, and *db.LoggedTx.
func CheckDuplicateHash(q interface {
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}, hash string) (existingID string, isDuplicate bool, err error) {
	err = q.QueryRowContext(context.Background(),
		"SELECT id FROM file WHERE xxh64_hash = ? AND active = true",
		hash,
	).Scan(&existingID)

	switch {
	case err == nil:
		return existingID, true, nil
	case errors.Is(err, sql.ErrNoRows):
		// errors.Is rather than == so that a wrapped sql.ErrNoRows is still
		// treated as "no duplicate" instead of a failure.
		return "", false, nil
	default:
		return "", false, fmt.Errorf("duplicate check failed: %w", err)
	}
}
package imp

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"time"

	"skraak/utils"
)

// Mutator represents a transaction-like object that supports both reads and writes.
// Both *sql.Tx and *db.LoggedTx satisfy this interface.
// Uses Context variants exclusively so all DB-facing interfaces compose as
// compatible subsets of *sql.DB / *sql.Tx.
type Mutator interface {
	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}

// Reader is a read-only interface for database queries.
// Both *sql.DB and *db.LoggedTx satisfy this interface.
type Reader interface {
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}

// ClusterImportInput defines parameters for importing one cluster.
type ClusterImportInput struct {
	FolderPath string // Absolute path to folder with WAV files
	DatasetID  string // 12-char dataset ID
	LocationID string // 12-char location ID
	ClusterID  string // 12-char cluster ID
	Recursive  bool   // Scan subfolders?
}

// ClusterImportOutput provides results and statistics.
type ClusterImportOutput struct {
	TotalFiles     int
	ImportedFiles  int
	SkippedFiles   int // Duplicates
	FailedFiles    int
	AudioMothFiles int
	TotalDuration  float64
	ProcessingTime string
	Errors         []FileImportError
}

// LocationData holds location information needed for processing.
type LocationData struct {
	Latitude   float64
	Longitude  float64
	TimezoneID string
}

// ImportCluster imports all WAV files from a folder into a cluster.
// The caller must provide an open transaction via tx; this function does NOT
// commit or rollback — the caller owns the transaction lifecycle.
//
// This is the canonical cluster import logic used by both:
//   - import_files.go (single cluster)
//   - bulk_file_import.go (multiple clusters)
//
// Steps:
//  1. Validate folder exists
//  2. Get location metadata (lat/lon/timezone) from database
//  3. Scan folder for WAV files (recursive or not)
//  4. Batch process all files:
//     - Parse WAV headers (includes file mod time)
//     - Batch parse filename timestamps (variance-based)
//     - Resolve timestamps (AudioMoth → filename → file mod time)
//     - Calculate hashes
//     - Calculate astronomical data
//  5. Batch insert using the provided transaction:
//     - Check duplicates
//     - INSERT INTO file
//     - INSERT INTO file_dataset (ALWAYS)
//     - INSERT INTO moth_metadata (if AudioMoth)
//  6. Return summary statistics
//
// Per-file failures are collected in the returned output's Errors slice; a
// non-nil error is returned only when the folder, the location lookup, the
// folder scan, or the insert phase as a whole fails.
func ImportCluster(
	database Reader,
	tx Mutator,
	input ClusterImportInput,
) (*ClusterImportOutput, error) {
	startTime := time.Now()

	// Validate folder exists
	info, err := os.Stat(input.FolderPath)
	if err != nil {
		return nil, fmt.Errorf("folder not accessible: %w", err)
	}
	if !info.IsDir() {
		return nil, fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}

	// Get location data for astronomical calculations
	locationData, err := GetLocationData(database, input.LocationID)
	if err != nil {
		return nil, fmt.Errorf("failed to get location data: %w", err)
	}

	// Scan folder for WAV files
	wavFiles, err := utils.FindFiles(input.FolderPath, utils.FindFilesOptions{
		Extension:    ".wav",
		Recursive:    input.Recursive,
		SkipPrefixes: []string{"Clips_"},
		SkipHidden:   true, // Standard to ignore hidden
		MinSize:      1,    // Must have size > 0
	})
	if err != nil {
		return nil, fmt.Errorf("failed to scan folder: %w", err)
	}

	// If no files, return early
	if len(wavFiles) == 0 {
		return &ClusterImportOutput{
			TotalFiles:     0,
			ProcessingTime: time.Since(startTime).String(),
			Errors:         []FileImportError{},
		}, nil
	}

	// Batch process all files
	filesData, processErrors := batchProcessFiles(wavFiles, locationData)

	// Batch insert into database using the provided transaction
	imported, skipped, insertErrors, err := insertClusterFiles(
		tx,
		filesData,
		input.DatasetID,
		input.ClusterID,
		input.LocationID,
	)
	if err != nil {
		return nil, fmt.Errorf("database insertion failed: %w", err)
	}

	// Combine all errors
	allErrors := append(processErrors, insertErrors...)

	// Calculate summary statistics
	audiomothCount := 0
	totalDuration := 0.0
	for _, fd := range filesData {
		if fd.IsAudioMoth {
			audiomothCount++
		}
		totalDuration += fd.Duration
	}

	return &ClusterImportOutput{
		TotalFiles:     len(wavFiles),
		ImportedFiles:  imported,
		SkippedFiles:   skipped,
		FailedFiles:    len(allErrors),
		AudioMothFiles: audiomothCount,
		TotalDuration:  totalDuration,
		ProcessingTime: time.Since(startTime).String(),
		Errors:         allErrors,
	}, nil
}

// GetLocationData retrieves location coordinates and timezone for locationID.
func GetLocationData(database Reader, locationID string) (*LocationData, error) {
	ctx := context.Background()

	var loc LocationData
	err := database.QueryRowContext(ctx,
		"SELECT latitude, longitude, timezone_id FROM location WHERE id = ?",
		locationID,
	).Scan(&loc.Latitude, &loc.Longitude, &loc.TimezoneID)
	if err != nil {
		return nil, fmt.Errorf("failed to query location data: %w", err)
	}

	return &loc, nil
}

// EnsureClusterPath sets the cluster's path field if it's currently empty.
// Accepts any type with QueryRowContext and ExecContext (e.g. *sql.DB, *sql.Tx, *db.LoggedTx).
func EnsureClusterPath(database Mutator, clusterID, folderPath string) error {
	ctx := context.Background()

	// Check if cluster already has a path
	var currentPath sql.NullString
	err := database.QueryRowContext(ctx, "SELECT path FROM cluster WHERE id = ?", clusterID).Scan(&currentPath)
	if err != nil {
		return fmt.Errorf("failed to query cluster: %w", err)
	}

	// If path is already set, skip
	if currentPath.Valid && currentPath.String != "" {
		return nil
	}

	// Normalize folder path
	normalizedPath := utils.NormalizeFolderPath(folderPath)

	// Update cluster with normalized path
	_, err = database.ExecContext(ctx,
		"UPDATE cluster SET path = ?, last_modified = now() WHERE id = ?",
		normalizedPath,
		clusterID,
	)
	if err != nil {
		return fmt.Errorf("failed to update cluster path: %w", err)
	}

	return nil
}

// wavInfo holds WAV metadata and hash for a single file during batch processing.
type wavInfo struct {
	path     string
	metadata *utils.WAVMetadata
	hash     string
	err      error
}

// parseFilenameTimestampsBatch parses filename timestamps and applies timezone offsets.
// Returns a map from wavInfos index to adjusted timestamp, and any errors.
// When either parsing or the timezone adjustment fails, the failure is
// reported once per entry of filenameIndices and the returned map is empty.
func parseFilenameTimestampsBatch(
	wavInfos []wavInfo,
	filenameIndices []int,
	filenames []string,
	timezoneID string,
) (map[int]time.Time, []FileImportError) {
	var errors []FileImportError
	result := make(map[int]time.Time)

	filenameTimestamps, err := utils.ParseFilenameTimestamps(filenames)
	if err != nil {
		for _, idx := range filenameIndices {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(wavInfos[idx].path),
				Error:    fmt.Sprintf("filename timestamp parsing failed: %v", err),
				Stage:    StageParse,
			})
		}
		return result, errors
	}

	adjustedTimestamps, err := utils.ApplyTimezoneOffset(filenameTimestamps, timezoneID)
	if err != nil {
		for _, idx := range filenameIndices {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(wavInfos[idx].path),
				Error:    fmt.Sprintf("timezone offset failed: %v", err),
				Stage:    StageParse,
			})
		}
		return result, errors
	}

	// adjustedTimestamps is indexed in step with filenameIndices.
	for j, idx := range filenameIndices {
		result[idx] = adjustedTimestamps[j]
	}
	return result, errors
}

// resolveFileData resolves timestamp and calculates astronomical data for a single WAV file.
func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*utils.FileProcessingResult, error) {
	tsResult, err := utils.ResolveTimestamp(info.metadata, info.path, location.TimezoneID, true, preParsedTime)
	if err != nil {
		return nil, err
	}

	astroData := utils.CalculateAstronomicalData(
		tsResult.Timestamp.UTC(),
		info.metadata.Duration,
		location.Latitude,
		location.Longitude,
	)

	return &utils.FileProcessingResult{
		FileName:       filepath.Base(info.path),
		Hash:           info.hash,
		Duration:       info.metadata.Duration,
		SampleRate:     info.metadata.SampleRate,
		TimestampLocal: tsResult.Timestamp,
		IsAudioMoth:    tsResult.IsAudioMoth,
		MothData:       tsResult.MothData,
		AstroData:      astroData,
	}, nil
}

// batchProcessFiles extracts metadata and calculates hashes for all files.
// Files that fail at any step are reported in the returned error slice and
// omitted from the returned results.
func batchProcessFiles(wavFiles []string, location *LocationData) ([]*utils.FileProcessingResult, []FileImportError) {
	var filesData []*utils.FileProcessingResult
	var errors []FileImportError

	// Step 1: Extract WAV metadata and hash in single pass
	wavInfos := make([]wavInfo, len(wavFiles))
	for i, path := range wavFiles {
		metadata, hash, err := utils.ParseWAVHeaderWithHash(path)
		wavInfos[i] = wavInfo{path: path, metadata: metadata, hash: hash, err: err}
	}

	// Step 2: Collect filenames for batch timestamp parsing
	var filenamesForParsing []string
	var filenameIndices []int
	for i, info := range wavInfos {
		if info.err != nil {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(info.path),
				Error:    info.err.Error(),
				Stage:    StageParse,
			})
			continue
		}
		if utils.HasTimestampFilename(info.path) {
			filenamesForParsing = append(filenamesForParsing, filepath.Base(info.path))
			filenameIndices = append(filenameIndices, i)
		}
	}

	// Step 3: Parse filename timestamps in batch (if any)
	filenameTimestampMap := make(map[int]time.Time)
	if len(filenamesForParsing) > 0 {
		tsMap, tsErrors := parseFilenameTimestampsBatch(wavInfos, filenameIndices, filenamesForParsing, location.TimezoneID)
		errors = append(errors, tsErrors...)
		filenameTimestampMap = tsMap
	}

	// Step 4: Process each file
	for i, info := range wavInfos {
		if info.err != nil {
			continue // already reported in step 2
		}

		var preParsedTime *time.Time
		if ts, ok := filenameTimestampMap[i]; ok {
			preParsedTime = &ts
		}

		fd, err := resolveFileData(info, preParsedTime, location)
		if err != nil {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(info.path),
				Error:    err.Error(),
				Stage:    StageParse,
			})
			continue
		}
		filesData = append(filesData, fd)
	}

	return filesData, errors
}

// insertSingleFile inserts one file's data into the database within an existing transaction.
// Returns (imported=true, nil) on success, (imported=false, nil) if skipped, or (false, error) on failure.
func insertSingleFile(
	ctx context.Context,
	tx Mutator,
	fd *utils.FileProcessingResult,
	datasetID, clusterID, locationID string,
) (bool, error) {
	// Check for duplicate hash
	_, isDuplicate, err := CheckDuplicateHash(tx, fd.Hash)
	if err != nil {
		return false, fmt.Errorf("duplicate check failed: %w", err)
	}
	if isDuplicate {
		return false, nil // skipped
	}

	// Generate file ID
	fileID, err := utils.GenerateLongID()
	if err != nil {
		return false, fmt.Errorf("ID generation failed: %w", err)
	}

	// Insert file record
	_, err = tx.ExecContext(ctx, `
		INSERT INTO file (
			id, file_name, xxh64_hash, location_id, timestamp_local,
			cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
			moon_phase, created_at, last_modified, active
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
	`,
		fileID, fd.FileName, fd.Hash, locationID,
		fd.TimestampLocal, clusterID, fd.Duration, fd.SampleRate,
		fd.AstroData.SolarNight, fd.AstroData.CivilNight, fd.AstroData.MoonPhase,
	)
	if err != nil {
		return false, fmt.Errorf("file insert failed: %w", err)
	}

	// Insert file_dataset junction (ALWAYS)
	_, err = tx.ExecContext(ctx, `
		INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
		VALUES (?, ?, now(), now())
	`, fileID, datasetID)
	if err != nil {
		return false, fmt.Errorf("file_dataset insert failed: %w", err)
	}

	// If AudioMoth, insert moth_metadata
	if fd.IsAudioMoth && fd.MothData != nil {
		_, err = tx.ExecContext(ctx, `
			INSERT INTO moth_metadata (
				file_id, timestamp, recorder_id, gain, battery_v, temp_c,
				created_at, last_modified, active
			) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
		`,
			fileID,
			fd.MothData.Timestamp,
			&fd.MothData.RecorderID,
			&fd.MothData.Gain,
			&fd.MothData.BatteryV,
			&fd.MothData.TempC,
		)
		if err != nil {
			return false, fmt.Errorf("moth_metadata insert failed: %w", err)
		}
	}

	return true, nil
}

// insertClusterFiles inserts all file data into database using the provided transaction.
// The caller is responsible for committing or rolling back the transaction.
// A failure on one file is recorded with StageInsert and does not stop the
// loop; the returned err is always nil in the current implementation.
func insertClusterFiles(
	tx Mutator,
	filesData []*utils.FileProcessingResult,
	datasetID, clusterID, locationID string,
) (imported, skipped int, errors []FileImportError, err error) {
	ctx := context.Background()

	for _, fd := range filesData {
		wasImported, insertErr := insertSingleFile(ctx, tx, fd, datasetID, clusterID, locationID)
		if insertErr != nil {
			errors = append(errors, FileImportError{
				FileName: fd.FileName,
				Error:    insertErr.Error(),
				Stage:    StageInsert,
			})
			continue
		}
		if wasImported {
			imported++
		} else {
			skipped++
		}
	}

	return imported, skipped, errors, nil
}
package imp

import (
	"database/sql"
	"testing"

	_ "github.com/duckdb/duckdb-go/v2"
)

// assertNotDuplicate runs CheckDuplicateHash for hash and fails the test
// unless the lookup succeeds and reports "no duplicate" with an empty ID.
// dupMsg is the message reported when a duplicate is unexpectedly found.
func assertNotDuplicate(t *testing.T, conn *sql.DB, hash, dupMsg string) {
	t.Helper()

	gotID, gotDup, err := CheckDuplicateHash(conn, hash)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if gotDup {
		t.Error(dupMsg)
	}
	if gotID != "" {
		t.Errorf("expected empty id, got %q", gotID)
	}
}

// An empty file table never yields a duplicate.
func TestCheckDuplicateHash_NoRows(t *testing.T) {
	conn := openTestDB(t)
	defer conn.Close()

	assertNotDuplicate(t, conn, "abcdef0123456789", "expected isDuplicate=false when no rows")
}

// An active row with the same hash is reported together with its ID.
func TestCheckDuplicateHash_FoundDuplicate(t *testing.T) {
	conn := openTestDB(t)
	defer conn.Close()

	const (
		wantHash = "deadbeef12345678"
		wantID   = "test_file_id_123"
	)
	if _, err := conn.Exec(`
		INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES (?, '/test/file.wav', 'ds1', ?, true)
	`, wantID, wantHash); err != nil {
		t.Fatalf("insert: %v", err)
	}

	gotID, gotDup, err := CheckDuplicateHash(conn, wantHash)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !gotDup {
		t.Error("expected isDuplicate=true")
	}
	if gotID != wantID {
		t.Errorf("expected id=%q, got %q", wantID, gotID)
	}
}

// A row flagged active=false must not count as a duplicate.
func TestCheckDuplicateHash_InactiveNotDuplicate(t *testing.T) {
	conn := openTestDB(t)
	defer conn.Close()

	const (
		staleHash = "cafebeef12345678"
		staleID   = "inactive_file_id"
	)
	if _, err := conn.Exec(`
		INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES (?, '/test/old.wav', 'ds1', ?, false)
	`, staleID, staleHash); err != nil {
		t.Fatalf("insert: %v", err)
	}

	assertNotDuplicate(t, conn, staleHash, "expected isDuplicate=false for inactive file")
}

// A stored hash does not match a lookup for a different hash.
func TestCheckDuplicateHash_DifferentHashNoDuplicate(t *testing.T) {
	conn := openTestDB(t)
	defer conn.Close()

	if _, err := conn.Exec(`
		INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
		VALUES ('id1', '/test/a.wav', 'ds1', 'hash_aaaa', true)
	`); err != nil {
		t.Fatalf("insert: %v", err)
	}

	assertNotDuplicate(t, conn, "hash_bbbb", "expected isDuplicate=false for different hash")
}

// openTestDB creates a DuckDB in-memory database with the minimal schema
// needed for the file table.
func openTestDB(t *testing.T) *sql.DB {
	t.Helper()

	conn, err := sql.Open("duckdb", "")
	if err != nil {
		t.Fatalf("open duckdb: %v", err)
	}

	const schema = `
		CREATE TABLE file (
			id VARCHAR PRIMARY KEY,
			path VARCHAR,
			dataset_id VARCHAR,
			xxh64_hash VARCHAR,
			active BOOLEAN DEFAULT true
		)
	`
	if _, err = conn.Exec(schema); err != nil {
		conn.Close()
		t.Fatalf("create table: %v", err)
	}
	return conn
}
fileRows, err := processClipLabelsFile(pf.path, pf.df, ctx.mapping, ctx.classIdx, ctx.classes, input, ctx.finalClipMode, ctx.cwd, ctx.folderAbs, &out)
fileRows, err := processClipLabelsFile(pf.path, pf.df, ctx.mf, ctx.classIdx, ctx.classes, input, ctx.finalClipMode, ctx.cwd, ctx.folderAbs, &out)
func resolveLabel(lbl *utils.Label, seg *utils.Segment, filter string, mapping utils.MappingFile, classIdx map[string]int) (resolvedSeg, bool, bool) {
func resolveLabel(lbl *utils.Label, seg *utils.Segment, filter string, mf mapping.File, classIdx map[string]int) (resolvedSeg, bool, bool) {
package mapping

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestLoad covers the success path of Load and each of its rejection cases.
func TestLoad(t *testing.T) {
	t.Run("valid mapping", func(t *testing.T) {
		content := `{
			"GSK": {"species": "Roroa", "calltypes": {"Male": "Male - Solo"}},
			"Don't Know": {"species": "Don't Know"}
		}`
		path := createTempFile(t, content)

		mapping, err := Load(path)
		if err != nil {
			t.Fatalf("expected no error, got: %v", err)
		}
		if len(mapping) != 2 {
			t.Errorf("expected 2 entries, got %d", len(mapping))
		}
		if mapping["GSK"].Species != "Roroa" {
			t.Errorf("expected GSK -> Roroa, got %s", mapping["GSK"].Species)
		}
		if mapping["GSK"].Calltypes["Male"] != "Male - Solo" {
			t.Errorf("expected GSK Male -> Male - Solo, got %s", mapping["GSK"].Calltypes["Male"])
		}
	})

	t.Run("invalid JSON", func(t *testing.T) {
		path := createTempFile(t, `{invalid json}`)
		if _, err := Load(path); err == nil {
			t.Fatal("expected error for invalid JSON")
		}
	})

	t.Run("empty file", func(t *testing.T) {
		path := createTempFile(t, `{}`)
		if _, err := Load(path); err == nil {
			t.Fatal("expected error for empty mapping")
		}
	})

	t.Run("missing species field", func(t *testing.T) {
		path := createTempFile(t, `{"GSK": {"calltypes": {"Male": "Male - Solo"}}}`)
		if _, err := Load(path); err == nil {
			t.Fatal("expected error for missing species field")
		}
	})

	t.Run("empty species field", func(t *testing.T) {
		path := createTempFile(t, `{"GSK": {"species": ""}}`)
		if _, err := Load(path); err == nil {
			t.Fatal("expected error for empty species field")
		}
	})

	t.Run("nonexistent file", func(t *testing.T) {
		if _, err := Load("/nonexistent/path/mapping.json"); err == nil {
			t.Fatal("expected error for nonexistent file")
		}
	})
}

// TestGetDBSpecies checks lookups for present and absent .data species names.
func TestGetDBSpecies(t *testing.T) {
	mapping := File{
		"GSK": {Species: "Roroa"},
		"K-M": {Species: "Kiwi"},
	}

	t.Run("found", func(t *testing.T) {
		species, ok := mapping.GetDBSpecies("GSK")
		if !ok {
			t.Fatal("expected to find GSK")
		}
		if species != "Roroa" {
			t.Errorf("expected Roroa, got %s", species)
		}
	})

	t.Run("not found", func(t *testing.T) {
		if _, ok := mapping.GetDBSpecies("UNKNOWN"); ok {
			t.Fatal("expected not to find UNKNOWN")
		}
	})
}

// TestGetDBCalltype checks the mapped case and every passthrough case.
func TestGetDBCalltype(t *testing.T) {
	mapping := File{
		"GSK": {
			Species: "Roroa",
			Calltypes: map[string]string{
				"Male":   "Male - Solo",
				"Female": "Female - Solo",
			},
		},
		"K-M": {Species: "Kiwi"}, // no calltype mapping
	}

	t.Run("with mapping", func(t *testing.T) {
		ct := mapping.GetDBCalltype("GSK", "Male")
		if ct != "Male - Solo" {
			t.Errorf("expected 'Male - Solo', got %s", ct)
		}
	})

	t.Run("without mapping - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("GSK", "Unknown")
		if ct != "Unknown" {
			t.Errorf("expected passthrough 'Unknown', got %s", ct)
		}
	})

	t.Run("species not in mapping - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("UNKNOWN", "Male")
		if ct != "Male" {
			t.Errorf("expected passthrough 'Male', got %s", ct)
		}
	})

	t.Run("species without calltypes - passthrough", func(t *testing.T) {
		ct := mapping.GetDBCalltype("K-M", "Male")
		if ct != "Male" {
			t.Errorf("expected passthrough 'Male', got %s", ct)
		}
	})
}

// TestValidationResult checks HasErrors for each field and the combined
// Error string.
func TestValidationResult(t *testing.T) {
	t.Run("HasErrors - no errors", func(t *testing.T) {
		r := ValidationResult{}
		if r.HasErrors() {
			t.Error("expected no errors")
		}
	})

	t.Run("HasErrors - missing species", func(t *testing.T) {
		r := ValidationResult{MissingSpecies: []string{"UNKNOWN"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("HasErrors - missing DB species", func(t *testing.T) {
		r := ValidationResult{MissingDBSpecies: []string{"Phantom"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("HasErrors - missing calltypes", func(t *testing.T) {
		r := ValidationResult{MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"}}
		if !r.HasErrors() {
			t.Error("expected errors")
		}
	})

	t.Run("Error - all error types", func(t *testing.T) {
		r := ValidationResult{
			MissingSpecies:   []string{"UNKNOWN"},
			MissingDBSpecies: []string{"Phantom"},
			MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"},
		}
		errStr := r.Error()
		if errStr == "" {
			t.Error("expected non-empty error string")
		}
		// Check all parts are present
		if !strings.Contains(errStr, "UNKNOWN") {
			t.Error("error string should contain MISSING species")
		}
		if !strings.Contains(errStr, "Phantom") {
			t.Error("error string should contain missing DB species")
		}
		if !strings.Contains(errStr, "GSK/Male") {
			t.Error("error string should contain missing calltype")
		}
	})
}

// Helper functions

// createTempFile writes content to mapping.json inside a per-test temporary
// directory and returns its path. The directory comes from t.TempDir, so the
// testing package removes it when the test finishes.
func createTempFile(t *testing.T, content string) string {
	t.Helper()
	tmpDir := t.TempDir()
	path := filepath.Join(tmpDir, "mapping.json")
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatalf("failed to create temp file: %v", err)
	}
	return path
}

// TestMappingClassify checks the negative, ignore, real and missing cases.
func TestMappingClassify(t *testing.T) {
	m := File{
		"noise":  {Species: Negative},
		"ignore": {Species: Ignore},
		"kiwi":   {Species: "Kiwi"},
	}

	c, k, ok := m.Classify("noise")
	if !ok || k != Neg || c != "" {
		t.Error("failed classify negative")
	}

	c, k, ok = m.Classify("ignore")
	if !ok || k != Ign || c != "" {
		t.Error("failed classify ignore")
	}

	c, k, ok = m.Classify("kiwi")
	if !ok || k != Real || c != "Kiwi" {
		t.Error("failed classify real")
	}

	_, _, ok = m.Classify("missing")
	if ok {
		t.Error("expected missing to be not ok")
	}
}

// TestMappingValidateCoversSpecies checks that unmapped species are reported.
func TestMappingValidateCoversSpecies(t *testing.T) {
	m := File{"kiwi": {Species: "Kiwi"}}
	missing := m.ValidateCoversSpecies(map[string]bool{"kiwi": true, "tui": true})
	if len(missing) != 1 || missing[0] != "tui" {
		t.Errorf("expected [tui], got %v", missing)
	}
}

// TestMappingClasses checks that sentinels are dropped and duplicates merged.
func TestMappingClasses(t *testing.T) {
	m := File{
		"noise":     {Species: Negative},
		"kiwi":      {Species: "Kiwi"},
		"tui":       {Species: "Tui"},
		"duplicate": {Species: "Kiwi"},
	}
	classes := m.Classes()
	if len(classes) != 2 || classes[0] != "Kiwi" || classes[1] != "Tui" {
		t.Errorf("expected [Kiwi, Tui], got %v", classes)
	}
}
// Package mapping provides types and utilities for translating .data file// species/calltype names to database labels via a mapping JSON file.//// This is a leaf package: no imports of skraak/db or skraak/tools.package mappingimport ("encoding/json""fmt""os""sort""strings")// SpeciesMapping maps .data species/calltype names to DB labelstype SpeciesMapping struct {Species string `json:"species"`Calltypes map[string]string `json:"calltypes,omitempty"`}// File represents the complete mapping file structure.// Key is the .data file species name.type File map[string]SpeciesMapping// Load loads and parses a mapping JSON filefunc Load(path string) (File, error) {data, err := os.ReadFile(path)if err != nil {return nil, fmt.Errorf("failed to read mapping file: %w", err)}var m Fileif err := json.Unmarshal(data, &m); err != nil {return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)}// Validate non-emptyif len(m) == 0 {return nil, fmt.Errorf("mapping file is empty")}// Validate each entry has speciesfor dataSpecies, sm := range m {if sm.Species == "" {return nil, fmt.Errorf("mapping entry '%s' has empty species field", dataSpecies)}}return m, nil}// Mapping sentinels: special values for the SpeciesMapping.Species field.//// Negative marks a .data species as "confirmed empty" (Noise-equivalent):// segments matching this name are treated as negative evidence — clips overlapping// them emit an all-zero row when no positive species also overlaps.//// Ignore marks a .data species as "ignored entirely": segments matching// this name neither label clips nor block them.const (Negative = "__NEGATIVE__"Ignore = "__IGNORE__")// Kind describes how a .data species should be treated.type Kind intconst (Real Kind = iotaNegIgn)// Classify returns the canonical class name and kind for a .data species.// ok is false if dataSpecies is not present in the mapping.// For Neg and Ign the canonical string is empty.func (m File) Classify(dataSpecies string) (canonical string, kind Kind, ok 
bool) {sm, exists := m[dataSpecies]if !exists {return "", Real, false}switch sm.Species {case Negative:return "", Neg, truecase Ignore:return "", Ign, truedefault:return sm.Species, Real, true}}// ValidateCoversSpecies returns the sorted list of species in speciesSet that// are missing from the mapping. Empty result means full coverage.func (m File) ValidateCoversSpecies(speciesSet map[string]bool) []string {missing := make([]string, 0)for s := range speciesSet {if _, exists := m[s]; !exists {missing = append(missing, s)}}sort.Strings(missing)return missing}// Classes returns the sorted unique non-sentinel canonical class names from the mapping.// Used to build the CSV column header for clip-labels.func (m File) Classes() []string {set := make(map[string]bool)for _, sm := range m {switch sm.Species {case Negative, Ignore, "":continuedefault:set[sm.Species] = true}}out := make([]string, 0, len(set))for s := range set {out = append(out, s)}sort.Strings(out)return out}// GetDBSpecies returns the DB species label for a .data speciesfunc (m File) GetDBSpecies(dataSpecies string) (string, bool) {sm, exists := m[dataSpecies]if !exists {return "", false}return sm.Species, true}// GetDBCalltype returns the DB calltype label for a .data species/calltype.// Returns the dataCalltype unchanged if no mapping exists.func (m File) GetDBCalltype(dataSpecies, dataCalltype string) string {sm, exists := m[dataSpecies]if !exists || sm.Calltypes == nil {return dataCalltype}if dbCT, ok := sm.Calltypes[dataCalltype]; ok {return dbCT}return dataCalltype}// ValidationResult contains validation errors for a mappingtype ValidationResult struct {MissingSpecies []string // .data species not in mappingMissingDBSpecies []string // mapped species not in DBMissingCalltypes map[string]string // "dataSpecies/dataCalltype" -> "dbSpecies/dbCalltype"}// HasErrors returns true if any validation errors existfunc (r ValidationResult) HasErrors() bool {return len(r.MissingSpecies) > 0 
||len(r.MissingDBSpecies) > 0 ||len(r.MissingCalltypes) > 0}// Error returns a formatted error messagefunc (r ValidationResult) Error() string {var parts []stringif len(r.MissingSpecies) > 0 {parts = append(parts, fmt.Sprintf("species in .data but not in mapping: [%s]",strings.Join(r.MissingSpecies, ", ")))}if len(r.MissingDBSpecies) > 0 {parts = append(parts, fmt.Sprintf("mapped species not found in DB: [%s]",strings.Join(r.MissingDBSpecies, ", ")))}if len(r.MissingCalltypes) > 0 {var ctErrors []stringfor k, v := range r.MissingCalltypes {ctErrors = append(ctErrors, fmt.Sprintf("%s->%s", k, v))}sort.Strings(ctErrors)parts = append(parts, fmt.Sprintf("calltypes not found in DB: [%s]",strings.Join(ctErrors, ", ")))}return strings.Join(parts, "; ")}
err := q.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
err := q.QueryRowContext(context.Background(), "SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
// mutator is the local interface that *LoggedTx must satisfy.
// Defined here because db/ is the consumer. Uses Context variants exclusively
// so all DB-facing interfaces compose as compatible subsets of *sql.DB / *sql.Tx.
type mutator interface {
	// ExecContext has the same signature as the method of that name on
	// *sql.DB and *sql.Tx.
	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
	// QueryRowContext has the same signature as the method of that name on
	// *sql.DB and *sql.Tx.
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}
If called by `cmd/`, it goes in `tools/`. If called by `tools/`, it goes in `utils/`.
If called by `cmd/`, it goes in `tools/`. If called by `tools/`, it goes in `utils/`.
db/ → Database connection + types + transactions
utils/*.go → Reusable helpers (leaf package, no db import, no `*Input`/`*Output` structs)
db/ → Database connection + types + transactions (defines local mutator interface)
mapping/ → Mapping file types + loader (leaf package, shared by tools/calls/ and tools/import/)
utils/*.go → Reusable helpers (leaf package, no database/sql import, no `*Input`/`*Output` structs)
**use cli tool `gosymdb agent-context` for exploratory dev tools**
**DB interface convention:** Each consumer defines its own minimal interface (Mutator, Reader, MappingQuerier, etc.) using `*Context` method variants. No god-interfaces in `db/`. Two near-identical interfaces in two packages beats the wrong abstraction.
## [2026-05-19] Refactor utils/: extract DB-aware code, add mapping/ package

### Context migration (PR 1)

- Standardized all DB-facing interfaces on `*Context` method variants (`QueryContext`, `QueryRowContext`, `ExecContext`)
- Updated `db.Querier` interface: dropped non-Context `Query`/`QueryRow` methods
- Updated all callers in `db/validation.go`, `tools/import/`, `tools/pattern.go`, `tools/dataset.go`
- Updated `utils.DB`, `utils.Mutator`, `utils.CheckDuplicateHash` to use Context variants
- Updated `utils/mapping.go` to use `QueryContext`

### Phase 1: Move DB-aware import code to tools/import/ (PR 2)

- **Moved `utils/cluster_import.go` → `tools/import/cluster_import.go`**: ImportCluster, GetLocationData, EnsureClusterPath, ClusterImportInput/Output, LocationData
- **Moved DB-aware parts of `utils/file_import.go` → `tools/import/file_import.go`**: CheckDuplicateHash, FileImportError, ImportStage, Stage* constants
- **Deleted `utils/mutator.go`**: Mutator interface replaced by consumer-local interfaces
- **Moved `utils/mapping.go` → `tools/import/mapping.go`**: ValidateMappingAgainstDB, MappingQuerier, collectMappedLabels, validateMappedSpecies/Calltypes
- **Created `mapping/` leaf package**: MappingFile (now File), SpeciesMapping, Load, sentinel constants (Negative/Ignore), Kind, Classify, Classes, ValidationResult — shared by tools/calls/ and tools/import/
- **Kept in `utils/file_import.go`**: ResolveTimestamp, TimestampResult, ProcessSingleFile, FileProcessingResult (leaf-level, no DB)
- **Updated `db/tx_logger.go`**: Replaced `utils.Mutator` with local `mutator` interface; removed `skraak/utils` import
- **Updated `tools/calls/calls_clip_labels.go`**: Uses `mapping` package instead of `utils.MappingFile`
### Result

- `utils/` contains zero `database/sql` imports
- `utils/` defines zero DB interfaces (DB, Mutator)
- `utils/` contains zero `*Input`/`*Output` structs
- `db/` no longer imports `skraak/utils` for Mutator (only for Placeholders and GainLevel aliases)
- Each DB consumer defines its own minimal interface (Mutator, Reader, MappingQuerier)
depguard:rules:# Package dependency rules — see CLAUDE.md "Package Organization"# Packages may only import packages below them in the list:# cmd → tools, tools/calls, tools/import, tui, utils, db# tools/calls → utils, db# tools/import → utils, db# tools → utils, db# tui → tools/calls, utils# db → utils# utils → (nothing — leaf package)utils:files:- "**/utils/*.go"deny:- pkg: "skraak/cmd"desc: "utils is the leaf package"- pkg: "skraak/tools"desc: "utils is the leaf package"- pkg: "skraak/tui"desc: "utils is the leaf package"- pkg: "skraak/db"desc: "utils is the leaf package"db:files:- "**/db/*.go"deny:- pkg: "skraak/cmd"desc: "db may only import utils"- pkg: "skraak/tools"desc: "db may only import utils"- pkg: "skraak/tui"desc: "db may only import utils"tui:files:- "**/tui/*.go"deny:- pkg: "skraak/cmd"desc: "tui must not import cmd"- pkg: "skraak/db"desc: "tui must not import db"- pkg: "skraak/tools$"desc: "tui must import from tools/calls, not tools"calls:files:- "**/tools/calls/*.go"deny:- pkg: "skraak/cmd"desc: "tools/calls must not import cmd"- pkg: "skraak/tools"desc: "tools/calls must not import parent package"- pkg: "skraak/tui"desc: "tools/calls must not import tui"import:files:- "**/tools/import/*.go"deny:- pkg: "skraak/cmd"desc: "tools/import must not import cmd"- pkg: "skraak/tools"desc: "tools/import must not import parent package"- pkg: "skraak/tui"desc: "tools/import must not import tui"tools:files:- "**/tools/*.go"deny:- pkg: "skraak/cmd"desc: "tools must not import cmd"- pkg: "skraak/tui"desc: "tools must not import tui"- pkg: "skraak/tools/calls"desc: "tools must not import tools/calls (sub-package)"- pkg: "skraak/tools/import"desc: "tools must not import tools/import (sub-package)"
# depguard:# rules:# # Package dependency rules — see CLAUDE.md "Package Organization"# # Packages may only import packages below them in the list:# # cmd → tools, tools/calls, tools/import, tui, utils, db# # tools/calls → utils, db# # tools/import → utils, db# # tools → utils, db# # tui → tools/calls, utils# # db → utils# # utils → (nothing — leaf package)# utils:# files:# - "**/utils/*.go"# deny:# - pkg: "skraak/cmd"# desc: "utils is the leaf package"# - pkg: "skraak/tools"# desc: "utils is the leaf package"# - pkg: "skraak/tui"# desc: "utils is the leaf package"# - pkg: "skraak/db"# desc: "utils is the leaf package"# db:# files:# - "**/db/*.go"# deny:# - pkg: "skraak/cmd"# desc: "db may only import utils"# - pkg: "skraak/tools"# desc: "db may only import utils"# - pkg: "skraak/tui"# desc: "db may only import utils"# tui:# files:# - "**/tui/*.go"# deny:# - pkg: "skraak/cmd"# desc: "tui must not import cmd"# - pkg: "skraak/db"# desc: "tui must not import db"# - pkg: "skraak/tools$"# desc: "tui must import from tools/calls, not tools"# calls:# files:# - "**/tools/calls/*.go"# deny:# - pkg: "skraak/cmd"# desc: "tools/calls must not import cmd"# - pkg: "skraak/tools"# desc: "tools/calls must not import parent package"# - pkg: "skraak/tui"# desc: "tools/calls must not import tui"# import:# files:# - "**/tools/import/*.go"# deny:# - pkg: "skraak/cmd"# desc: "tools/import must not import cmd"# - pkg: "skraak/tools"# desc: "tools/import must not import parent package"# - pkg: "skraak/tui"# desc: "tools/import must not import tui"# tools:# files:# - "**/tools/*.go"# deny:# - pkg: "skraak/cmd"# desc: "tools must not import cmd"# - pkg: "skraak/tui"# desc: "tools must not import tui"# - pkg: "skraak/tools/calls"# desc: "tools must not import tools/calls (sub-package)"# - pkg: "skraak/tools/import"# desc: "tools must not import tools/import (sub-package)"