first phase of utils refactor, all related to db interfaces

quietlight
May 18, 2026, 10:21 PM
NQPVZ3PPQG6EPTTAEHXOXXGK27HZCISHZCOZU6K6RKWTRTOHMY6QC

Dependencies

  • [2] HCOBJB6W ck 4
  • [3] M34GDDTW fill calls add, check duration
  • [4] KLUEQ6X5 cyclo 21+
  • [5] 43TMU2JO more tests, glm much better than claude
  • [6] ZKLAOPUR fix event logging
  • [7] AVQ66WO4 tools/ refactor
  • [8] 2HAQZPV3 more refactoring with glm
  • [9] VYNOHQJW tidied up CLAUDE.md
  • [10] JAT3DXOL cyclo over 15
  • [11] TUC452XH new util shared by 3 cmd's needing location
  • [12] 2P27XV3D fixed cyclo over 30
  • [13] Q4JPMGET fixed tests
  • [14] A6MCX2V6 emptied audio/ and moved files into testdata folders
  • [15] TSOJUMHV more tests
  • [16] JZRF7OBJ refactor to get db imports out of utils, but still have failing tests, may need updating
  • [17] VNFPBXF7 moved dep tests to golangci-lint
  • [18] LBWQJEDH minor refactor and more tests for utils/
  • [19] P4CJMBYK added first version of --bandpass flag to calls classify, work to do
  • [20] DS22DKV3 added shell script integration tests.
  • [21] QVIGQOQZ more work on utils/ with glm
  • [22] WKQ7LFTP refactor of utils/
  • [23] LQLC7S3A trying gemini: Inconsistent Standards in @utils/ refactoring
  • [24] 2Y5U3QPU added gosymdb
  • [25] RUVJ3V4N cyclo to 14 now
  • [26] IFLKNMMP ck 1
  • [27] 3DVPQOKB big tidy up of tools/
  • [28] KZKLAINJ run out of space on nest, cleaned out
  • [29] DHIPFBFP added tests
  • [30] VU3KBTQ6 more tests
  • [31] I4CMOMXF dot files
  • [32] ZDZDASRT complexity over 12 now gone, but have some lint fails
  • [33] FCCJNYCV more tests for utils/
  • [34] ZCCQ4P5T reduce complexity to under 14, gocyclo but cilint test still has 3 functions over
  • [*] SJN7IKIV

Change contents

  • file deletion: mutator_test.go (----------)
    [4.1][4.627:666](),[4.666][4.1:1]()
    package utils
    import (
    "database/sql"
    "testing"
    )
    // TestMutator_InterfaceCompliance is a compile-time guard: this file only
    // builds while *sql.Tx implements Mutator. Nothing is asserted at run time.
    // The matching *db.LoggedTx check is in db/tx_logger_test.go.
    func TestMutator_InterfaceCompliance(t *testing.T) {
        // A typed nil pointer is enough; only the static type matters here.
        var tx *sql.Tx
        var _ Mutator = tx
    }
    // TestMutator_InterfaceMethods verifies the Mutator interface has the expected method set.
    // The checks are method expressions, so a removed or renamed method breaks the
    // build of this test rather than failing at run time.
    func TestMutator_InterfaceMethods(t *testing.T) {
        // ExecContext: presence only; its full signature needs the context package,
        // which this file does not import.
        _ = Mutator.ExecContext
        // QueryRow: presence and exact signature.
        var _ func(Mutator, string, ...any) *sql.Row = Mutator.QueryRow
    }
  • file deletion: mutator.go (----------)
    [4.1][4.1390:1424](),[4.1424][4.668:668]()
    package utils
    import (
    "context"
    "database/sql"
    )
    // Mutator represents a transaction-like object that supports both reads and writes.
    // Both *sql.Tx and *db.LoggedTx satisfy this interface.
    //
    // Use Mutator instead of *sql.Tx when the caller needs mutation logging.
    // This avoids the import cycle that would result from utils importing db.
    type Mutator interface {
    // ExecContext runs a statement that returns no rows; it is the only
    // context-aware method in the set.
    ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
    // QueryRow runs a query expected to return at most one row. It takes no
    // context argument.
    QueryRow(query string, args ...any) *sql.Row
    }
    // Compile-time interface compliance check.
    // The package stops building if *sql.Tx no longer satisfies Mutator.
    // Note: *db.LoggedTx check is in db/tx_logger.go to avoid import cycle.
    var _ Mutator = (*sql.Tx)(nil)
  • file deletion: mapping_db_test.go (----------)
    [4.1][4.9641:9683](),[4.9683][4.1:1]()
    package utils
    import (
    "database/sql"
    "slices"
    "strings"
    "testing"
    _ "github.com/duckdb/duckdb-go/v2"
    )
    // setupMappingTestDB opens an in-memory DuckDB database and seeds the species
    // and call_type tables used by the mapping validation tests.
    // Species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000), Tui (sp_tui0000000, inactive)
    // Calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002), Roroa/brrr (ct_roroa00001)
    // The handle is returned open; callers close it.
    func setupMappingTestDB(t *testing.T) *sql.DB {
        t.Helper()
        conn, openErr := sql.Open("duckdb", ":memory:")
        if openErr != nil {
            t.Fatalf("open: %v", openErr)
        }
        // Executed strictly in order: schema, then species rows, then call_type rows.
        statements := []string{
            `CREATE TABLE species (
    id VARCHAR(12) PRIMARY KEY,
    label VARCHAR(100) UNIQUE NOT NULL,
    active BOOLEAN DEFAULT TRUE
    )`,
            `CREATE TABLE call_type (
    id VARCHAR(12) PRIMARY KEY,
    species_id VARCHAR(12) NOT NULL,
    label VARCHAR(100) NOT NULL,
    active BOOLEAN DEFAULT TRUE
    )`,
            "INSERT INTO species (id, label, active) VALUES ('sp_kiwi000000', 'Kiwi', true)",
            "INSERT INTO species (id, label, active) VALUES ('sp_roroa00000', 'Roroa', true)",
            // Tui is the only inactive species.
            "INSERT INTO species (id, label, active) VALUES ('sp_tui0000000', 'Tui', false)",
            "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000001', 'sp_kiwi000000', 'song', true)",
            "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000002', 'sp_kiwi000000', 'duet', true)",
            "INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_roroa00001', 'sp_roroa00000', 'brrr', true)",
        }
        for _, stmt := range statements {
            mustExecMapping(t, conn, stmt)
        }
        return conn
    }
    // mustExecMapping runs a single SQL statement against db and aborts the
    // calling test as soon as the statement fails.
    func mustExecMapping(t *testing.T, db *sql.DB, query string) {
        t.Helper()
        _, execErr := db.Exec(query)
        if execErr == nil {
            return
        }
        t.Fatalf("exec: %v", execErr)
    }
    // --- collectMappedLabels ---
    // TestCollectMappedLabels checks the species set and the per-species calltype
    // map returned by collectMappedLabels for an explicitly mapped species, a
    // species with no calltype mapping, and a sentinel entry.
    func TestCollectMappedLabels(t *testing.T) {
        fromData := map[string]map[string]bool{
            "GSK": {"brrr": true},
            "K-M": {"song": true, "duet": true},
        }
        m := MappingFile{
            "GSK":   {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
            "K-M":   {Species: "Kiwi"},
            "noise": {Species: MappingNegative},
        }
        gotSpecies, gotCT := collectMappedLabels(m, fromData)

        // Both real species must be reported.
        if hasBoth := gotSpecies["Roroa"] && gotSpecies["Kiwi"]; !hasBoth {
            t.Errorf("speciesSet=%v, want Kiwi and Roroa", gotSpecies)
        }
        // The negative sentinel is not a DB species.
        if gotSpecies[MappingNegative] {
            t.Error("sentinel species should be excluded")
        }
        // Roroa carries an explicit calltype mapping.
        if roroa := gotCT["Roroa"]; roroa["brrr"] != "brrr" {
            t.Errorf("Roroa calltypes=%v", roroa)
        }
        // Kiwi has none, so the calltypes seen in the data pass through unchanged.
        kiwi := gotCT["Kiwi"]
        if !(kiwi["song"] == "song" && kiwi["duet"] == "duet") {
            t.Errorf("Kiwi calltypes=%v", kiwi)
        }
    }
    // --- collectUnmappedCalltypes ---
    // TestCollectUnmappedCalltypes checks that collectUnmappedCalltypes records a
    // mapped calltype under its DB label and passes an unmapped one through as-is.
    func TestCollectUnmappedCalltypes(t *testing.T) {
        m := MappingFile{
            "GSK": {Species: "Roroa", Calltypes: map[string]string{"Male": "brrr"}},
        }
        seen := map[string]map[string]bool{
            "GSK": {"Male": true, "Female": true},
        }
        out := make(map[string]map[string]string)
        collectUnmappedCalltypes(m, seen, out)

        roroa := out["Roroa"]
        // "Male" is mapped, so it is stored under the DB label "brrr".
        if roroa["brrr"] != "Male" {
            t.Errorf("mapped Male->brrr: %v", roroa)
        }
        // "Female" has no mapping entry and keeps its own name.
        if roroa["Female"] != "Female" {
            t.Errorf("unmapped Female passthrough: %v", roroa)
        }
    }
    // --- validateMappedSpecies ---
    // TestValidateMappedSpecies runs validateMappedSpecies against the seeded test
    // database for present, absent, inactive and empty species sets.
    func TestValidateMappedSpecies(t *testing.T) {
        db := setupMappingTestDB(t)
        defer db.Close()

        // missingFor validates one species set against a fresh result and returns
        // the labels reported missing; a query error aborts the subtest.
        missingFor := func(t *testing.T, species map[string]bool) []string {
            t.Helper()
            res := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
            if err := validateMappedSpecies(db, species, res); err != nil {
                t.Fatalf("unexpected error: %v", err)
            }
            return res.MissingDBSpecies
        }

        t.Run("all species exist in DB", func(t *testing.T) {
            missing := missingFor(t, map[string]bool{"Kiwi": true, "Roroa": true})
            if len(missing) > 0 {
                t.Errorf("missing species: %v", missing)
            }
        })
        t.Run("species not in DB reported", func(t *testing.T) {
            missing := missingFor(t, map[string]bool{"Phantom": true})
            if !(len(missing) == 1 && missing[0] == "Phantom") {
                t.Errorf("expected [Phantom], got %v", missing)
            }
        })
        t.Run("inactive species not found", func(t *testing.T) {
            // Tui exists in the table but is seeded with active = false.
            missing := missingFor(t, map[string]bool{"Tui": true})
            if len(missing) != 1 {
                t.Errorf("inactive species should be missing, got %v", missing)
            }
        })
        t.Run("empty set is no-op", func(t *testing.T) {
            missing := missingFor(t, map[string]bool{})
            if len(missing) != 0 {
                t.Errorf("expected no missing, got %v", missing)
            }
        })
    }
    // --- validateMappedCalltypes ---
    // TestValidateMappedCalltypes runs validateMappedCalltypes against the seeded
    // test database for existing, missing and empty calltype maps.
    func TestValidateMappedCalltypes(t *testing.T) {
        db := setupMappingTestDB(t)
        defer db.Close()

        // missingFor validates one species->calltype map against a fresh result and
        // returns the recorded missing calltypes; a query error aborts the subtest.
        missingFor := func(t *testing.T, ctMap map[string]map[string]string) map[string]string {
            t.Helper()
            res := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
            if err := validateMappedCalltypes(db, ctMap, res); err != nil {
                t.Fatalf("unexpected error: %v", err)
            }
            return res.MissingCalltypes
        }

        t.Run("all calltypes exist", func(t *testing.T) {
            missing := missingFor(t, map[string]map[string]string{
                "Kiwi": {"song": "data-song", "duet": "data-duet"},
            })
            if len(missing) > 0 {
                t.Errorf("missing calltypes: %v", missing)
            }
        })
        t.Run("missing calltype reported", func(t *testing.T) {
            missing := missingFor(t, map[string]map[string]string{
                "Kiwi": {"phantom": "data-phantom"},
            })
            if len(missing) != 1 {
                t.Errorf("expected 1 missing, got %v", missing)
            }
        })
        t.Run("empty calltype map skips species", func(t *testing.T) {
            missing := missingFor(t, map[string]map[string]string{
                "Kiwi": {},
            })
            if len(missing) != 0 {
                t.Errorf("expected none missing, got %v", missing)
            }
        })
    }
    // --- ValidateMappingAgainstDB (integration of all above) ---
    func TestValidateMappingAgainstDB(t *testing.T) {
    db := setupMappingTestDB(t)
    defer db.Close()
    }
    for _, tt := range tests {
    t.Run(tt.name, func(t *testing.T) {
    result, err := ValidateMappingAgainstDB(db, tt.mapping, tt.dataSpecies, tt.dataCT)
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if result.HasErrors() != tt.hasErrors {
    t.Errorf("HasErrors()=%v, want %v", result.HasErrors(), tt.hasErrors)
    }
    assertStringSlice(t, "MissingSpecies", result.MissingSpecies, tt.missingSpecies)
    assertStringSlice(t, "MissingDBSpecies", result.MissingDBSpecies, tt.missingDBSpecies)
    if tt.missingCalltypeCT != "" && len(result.MissingCalltypes) == 0 {
    t.Error("expected missing calltype")
    }
    if tt.errorContains != "" && !strings.Contains(result.Error(), tt.errorContains) {
    t.Errorf("error should contain %q: %s", tt.errorContains, result.Error())
    }
    })
    }
    tests := []struct {
    name string
    mapping MappingFile
    dataSpecies map[string]bool
    dataCT map[string]map[string]bool
    hasErrors bool
    missingSpecies []string
    missingDBSpecies []string
    missingCalltypeCT string // substring expected in MissingCalltypes key
    errorContains string // substring expected in result.Error()
    }{
    {
    name: "valid mapping - no errors",
    mapping: MappingFile{
    "GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
    "K-M": {Species: "Kiwi"},
    },
    dataSpecies: map[string]bool{"GSK": true, "K-M": true},
    dataCT: map[string]map[string]bool{"GSK": {"brrr": true}, "K-M": {"song": true}},
    },
    {
    name: "missing species in mapping",
    mapping: MappingFile{"GSK": {Species: "Roroa"}},
    dataSpecies: map[string]bool{"GSK": true, "K-M": true},
    hasErrors: true,
    missingSpecies: []string{"K-M"},
    },
    {
    name: "mapped species not in DB",
    mapping: MappingFile{"PHANTOM": {Species: "Phantom"}},
    dataSpecies: map[string]bool{"PHANTOM": true},
    hasErrors: true,
    missingDBSpecies: []string{"Phantom"},
    },
    {
    name: "sentinel species excluded from DB check",
    mapping: MappingFile{"noise": {Species: MappingNegative}, "ignore": {Species: MappingIgnore}},
    dataSpecies: map[string]bool{"noise": true, "ignore": true},
    },
    {
    name: "missing calltype in DB",
    mapping: MappingFile{
    "K-M": {Species: "Kiwi", Calltypes: map[string]string{"song": "song", "phantom": "phantom"}},
    },
    dataSpecies: map[string]bool{"K-M": true},
    dataCT: map[string]map[string]bool{"K-M": {"song": true, "phantom": true}},
    hasErrors: true,
    missingCalltypeCT: "phantom",
    errorContains: "phantom",
    },
    }
    }
    // assertStringSlice checks that got matches want (order-insensitive).
    func assertStringSlice(t *testing.T, label string, got, want []string) {
    t.Helper()
    if len(want) == 0 && len(got) == 0 {
    return
    }
    if len(got) != len(want) {
    t.Errorf("%s: got %v, want %v", label, got, want)
    return
    }
    for _, w := range want {
    found := slices.Contains(got, w)
    if !found {
    t.Errorf("%s: missing %q in %v", label, w, got)
    }
    }
  • file deletion: check_duplicate_hash_test.go (----------)
    [4.1][4.10315:10367](),[4.10367][4.6609:6609]()
    package utils
    import (
    "database/sql"
    "testing"
    )
    // TestCheckDuplicateHash_NoRows checks that a lookup against an empty file
    // table reports no duplicate and returns an empty id.
    func TestCheckDuplicateHash_NoRows(t *testing.T) {
        conn := openTestDB(t)
        defer conn.Close()

        // Nothing has been inserted, so no hash can match.
        gotID, isDup, err := CheckDuplicateHash(conn, "abcdef0123456789")
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if isDup {
            t.Error("expected isDuplicate=false when no rows")
        }
        if gotID != "" {
            t.Errorf("expected empty id, got %q", gotID)
        }
    }
    // TestCheckDuplicateHash_FoundDuplicate checks that an active file row with a
    // matching hash is reported as a duplicate together with that row's id.
    func TestCheckDuplicateHash_FoundDuplicate(t *testing.T) {
        conn := openTestDB(t)
        defer conn.Close()

        wantID := "test_file_id_123"
        seedHash := "deadbeef12345678"
        // Seed one active row carrying the hash that is looked up below.
        if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES (?, '/test/file.wav', 'ds1', ?, true)`, wantID, seedHash); err != nil {
            t.Fatalf("insert: %v", err)
        }

        gotID, isDup, err := CheckDuplicateHash(conn, seedHash)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !isDup {
            t.Error("expected isDuplicate=true")
        }
        if gotID != wantID {
            t.Errorf("expected id=%q, got %q", wantID, gotID)
        }
    }
    // TestCheckDuplicateHash_InactiveNotDuplicate checks that a row with a matching
    // hash but active = false is not reported as a duplicate.
    func TestCheckDuplicateHash_InactiveNotDuplicate(t *testing.T) {
        conn := openTestDB(t)
        defer conn.Close()

        seedID := "inactive_file_id"
        seedHash := "cafebeef12345678"
        // The only row carrying this hash is inactive.
        if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES (?, '/test/old.wav', 'ds1', ?, false)`, seedID, seedHash); err != nil {
            t.Fatalf("insert: %v", err)
        }

        gotID, isDup, err := CheckDuplicateHash(conn, seedHash)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if isDup {
            t.Error("expected isDuplicate=false for inactive file")
        }
        if gotID != "" {
            t.Errorf("expected empty id, got %q", gotID)
        }
    }
    // TestCheckDuplicateHash_DifferentHashNoDuplicate checks that a stored hash
    // does not cause a different hash to be reported as a duplicate.
    func TestCheckDuplicateHash_DifferentHashNoDuplicate(t *testing.T) {
        conn := openTestDB(t)
        defer conn.Close()

        // Store hash A only.
        if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES ('id1', '/test/a.wav', 'ds1', 'hash_aaaa', true)`); err != nil {
            t.Fatalf("insert: %v", err)
        }

        // Look up hash B, which was never stored.
        gotID, isDup, err := CheckDuplicateHash(conn, "hash_bbbb")
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if isDup {
            t.Error("expected isDuplicate=false for different hash")
        }
        if gotID != "" {
            t.Errorf("expected empty id, got %q", gotID)
        }
    }
    // openTestDB opens a DuckDB database using an empty data source name and
    // creates the minimal file table the duplicate-hash tests need.
    // If table creation fails, the handle is closed before the test is aborted.
    func openTestDB(t *testing.T) *sql.DB {
        t.Helper()
        conn, err := sql.Open("duckdb", "")
        if err != nil {
            t.Fatalf("open duckdb: %v", err)
        }
        if _, err := conn.Exec(`
    CREATE TABLE file (
    id VARCHAR PRIMARY KEY,
    path VARCHAR,
    dataset_id VARCHAR,
    xxh64_hash VARCHAR,
    active BOOLEAN DEFAULT true
    )
    `); err != nil {
            conn.Close()
            t.Fatalf("create table: %v", err)
        }
        return conn
    }
    _ "github.com/duckdb/duckdb-go/v2"
  • file deletion: mapping_test.go (----------)
    [4.1][4.89514:89553](),[4.89553][4.83836:83836]()
    package utils
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // TestLoadMappingFile covers one well-formed mapping file and every rejection
    // path: malformed JSON, an empty object, a missing or empty species field,
    // and a path that does not exist.
    func TestLoadMappingFile(t *testing.T) {
        t.Run("valid mapping", func(t *testing.T) {
            path := createTempFile(t, `{
    "GSK": {"species": "Roroa", "calltypes": {"Male": "Male - Solo"}},
    "Don't Know": {"species": "Don't Know"}
    }`)
            defer os.Remove(path)
            loaded, err := LoadMappingFile(path)
            if err != nil {
                t.Fatalf("expected no error, got: %v", err)
            }
            if n := len(loaded); n != 2 {
                t.Errorf("expected 2 entries, got %d", n)
            }
            gsk := loaded["GSK"]
            if gsk.Species != "Roroa" {
                t.Errorf("expected GSK -> Roroa, got %s", gsk.Species)
            }
            if got := gsk.Calltypes["Male"]; got != "Male - Solo" {
                t.Errorf("expected GSK Male -> Male - Solo, got %s", got)
            }
        })

        // File contents that must be rejected, each with its failure message.
        rejected := []struct {
            name    string
            content string
            failure string
        }{
            {"invalid JSON", `{invalid json}`, "expected error for invalid JSON"},
            {"empty file", `{}`, "expected error for empty mapping"},
            {"missing species field", `{"GSK": {"calltypes": {"Male": "Male - Solo"}}}`, "expected error for missing species field"},
            {"empty species field", `{"GSK": {"species": ""}}`, "expected error for empty species field"},
        }
        for _, tc := range rejected {
            t.Run(tc.name, func(t *testing.T) {
                path := createTempFile(t, tc.content)
                defer os.Remove(path)
                if _, err := LoadMappingFile(path); err == nil {
                    t.Fatal(tc.failure)
                }
            })
        }

        t.Run("nonexistent file", func(t *testing.T) {
            if _, err := LoadMappingFile("/nonexistent/path/mapping.json"); err == nil {
                t.Fatal("expected error for nonexistent file")
            }
        })
    }
    // TestGetDBSpecies checks the lookup result for a key that is in the mapping
    // and for one that is not.
    func TestGetDBSpecies(t *testing.T) {
        m := MappingFile{
            "GSK": {Species: "Roroa"},
            "K-M": {Species: "Kiwi"},
        }
        t.Run("found", func(t *testing.T) {
            got, present := m.GetDBSpecies("GSK")
            if !present {
                t.Fatal("expected to find GSK")
            }
            if got != "Roroa" {
                t.Errorf("expected Roroa, got %s", got)
            }
        })
        t.Run("not found", func(t *testing.T) {
            if _, present := m.GetDBSpecies("UNKNOWN"); present {
                t.Fatal("expected not to find UNKNOWN")
            }
        })
    }
    // TestGetDBCalltype checks the mapped result and the three pass-through
    // cases: unknown calltype, unknown species, and a species with no calltype map.
    func TestGetDBCalltype(t *testing.T) {
        m := MappingFile{
            "GSK": {
                Species: "Roroa",
                Calltypes: map[string]string{
                    "Male":   "Male - Solo",
                    "Female": "Female - Solo",
                },
            },
            "K-M": {Species: "Kiwi"}, // no calltype mapping
        }
        cases := []struct {
            name     string
            species  string
            calltype string
            want     string
            wantDesc string // how the expectation is worded in the failure message
        }{
            {"with mapping", "GSK", "Male", "Male - Solo", "'Male - Solo'"},
            {"without mapping - passthrough", "GSK", "Unknown", "Unknown", "passthrough 'Unknown'"},
            {"species not in mapping - passthrough", "UNKNOWN", "Male", "Male", "passthrough 'Male'"},
            {"species without calltypes - passthrough", "K-M", "Male", "Male", "passthrough 'Male'"},
        }
        for _, tc := range cases {
            t.Run(tc.name, func(t *testing.T) {
                if got := m.GetDBCalltype(tc.species, tc.calltype); got != tc.want {
                    t.Errorf("expected %s, got %s", tc.wantDesc, got)
                }
            })
        }
    }
    // TestMappingValidationResult checks HasErrors for each error category and
    // that Error mentions every populated category.
    func TestMappingValidationResult(t *testing.T) {
        hasErrorsCases := []struct {
            name   string
            result MappingValidationResult
            want   bool
        }{
            {name: "HasErrors - no errors"},
            {name: "HasErrors - missing species", result: MappingValidationResult{MissingSpecies: []string{"UNKNOWN"}}, want: true},
            {name: "HasErrors - missing DB species", result: MappingValidationResult{MissingDBSpecies: []string{"Phantom"}}, want: true},
            {name: "HasErrors - missing calltypes", result: MappingValidationResult{MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"}}, want: true},
        }
        for _, tc := range hasErrorsCases {
            t.Run(tc.name, func(t *testing.T) {
                got := tc.result.HasErrors()
                switch {
                case got && !tc.want:
                    t.Error("expected no errors")
                case !got && tc.want:
                    t.Error("expected errors")
                }
            })
        }

        t.Run("Error - all error types", func(t *testing.T) {
            full := MappingValidationResult{
                MissingSpecies:   []string{"UNKNOWN"},
                MissingDBSpecies: []string{"Phantom"},
                MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"},
            }
            msg := full.Error()
            if msg == "" {
                t.Error("expected non-empty error string")
            }
            // One fragment from each category must appear in the message.
            fragments := []struct{ needle, failure string }{
                {"UNKNOWN", "error string should contain MISSING species"},
                {"Phantom", "error string should contain missing DB species"},
                {"GSK/Male", "error string should contain missing calltype"},
            }
            for _, f := range fragments {
                if !containsSubstring(msg, f.needle) {
                    t.Error(f.failure)
                }
            }
        })
    }
    // Helper functions
    // createTempFile writes content to a file named mapping.json inside a fresh
    // per-test temporary directory and returns the file's path. The test is
    // aborted if the write fails.
    func createTempFile(t *testing.T, content string) string {
        t.Helper()
        target := filepath.Join(t.TempDir(), "mapping.json")
        err := os.WriteFile(target, []byte(content), 0644)
        if err != nil {
            t.Fatalf("failed to create temp file: %v", err)
        }
        return target
    }
    // containsSubstring reports whether substr occurs anywhere in s.
    // An empty substr is contained in every string, including the empty one.
    func containsSubstring(s, substr string) bool {
        if len(s) < len(substr) {
            return false
        }
        if s == substr {
            return true
        }
        if len(s) == 0 {
            return false
        }
        return containsSubstringHelper(s, substr)
    }
    // containsSubstringHelper slides a window of len(substr) bytes across s and
    // reports whether any window equals substr.
    func containsSubstringHelper(s, substr string) bool {
        width := len(substr)
        for start := 0; start+width <= len(s); start++ {
            if s[start:start+width] == substr {
                return true
            }
        }
        return false
    }
    // TestMappingClassify checks the kind and canonical name Classify returns for
    // a negative sentinel, an ignore sentinel, a real species and an absent key.
    func TestMappingClassify(t *testing.T) {
        m := MappingFile{
            "noise":  {Species: MappingNegative},
            "ignore": {Species: MappingIgnore},
            "kiwi":   {Species: "Kiwi"},
        }
        known := []struct {
            key       string
            wantKind  MappingKind
            wantClass string
            failure   string
        }{
            {"noise", MappingNeg, "", "failed classify negative"},
            {"ignore", MappingIgn, "", "failed classify ignore"},
            {"kiwi", MappingReal, "Kiwi", "failed classify real"},
        }
        for _, tc := range known {
            class, kind, ok := m.Classify(tc.key)
            if matched := ok && kind == tc.wantKind && class == tc.wantClass; !matched {
                t.Error(tc.failure)
            }
        }
        // A key that is not in the mapping must come back with ok=false.
        if _, _, ok := m.Classify("missing"); ok {
            t.Error("expected missing to be not ok")
        }
    }
    // TestMappingValidateCoversSpecies checks that ValidateCoversSpecies returns
    // exactly the data species that the mapping has no entry for.
    func TestMappingValidateCoversSpecies(t *testing.T) {
        inData := map[string]bool{"kiwi": true, "tui": true}
        m := MappingFile{"kiwi": {Species: "Kiwi"}}
        uncovered := m.ValidateCoversSpecies(inData)
        if onlyTui := len(uncovered) == 1 && uncovered[0] == "tui"; !onlyTui {
            t.Errorf("expected [tui], got %v", uncovered)
        }
    }
    // TestMappingClasses checks that Classes returns [Kiwi, Tui] for a mapping
    // holding a sentinel entry and two keys that both map to Kiwi.
    func TestMappingClasses(t *testing.T) {
        m := MappingFile{
            "noise":     {Species: MappingNegative},
            "kiwi":      {Species: "Kiwi"},
            "tui":       {Species: "Tui"},
            "duplicate": {Species: "Kiwi"},
        }
        got := m.Classes()
        asExpected := len(got) == 2 && got[0] == "Kiwi" && got[1] == "Tui"
        if !asExpected {
            t.Errorf("expected [Kiwi, Tui], got %v", got)
        }
    }
  • file deletion: mapping.go (----------)
    [4.1][4.98949:98983](),[4.98983][4.89555:89555]()
    package utils
    import (
    "encoding/json"
    "fmt"
    "os"
    "sort"
    "strings"
    )
    // SpeciesMapping maps .data species/calltype names to DB labels.
    type SpeciesMapping struct {
        // Species is the DB species label. LoadMappingFile rejects entries
        // where it is empty.
        Species string `json:"species"`
        // Calltypes maps a .data calltype name to its DB calltype label.
        // It may be absent.
        Calltypes map[string]string `json:"calltypes,omitempty"`
    }
    // MappingFile represents the complete mapping file structure.
    // Key is the .data file species name.
    type MappingFile map[string]SpeciesMapping
    // LoadMappingFile loads and parses a mapping JSON file.
    //
    // It returns an error when the file cannot be read, is not valid JSON,
    // contains no entries, or has an entry whose species field is empty.
    // When several entries have an empty species, the one whose key sorts first
    // is reported, so the error is the same on every run.
    func LoadMappingFile(path string) (MappingFile, error) {
        data, err := os.ReadFile(path)
        if err != nil {
            return nil, fmt.Errorf("failed to read mapping file: %w", err)
        }
        var mapping MappingFile
        if err := json.Unmarshal(data, &mapping); err != nil {
            return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)
        }
        // Validate non-empty
        if len(mapping) == 0 {
            return nil, fmt.Errorf("mapping file is empty")
        }
        // Validate each entry has species. Map iteration order is random, so
        // the keys are sorted first to keep the reported entry stable.
        keys := make([]string, 0, len(mapping))
        for dataSpecies := range mapping {
            keys = append(keys, dataSpecies)
        }
        sort.Strings(keys)
        for _, dataSpecies := range keys {
            if mapping[dataSpecies].Species == "" {
                return nil, fmt.Errorf("mapping entry '%s' has empty species field", dataSpecies)
            }
        }
        return mapping, nil
    }
    // MappingValidationResult collects the problems found while validating a mapping.
    type MappingValidationResult struct {
        MissingSpecies   []string          // .data species not in mapping
        MissingDBSpecies []string          // mapped species not in DB
        MissingCalltypes map[string]string // "dataSpecies/dataCalltype" -> "dbSpecies/dbCalltype"
    }
    // HasErrors reports whether at least one category holds an entry.
    func (r MappingValidationResult) HasErrors() bool {
        total := len(r.MissingSpecies) + len(r.MissingDBSpecies) + len(r.MissingCalltypes)
        return total > 0
    }
    // Error renders the populated categories as one "; "-separated message, in
    // the order: missing mapping entries, missing DB species, missing calltypes.
    // It returns the empty string when nothing is recorded.
    func (r MappingValidationResult) Error() string {
        sections := make([]string, 0, 3)
        if len(r.MissingSpecies) != 0 {
            joined := strings.Join(r.MissingSpecies, ", ")
            sections = append(sections, fmt.Sprintf("species in .data but not in mapping: [%s]", joined))
        }
        if len(r.MissingDBSpecies) != 0 {
            joined := strings.Join(r.MissingDBSpecies, ", ")
            sections = append(sections, fmt.Sprintf("mapped species not found in DB: [%s]", joined))
        }
        if len(r.MissingCalltypes) != 0 {
            var pairs []string
            for from, to := range r.MissingCalltypes {
                pairs = append(pairs, fmt.Sprintf("%s->%s", from, to))
            }
            // Map iteration order is random; sort the rendered pairs so the
            // message is stable.
            sort.Strings(pairs)
            joined := strings.Join(pairs, ", ")
            sections = append(sections, fmt.Sprintf("calltypes not found in DB: [%s]", joined))
        }
        return strings.Join(sections, "; ")
    }
    // ValidateMappingAgainstDB checks a mapping in three steps: every species
    // seen in the .data files must have a mapping entry, every mapped species
    // must be found through queryer, and every mapped calltype must be found
    // through queryer.
    //
    // The result collected so far is returned together with any query error.
    func ValidateMappingAgainstDB(
        queryer DB,
        mapping MappingFile,
        dataSpeciesSet map[string]bool,
        dataCalltypes map[string]map[string]bool, // species -> calltype -> true
    ) (MappingValidationResult, error) {
        report := MappingValidationResult{
            MissingSpecies:   make([]string, 0),
            MissingDBSpecies: make([]string, 0),
            MissingCalltypes: make(map[string]string),
        }
        // Step 1: .data species with no mapping entry, reported in sorted order.
        for name := range dataSpeciesSet {
            if _, mapped := mapping[name]; mapped {
                continue
            }
            report.MissingSpecies = append(report.MissingSpecies, name)
        }
        sort.Strings(report.MissingSpecies)

        // Steps 2 and 3: check the mapped labels against the DB; the calltype
        // check only runs when the species check did not fail.
        speciesLabels, calltypeLabels := collectMappedLabels(mapping, dataCalltypes)
        err := validateMappedSpecies(queryer, speciesLabels, &report)
        if err == nil {
            err = validateMappedCalltypes(queryer, calltypeLabels, &report)
        }
        return report, err
    }
    // collectMappedLabels builds sets of mapped species and calltype labels
    for dataSpecies, ctSet := range dataCalltypes {
    sm, exists := mapping[dataSpecies]
    if !exists {
    }
    dbSpecies := sm.Species
    for dataCT := range ctSet {
    dbCT := dataCT
    if sm.Calltypes != nil {
    if mapped, ok := sm.Calltypes[dataCT]; ok {
    dbCT = mapped
    }
    }
    if mappedCalltypes[dbSpecies] == nil {
    mappedCalltypes[dbSpecies] = make(map[string]string)
    }
    mappedCalltypes[dbSpecies][dbCT] = dataCT
    }
    }
    for _, sm := range mapping {
    if sm.Species == MappingNegative || sm.Species == MappingIgnore {
    continue
    }
    mappedSpeciesSet[sm.Species] = true
    if len(sm.Calltypes) > 0 {
    if mappedCalltypes[sm.Species] == nil {
    mappedCalltypes[sm.Species] = make(map[string]string)
    }
    for dataCT, dbCT := range sm.Calltypes {
    mappedCalltypes[sm.Species][dbCT] = dataCT
    }
    }
    }
    collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)
    return mappedSpeciesSet, mappedCalltypes
    }
    // validateMappedSpecies checks that all mapped species exist in the database
    func validateMappedSpecies(queryer DB, mappedSpeciesSet map[string]bool, result *MappingValidationResult) error {
    speciesLabels := make([]string, 0, len(mappedSpeciesSet))
    for s := range mappedSpeciesSet {
    speciesLabels = append(speciesLabels, s)
    }
    sort.Strings(speciesLabels)
    args := make([]any, len(speciesLabels))
    for i, s := range speciesLabels {
    args[i] = s
    }
    rows, err := queryer.Query(query, args...)
    if err != nil {
    return fmt.Errorf("failed to query species: %w", err)
    }
    defer rows.Close()
    foundSpecies := make(map[string]bool)
    for rows.Next() {
    var label string
    if err := rows.Scan(&label); err == nil {
    foundSpecies[label] = true
    }
    for _, s := range speciesLabels {
    if !foundSpecies[s] {
    result.MissingDBSpecies = append(result.MissingDBSpecies, s)
    }
    }
    // validateMappedCalltypes checks that all mapped calltypes exist in the database
    func validateMappedCalltypes(queryer DB, mappedCalltypes map[string]map[string]string, result *MappingValidationResult) error {
    for dbSpecies, ctMap := range mappedCalltypes {
    if len(ctMap) == 0 {
    continue
    }
    ctLabels := make([]string, 0, len(ctMap))
    for dbCT := range ctMap {
    ctLabels = append(ctLabels, dbCT)
    }
    sort.Strings(ctLabels)
    query := `
    SELECT ct.label
    FROM call_type ct
    JOIN species s ON ct.species_id = s.id
    args := make([]any, 1+len(ctLabels))
    args[0] = dbSpecies
    for i, ct := range ctLabels {
    args[1+i] = ct
    }
    rows, err := queryer.Query(query, args...)
    if err != nil {
    return fmt.Errorf("failed to query calltypes for species %s: %w", dbSpecies, err)
    }
    defer rows.Close()
    foundCT := make(map[string]bool)
    for rows.Next() {
    var label string
    if err := rows.Scan(&label); err == nil {
    foundCT[label] = true
    }
    }
    for dbCT, dataCT := range ctMap {
    if !foundCT[dbCT] {
    key := fmt.Sprintf("%s/%s", dbSpecies, dataCT)
    value := fmt.Sprintf("%s/%s", dbSpecies, dbCT)
    result.MissingCalltypes[key] = value
    }
    }
    }
    return nil
    }
    // GetDBSpecies looks up the DB species label mapped to a .data species name.
    // The boolean result is false (and the label empty) when dataSpecies has no
    // entry in the mapping.
    func (m MappingFile) GetDBSpecies(dataSpecies string) (string, bool) {
    	if entry, ok := m[dataSpecies]; ok {
    		return entry.Species, true
    	}
    	return "", false
    }
    // GetDBCalltype translates a .data calltype into its DB calltype label for
    // the given .data species. When the species is not in the mapping, has no
    // calltype table, or the table has no entry for dataCalltype, the input
    // calltype is returned unchanged.
    func (m MappingFile) GetDBCalltype(dataSpecies, dataCalltype string) string {
    	entry, found := m[dataSpecies]
    	if found && entry.Calltypes != nil {
    		if mapped, ok := entry.Calltypes[dataCalltype]; ok {
    			return mapped
    		}
    	}
    	return dataCalltype
    }
    // Mapping sentinels: special values for the SpeciesMapping.Species field.
    //
    // MappingNegative marks a .data species as "confirmed empty" (Noise-equivalent):
    // segments matching this name are treated as negative evidence — clips overlapping
    // them emit an all-zero row when no positive species also overlaps.
    //
    // MappingIgnore marks a .data species as "ignored entirely": segments matching
    // this name neither label clips nor block them.
    const (
    MappingNegative = "__NEGATIVE__"
    MappingIgnore = "__IGNORE__"
    )
    // MappingKind describes how a .data species should be treated.
    // Classify derives it from the SpeciesMapping.Species value.
    type MappingKind int
    const (
    // MappingReal is the kind for any Species value that is not a sentinel;
    // it is also the zero value of MappingKind.
    MappingReal MappingKind = iota
    // MappingNeg is the kind reported when Species equals MappingNegative.
    MappingNeg
    // MappingIgn is the kind reported when Species equals MappingIgnore.
    MappingIgn
    )
    // Classify reports how a .data species should be treated.
    //
    // ok is false when dataSpecies has no entry in the mapping. For the
    // sentinel kinds (MappingNeg, MappingIgn) the canonical name is empty;
    // otherwise it is the mapped Species value and the kind is MappingReal.
    func (m MappingFile) Classify(dataSpecies string) (canonical string, kind MappingKind, ok bool) {
    	entry, found := m[dataSpecies]
    	if !found {
    		return "", MappingReal, false
    	}
    	if entry.Species == MappingNegative {
    		return "", MappingNeg, true
    	}
    	if entry.Species == MappingIgnore {
    		return "", MappingIgn, true
    	}
    	return entry.Species, MappingReal, true
    }
    // ValidateCoversSpecies lists, in sorted order, every key of speciesSet that
    // has no entry in the mapping. The result is an empty (non-nil) slice when
    // the mapping covers all of them. Keys are considered regardless of their
    // boolean value.
    func (m MappingFile) ValidateCoversSpecies(speciesSet map[string]bool) []string {
    	uncovered := []string{}
    	for name := range speciesSet {
    		if _, covered := m[name]; covered {
    			continue
    		}
    		uncovered = append(uncovered, name)
    	}
    	sort.Strings(uncovered)
    	return uncovered
    }
    // Classes collects the distinct canonical class names found in the mapping
    // and returns them sorted. Entries whose Species is empty or one of the
    // sentinels (MappingNegative, MappingIgnore) are left out.
    // Used to build the CSV column header for clip-labels.
    func (m MappingFile) Classes() []string {
    	seen := make(map[string]bool)
    	for _, entry := range m {
    		name := entry.Species
    		if name == "" || name == MappingNegative || name == MappingIgnore {
    			continue
    		}
    		seen[name] = true
    	}

    	classes := make([]string, 0, len(seen))
    	for name := range seen {
    		classes = append(classes, name)
    	}
    	sort.Strings(classes)
    	return classes
    }
    WHERE s.label = ? AND ct.label IN (` + Placeholders(len(ctLabels)) + `) AND ct.active = true`
    return nil
    }
    }
    query := `SELECT label FROM species WHERE label IN (` + Placeholders(len(speciesLabels)) + `) AND active = true`
    if len(speciesLabels) == 0 {
    return nil
    }
    }
    func collectMappedLabels(mapping MappingFile, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
    mappedSpeciesSet := make(map[string]bool)
    mappedCalltypes := make(map[string]map[string]string)
    continue
    // collectUnmappedCalltypes adds calltypes from .data files that have no explicit
    // mapping entry (dataCT == dbCT by convention) to the mappedCalltypes set.
    func collectUnmappedCalltypes(mapping MappingFile, dataCalltypes map[string]map[string]bool, mappedCalltypes map[string]map[string]string) {
  • file deletion: cluster_import.go (----------)
    [4.1][4.186647:186688](),[4.186688][4.171513:171513]()
    package utils
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "time"
    )
    // ClusterImportInput defines parameters for importing one cluster
    //
    // ImportCluster rejects the input when FolderPath cannot be stat'ed or is
    // not a directory, and forwards Recursive to the WAV file scan.
    type ClusterImportInput struct {
    FolderPath string // Absolute path to folder with WAV files
    DatasetID string // 12-char dataset ID
    LocationID string // 12-char location ID
    ClusterID string // 12-char cluster ID
    Recursive bool // Scan subfolders?
    }
    // ClusterImportOutput provides results and statistics
    // for one ImportCluster run.
    type ClusterImportOutput struct {
    // TotalFiles is the number of WAV files found by the folder scan.
    TotalFiles int
    // ImportedFiles is the number of files inserted into the database.
    ImportedFiles int
    SkippedFiles int // Duplicates
    // FailedFiles is the number of entries in Errors.
    FailedFiles int
    // AudioMothFiles counts processed files flagged as AudioMoth recordings.
    AudioMothFiles int
    // TotalDuration is the sum of the Duration of every processed file
    // (unit not shown here — presumably seconds; confirm against the WAV parser).
    TotalDuration float64
    // ProcessingTime is the elapsed wall time formatted with time.Duration.String.
    ProcessingTime string
    // Errors holds the per-file processing and insert errors combined.
    Errors []FileImportError
    }
    // LocationData holds location information needed for processing
    //
    // GetLocationData fills the fields from the latitude, longitude and
    // timezone_id columns of the location table. Latitude and Longitude are
    // passed to CalculateAstronomicalData, and TimezoneID to the timestamp
    // resolution helpers.
    type LocationData struct {
    Latitude float64
    Longitude float64
    TimezoneID string
    }
    // FileProcessingResult is used for both single-file and cluster import pipelines.
    //
    // This is the canonical cluster import logic used by both:
    // - import_files.go (single cluster)
    // - bulk_file_import.go (multiple clusters)
    //
    // Steps:
    // 1. Validate folder exists
    // 2. Get location metadata (lat/lon/timezone) from database
    // 3. Scan folder for WAV files (recursive or not)
    // 4. Batch process all files:
    // - Parse WAV headers (includes file mod time)
    // - Batch parse filename timestamps (variance-based)
    // - Resolve timestamps (AudioMoth → filename → file mod time)
    // - Calculate hashes
    // - Calculate astronomical data
    // - Check duplicates
    // - INSERT INTO file
    // - INSERT INTO file_dataset (ALWAYS)
    // - INSERT INTO moth_metadata (if AudioMoth)
    // 6. Return summary statistics
    func ImportCluster(
    input ClusterImportInput,
    ) (*ClusterImportOutput, error) {
    startTime := time.Now()
    // Validate folder exists
    info, err := os.Stat(input.FolderPath)
    if err != nil {
    return nil, fmt.Errorf("folder not accessible: %w", err)
    }
    if !info.IsDir() {
    return nil, fmt.Errorf("path is not a directory: %s", input.FolderPath)
    }
    // Get location data for astronomical calculations
    locationData, err := GetLocationData(database, input.LocationID)
    if err != nil {
    return nil, fmt.Errorf("failed to get location data: %w", err)
    }
    // Scan folder for WAV files
    wavFiles, err := FindFiles(input.FolderPath, FindFilesOptions{
    Extension: ".wav",
    Recursive: input.Recursive,
    SkipPrefixes: []string{"Clips_"},
    SkipHidden: true, // Standard to ignore hidden
    MinSize: 1, // Must have size > 0
    })
    if err != nil {
    return nil, fmt.Errorf("failed to scan folder: %w", err)
    }
    // If no files, return early
    if len(wavFiles) == 0 {
    return &ClusterImportOutput{
    TotalFiles: 0,
    ProcessingTime: time.Since(startTime).String(),
    Errors: []FileImportError{},
    }, nil
    }
    // Batch process all files
    filesData, processErrors := batchProcessFiles(wavFiles, locationData)
    imported, skipped, insertErrors, err := insertClusterFiles(
    filesData,
    input.DatasetID,
    input.ClusterID,
    input.LocationID,
    )
    if err != nil {
    return nil, fmt.Errorf("database insertion failed: %w", err)
    }
    // Combine all errors
    allErrors := append(processErrors, insertErrors...)
    // Calculate summary statistics
    audiomothCount := 0
    totalDuration := 0.0
    for _, fd := range filesData {
    if fd.IsAudioMoth {
    audiomothCount++
    }
    totalDuration += fd.Duration
    }
    return &ClusterImportOutput{
    TotalFiles: len(wavFiles),
    ImportedFiles: imported,
    SkippedFiles: skipped,
    FailedFiles: len(allErrors),
    AudioMothFiles: audiomothCount,
    TotalDuration: totalDuration,
    ProcessingTime: time.Since(startTime).String(),
    Errors: allErrors,
    }, nil
    }
    // GetLocationData retrieves location coordinates and timezone
    var loc LocationData
    err := database.QueryRow(
    "SELECT latitude, longitude, timezone_id FROM location WHERE id = ?",
    locationID,
    ).Scan(&loc.Latitude, &loc.Longitude, &loc.TimezoneID)
    if err != nil {
    return nil, fmt.Errorf("failed to query location data: %w", err)
    }
    return &loc, nil
    }
    // Check if cluster already has a path
    var currentPath sql.NullString
    err := database.QueryRow("SELECT path FROM cluster WHERE id = ?", clusterID).Scan(&currentPath)
    if err != nil {
    return fmt.Errorf("failed to query cluster: %w", err)
    }
    // If path is already set, skip
    if currentPath.Valid && currentPath.String != "" {
    return nil
    }
    // Normalize folder path
    normalizedPath := NormalizeFolderPath(folderPath)
    // Update cluster with normalized path
    "UPDATE cluster SET path = ?, last_modified = now() WHERE id = ?",
    normalizedPath,
    clusterID,
    )
    if err != nil {
    return fmt.Errorf("failed to update cluster path: %w", err)
    }
    return nil
    }
    // batchProcessFiles reads header metadata and a hash for every WAV path,
    // resolves each file's timestamp and builds the per-file processing results.
    //
    // It returns the successfully processed files together with one
    // FileImportError (stage StageParse) per failure. A file whose header could
    // not be parsed is reported once and skipped by every later step.
    func batchProcessFiles(wavFiles []string, location *LocationData) ([]*FileProcessingResult, []FileImportError) {
    	var (
    		processed []*FileProcessingResult
    		failures  []FileImportError
    	)

    	// Pass 1: metadata + hash for every path. Failed entries are kept in
    	// place so that indices stay aligned with wavFiles.
    	infos := make([]wavInfo, 0, len(wavFiles))
    	for _, p := range wavFiles {
    		meta, sum, parseErr := ParseWAVHeaderWithHash(p)
    		infos = append(infos, wavInfo{path: p, metadata: meta, hash: sum, err: parseErr})
    	}

    	// Pass 2: report parse failures and gather the files whose names carry
    	// a timestamp, remembering which index each name belongs to.
    	var (
    		tsNames   []string
    		tsIndices []int
    	)
    	for idx := range infos {
    		entry := infos[idx]
    		if entry.err != nil {
    			failures = append(failures, FileImportError{
    				FileName: filepath.Base(entry.path),
    				Error:    entry.err.Error(),
    				Stage:    StageParse,
    			})
    			continue
    		}
    		if !HasTimestampFilename(entry.path) {
    			continue
    		}
    		tsNames = append(tsNames, filepath.Base(entry.path))
    		tsIndices = append(tsIndices, idx)
    	}

    	// Pass 3: parse all filename timestamps together (only when there are any).
    	stamps := make(map[int]time.Time)
    	if len(tsNames) != 0 {
    		var tsFailures []FileImportError
    		stamps, tsFailures = parseFilenameTimestampsBatch(infos, tsIndices, tsNames, location.TimezoneID)
    		failures = append(failures, tsFailures...)
    	}

    	// Pass 4: build the final record for every file that parsed cleanly.
    	for idx := range infos {
    		entry := infos[idx]
    		if entry.err != nil {
    			continue
    		}

    		var preParsed *time.Time
    		if ts, ok := stamps[idx]; ok {
    			preParsed = &ts
    		}

    		record, resolveErr := resolveFileData(entry, preParsed, location)
    		if resolveErr != nil {
    			failures = append(failures, FileImportError{
    				FileName: filepath.Base(entry.path),
    				Error:    resolveErr.Error(),
    				Stage:    StageParse,
    			})
    			continue
    		}
    		processed = append(processed, record)
    	}

    	return processed, failures
    }
    // insertSingleFile inserts one file's data into the database within an existing transaction.
    // Returns (imported=true, nil) on success, (imported=false, nil) if skipped, or (false, error) on failure.
    func insertSingleFile(
    ctx context.Context,
    fd *FileProcessingResult,
    datasetID, clusterID, locationID string,
    ) (bool, error) {
    // Check for duplicate hash
    _, isDuplicate, err := CheckDuplicateHash(tx, fd.Hash)
    if err != nil {
    return false, fmt.Errorf("duplicate check failed: %w", err)
    }
    if isDuplicate {
    return false, nil // skipped
    }
    // Generate file ID
    fileID, err := GenerateLongID()
    if err != nil {
    return false, fmt.Errorf("ID generation failed: %w", err)
    }
    // Insert file record
    fileID, fd.FileName, fd.Hash, locationID,
    fd.TimestampLocal, clusterID, fd.Duration, fd.SampleRate,
    fd.AstroData.SolarNight, fd.AstroData.CivilNight, fd.AstroData.MoonPhase,
    )
    if err != nil {
    return false, fmt.Errorf("file insert failed: %w", err)
    }
    // Insert file_dataset junction (ALWAYS)
    if err != nil {
    return false, fmt.Errorf("file_dataset insert failed: %w", err)
    }
    // If AudioMoth, insert moth_metadata
    if fd.IsAudioMoth && fd.MothData != nil {
    fileID,
    fd.MothData.Timestamp,
    &fd.MothData.RecorderID,
    &fd.MothData.Gain,
    &fd.MothData.BatteryV,
    &fd.MothData.TempC,
    )
    if err != nil {
    return false, fmt.Errorf("moth_metadata insert failed: %w", err)
    }
    }
    return true, nil
    }
    func insertClusterFiles(
    filesData []*FileProcessingResult,
    datasetID, clusterID, locationID string,
    ) (imported, skipped int, errors []FileImportError, err error) {
    ctx := context.Background()
    for _, fd := range filesData {
    if insertErr != nil {
    errors = append(errors, FileImportError{
    FileName: fd.FileName,
    Error: insertErr.Error(),
    Stage: StageInsert,
    })
    continue
    }
    if wasImported {
    imported++
    } else {
    skipped++
    }
    }
    return imported, skipped, errors, nil
    }
    wasImported, insertErr := insertSingleFile(ctx, tx, fd, datasetID, clusterID, locationID)
    tx Mutator,
    // insertClusterFiles inserts all file data into database using the provided transaction.
    // The caller is responsible for committing or rolling back the transaction.
    _, err = tx.ExecContext(ctx, `
    INSERT INTO moth_metadata (
    file_id, timestamp, recorder_id, gain, battery_v, temp_c,
    created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
    VALUES (?, ?, now(), now())
    `, fileID, datasetID)
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, timestamp_local,
    cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
    moon_phase, created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    tx Mutator,
    adjustedTimestamps, err := ApplyTimezoneOffset(filenameTimestamps, timezoneID)
    if err != nil {
    for _, idx := range filenameIndices {
    errors = append(errors, FileImportError{
    FileName: filepath.Base(wavInfos[idx].path),
    Error: fmt.Sprintf("timezone offset failed: %v", err),
    Stage: StageParse,
    })
    }
    return result, errors
    }
    for j, idx := range filenameIndices {
    result[idx] = adjustedTimestamps[j]
    }
    return result, errors
    }
    // resolveFileData builds the FileProcessingResult for one parsed WAV file:
    // it resolves the file's timestamp (passing along preParsedTime, which may
    // be nil) and computes astronomical data for the location's coordinates.
    // Any error from ResolveTimestamp is returned unchanged.
    func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*FileProcessingResult, error) {
    	resolved, err := ResolveTimestamp(info.metadata, info.path, location.TimezoneID, true, preParsedTime)
    	if err != nil {
    		return nil, err
    	}

    	duration := info.metadata.Duration
    	astro := CalculateAstronomicalData(resolved.Timestamp.UTC(), duration, location.Latitude, location.Longitude)

    	out := &FileProcessingResult{
    		FileName:       filepath.Base(info.path),
    		Hash:           info.hash,
    		Duration:       duration,
    		SampleRate:     info.metadata.SampleRate,
    		TimestampLocal: resolved.Timestamp,
    		IsAudioMoth:    resolved.IsAudioMoth,
    		MothData:       resolved.MothData,
    		AstroData:      astro,
    	}
    	return out, nil
    }
    // wavInfo holds WAV metadata and hash for a single file during batch processing
    //
    // batchProcessFiles creates one entry per input path from the results of
    // ParseWAVHeaderWithHash; later steps skip entries whose err is non-nil.
    type wavInfo struct {
    path string // file path as passed to batchProcessFiles
    metadata *WAVMetadata // header metadata returned by ParseWAVHeaderWithHash
    hash string // hash returned by ParseWAVHeaderWithHash
    err error // error from ParseWAVHeaderWithHash, nil on success
    }
    // parseFilenameTimestampsBatch parses filename timestamps and applies timezone offsets.
    // Returns a map from wavInfos index to adjusted timestamp, and any errors.
    func parseFilenameTimestampsBatch(
    wavInfos []wavInfo,
    filenameIndices []int,
    filenames []string,
    timezoneID string,
    ) (map[int]time.Time, []FileImportError) {
    var errors []FileImportError
    result := make(map[int]time.Time)
    filenameTimestamps, err := ParseFilenameTimestamps(filenames)
    if err != nil {
    for _, idx := range filenameIndices {
    errors = append(errors, FileImportError{
    FileName: filepath.Base(wavInfos[idx].path),
    Error: fmt.Sprintf("filename timestamp parsing failed: %v", err),
    Stage: StageParse,
    })
    }
    return result, errors
    }
    _, err = database.ExecContext(ctx,
    // EnsureClusterPath sets the cluster's path field if it's currently empty.
    // Accepts any type with QueryRow and ExecContext (e.g. *sql.DB, *sql.Tx, *db.LoggedTx).
    func EnsureClusterPath(database Mutator, clusterID, folderPath string) error {
    ctx := context.Background()
    func GetLocationData(database DB, locationID string) (*LocationData, error) {
    tx,
    // Batch insert into database using the provided transaction
    database DB,
    tx Mutator,
    // 5. Batch insert using the provided transaction:
    // ImportCluster imports all WAV files from a folder into a cluster.
    // The caller must provide an open transaction via tx; this function does NOT
    // commit or rollback — the caller owns the transaction lifecycle.
  • edit in utils/file_import.go at line 4
    [4.135092][4.135092:135108]()
    "database/sql"
  • edit in utils/file_import.go at line 8
    [4.135142][4.135142:135143](),[4.135143][4.1144:1238]()
    // ImportStage identifies the pipeline stage where an error occurred.
    // It is a string type so the stage name appears as text when serialized.
    type ImportStage string
  • edit in utils/file_import.go at line 9
    [4.1239][4.1239:1733](),[4.1733][4.479:578](),[4.135143][4.479:578](),[4.578][4.1734:1849](),[4.1849][4.727:730](),[4.727][4.727:730]()
    // ImportStage values used to tag import errors with the pipeline stage
    // that produced them.
    const (
    StageScan ImportStage = "scan" // directory scanning
    StageHash ImportStage = "hash" // hash computation
    StageParse ImportStage = "parse" // WAV header / filename parsing
    StageProcess ImportStage = "process" // file processing
    StageValidation ImportStage = "validation" // validation checks
    StageInsert ImportStage = "insert" // database insertion
    StageImport ImportStage = "import" // database import (segment pipeline)
    )
    // FileImportError records errors encountered during file processing
    // It is a plain data record (not a Go error value) with JSON field tags.
    type FileImportError struct {
    FileName string `json:"file_name"` // name of the file the error relates to
    Error string `json:"error"` // error message text
    Stage ImportStage `json:"stage"` // pipeline stage that reported the error
    }
  • edit in utils/file_import.go at line 115
    [4.138444][4.138444:138445](),[4.138445][4.455:663](),[4.663][4.138596:138642](),[4.138596][4.138596:138642](),[4.138642][4.3005:3058](),[4.3058][4.138642:138801](),[4.138642][4.138642:138801](),[4.138801][4.1459:1665](),[4.758][4.138944:139229](),[4.1665][4.138944:139229](),[4.138944][4.138944:139229]()
    // DB is an interface satisfied by both *sql.DB and *sql.Tx.
    // Used throughout utils for database queries that must work with either.
    // The methods mirror the context-free query methods of database/sql.
    type DB interface {
    // Query runs a statement that returns rows.
    Query(query string, args ...any) (*sql.Rows, error)
    // QueryRow runs a statement expected to return at most one row.
    QueryRow(query string, args ...any) *sql.Row
    // Exec runs a statement that returns no rows.
    Exec(query string, args ...any) (sql.Result, error)
    }
    // CheckDuplicateHash looks for an active file row with the given XXH64 hash.
    //
    // When a row exists it returns that row's ID with isDuplicate set to true.
    // When no row matches (sql.ErrNoRows) it returns an empty ID, false and a
    // nil error. Any other scan error is wrapped and returned.
    //
    // q may be any value with a QueryRow method (the original comment names
    // *sql.DB, *sql.Tx and *db.LoggedTx).
    func CheckDuplicateHash(q interface {
    	QueryRow(query string, args ...any) *sql.Row
    }, hash string) (existingID string, isDuplicate bool, err error) {
    	row := q.QueryRow(
    		"SELECT id FROM file WHERE xxh64_hash = ? AND active = true",
    		hash,
    	)
    	switch scanErr := row.Scan(&existingID); scanErr {
    	case nil:
    		return existingID, true, nil
    	case sql.ErrNoRows:
    		return "", false, nil
    	default:
    		return "", false, fmt.Errorf("duplicate check failed: %w", scanErr)
    	}
    }
  • replacement in tools/pattern.go at line 194
    [4.11915][4.11915:11940]()
    if err := tx.QueryRow(
    [4.11915]
    [4.11940]
    if err := tx.QueryRowContext(context.Background(),
  • file addition: mapping_validate_test.go (----------)
    [4.1]
    package imp
    import (
    "database/sql"
    "slices"
    "strings"
    "testing"
    _ "github.com/duckdb/duckdb-go/v2"
    )
    // setupMappingTestDB opens an in-memory DuckDB database and fills it with
    // the minimal species / call_type tables used by the mapping validation tests.
    //
    // Species:   Kiwi (sp_kiwi000000), Roroa (sp_roroa00000), Tui (sp_tui0000000, inactive)
    // Calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002), Roroa/brrr (ct_roroa00001)
    //
    // NOTE(review): the literal ids are 13 characters while the id columns are
    // declared VARCHAR(12); presumably DuckDB does not enforce the length — confirm.
    func setupMappingTestDB(t *testing.T) *sql.DB {
    	t.Helper()

    	conn, err := sql.Open("duckdb", ":memory:")
    	if err != nil {
    		t.Fatalf("open: %v", err)
    	}

    	// Statements run in order: schema first, then species, then calltypes.
    	statements := []string{
    		`CREATE TABLE species (
    id VARCHAR(12) PRIMARY KEY,
    label VARCHAR(100) UNIQUE NOT NULL,
    active BOOLEAN DEFAULT TRUE
    )`,
    		`CREATE TABLE call_type (
    id VARCHAR(12) PRIMARY KEY,
    species_id VARCHAR(12) NOT NULL,
    label VARCHAR(100) NOT NULL,
    active BOOLEAN DEFAULT TRUE
    )`,
    		"INSERT INTO species (id, label, active) VALUES ('sp_kiwi000000', 'Kiwi', true)",
    		"INSERT INTO species (id, label, active) VALUES ('sp_roroa00000', 'Roroa', true)",
    		"INSERT INTO species (id, label, active) VALUES ('sp_tui0000000', 'Tui', false)", // inactive
    		"INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000001', 'sp_kiwi000000', 'song', true)",
    		"INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_kiwi000002', 'sp_kiwi000000', 'duet', true)",
    		"INSERT INTO call_type (id, species_id, label, active) VALUES ('ct_roroa00001', 'sp_roroa00000', 'brrr', true)",
    	}
    	for _, stmt := range statements {
    		mustExecMapping(t, conn, stmt)
    	}
    	return conn
    }
    // assertStringSlice reports a test error when got and want differ, ignoring
    // element order. Two empty (or nil) slices are treated as equal. A length
    // mismatch is reported once; otherwise each element of want that is absent
    // from got is reported separately.
    func assertStringSlice(t *testing.T, label string, got, want []string) {
    	t.Helper()

    	switch {
    	case len(got) == 0 && len(want) == 0:
    		return
    	case len(got) != len(want):
    		t.Errorf("%s: got %v, want %v", label, got, want)
    		return
    	}

    	for _, expected := range want {
    		if slices.Contains(got, expected) {
    			continue
    		}
    		t.Errorf("%s: missing %q in %v", label, expected, got)
    	}
    }
    // mustExecMapping runs query against db and aborts the test if it fails.
    func mustExecMapping(t *testing.T, db *sql.DB, query string) {
    	t.Helper()
    	_, err := db.Exec(query)
    	if err != nil {
    		t.Fatalf("exec: %v", err)
    	}
    }
    // --- collectMappedLabels ---

    // TestCollectMappedLabels checks that collectMappedLabels returns the mapped
    // DB species (without sentinels) and the calltypes per DB species, both for
    // explicitly mapped calltypes and for calltypes that only occur in the data.
    func TestCollectMappedLabels(t *testing.T) {
    	mapping := MappingFile{
    		"GSK":   {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
    		"K-M":   {Species: "Kiwi"},
    		"noise": {Species: MappingNegative},
    	}
    	dataCalltypes := map[string]map[string]bool{
    		"GSK": {"brrr": true},
    		"K-M": {"song": true, "duet": true},
    	}

    	speciesSet, calltypes := collectMappedLabels(mapping, dataCalltypes)

    	hasBoth := speciesSet["Roroa"] && speciesSet["Kiwi"]
    	if !hasBoth {
    		t.Errorf("speciesSet=%v, want Kiwi and Roroa", speciesSet)
    	}
    	if speciesSet[MappingNegative] {
    		t.Error("sentinel species should be excluded")
    	}

    	// Roroa carries an explicit calltype mapping.
    	roroa := calltypes["Roroa"]
    	if roroa["brrr"] != "brrr" {
    		t.Errorf("Roroa calltypes=%v", roroa)
    	}

    	// Kiwi has no calltype mapping, so the data calltypes pass straight through.
    	kiwi := calltypes["Kiwi"]
    	if kiwi["song"] != "song" || kiwi["duet"] != "duet" {
    		t.Errorf("Kiwi calltypes=%v", kiwi)
    	}
    }
    // --- collectUnmappedCalltypes ---

    // TestCollectUnmappedCalltypes checks that data calltypes are recorded under
    // their DB species: mapped ones under the mapped DB calltype, unmapped ones
    // under their own name.
    func TestCollectUnmappedCalltypes(t *testing.T) {
    	mapping := MappingFile{
    		"GSK": {Species: "Roroa", Calltypes: map[string]string{"Male": "brrr"}},
    	}
    	dataCalltypes := map[string]map[string]bool{
    		"GSK": {"Male": true, "Female": true},
    	}
    	mappedCalltypes := map[string]map[string]string{}

    	collectUnmappedCalltypes(mapping, dataCalltypes, mappedCalltypes)

    	roroa := mappedCalltypes["Roroa"]

    	// "Male" is mapped to the DB calltype "brrr".
    	if roroa["brrr"] != "Male" {
    		t.Errorf("mapped Male->brrr: %v", roroa)
    	}
    	// "Female" has no mapping entry and is passed through as-is.
    	if roroa["Female"] != "Female" {
    		t.Errorf("unmapped Female passthrough: %v", roroa)
    	}
    }
    // --- validateMappedSpecies ---

    // TestValidateMappedSpecies runs validateMappedSpecies against the fixture
    // database and checks which species labels are reported as missing.
    func TestValidateMappedSpecies(t *testing.T) {
    	db := setupMappingTestDB(t)
    	defer db.Close()

    	cases := []struct {
    		name    string
    		species map[string]bool
    		check   func(t *testing.T, missing []string)
    	}{
    		{
    			name:    "all species exist in DB",
    			species: map[string]bool{"Kiwi": true, "Roroa": true},
    			check: func(t *testing.T, missing []string) {
    				if len(missing) > 0 {
    					t.Errorf("missing species: %v", missing)
    				}
    			},
    		},
    		{
    			name:    "species not in DB reported",
    			species: map[string]bool{"Phantom": true},
    			check: func(t *testing.T, missing []string) {
    				if len(missing) != 1 || missing[0] != "Phantom" {
    					t.Errorf("expected [Phantom], got %v", missing)
    				}
    			},
    		},
    		{
    			name:    "inactive species not found",
    			species: map[string]bool{"Tui": true},
    			check: func(t *testing.T, missing []string) {
    				if len(missing) != 1 {
    					t.Errorf("inactive species should be missing, got %v", missing)
    				}
    			},
    		},
    		{
    			name:    "empty set is no-op",
    			species: map[string]bool{},
    			check: func(t *testing.T, missing []string) {
    				if len(missing) != 0 {
    					t.Errorf("expected no missing, got %v", missing)
    				}
    			},
    		},
    	}

    	for _, tc := range cases {
    		t.Run(tc.name, func(t *testing.T) {
    			// Every subtest starts from a fresh, empty result.
    			result := &MappingValidationResult{MissingDBSpecies: make([]string, 0)}
    			if err := validateMappedSpecies(db, tc.species, result); err != nil {
    				t.Fatalf("unexpected error: %v", err)
    			}
    			tc.check(t, result.MissingDBSpecies)
    		})
    	}
    }
    // --- validateMappedCalltypes ---

    // TestValidateMappedCalltypes runs validateMappedCalltypes against the
    // fixture database and checks how many calltypes are reported as missing.
    func TestValidateMappedCalltypes(t *testing.T) {
    	db := setupMappingTestDB(t)
    	defer db.Close()

    	cases := []struct {
    		name      string
    		calltypes map[string]map[string]string
    		check     func(t *testing.T, missing map[string]string)
    	}{
    		{
    			name: "all calltypes exist",
    			calltypes: map[string]map[string]string{
    				"Kiwi": {"song": "data-song", "duet": "data-duet"},
    			},
    			check: func(t *testing.T, missing map[string]string) {
    				if len(missing) > 0 {
    					t.Errorf("missing calltypes: %v", missing)
    				}
    			},
    		},
    		{
    			name: "missing calltype reported",
    			calltypes: map[string]map[string]string{
    				"Kiwi": {"phantom": "data-phantom"},
    			},
    			check: func(t *testing.T, missing map[string]string) {
    				if len(missing) != 1 {
    					t.Errorf("expected 1 missing, got %v", missing)
    				}
    			},
    		},
    		{
    			name: "empty calltype map skips species",
    			calltypes: map[string]map[string]string{
    				"Kiwi": {},
    			},
    			check: func(t *testing.T, missing map[string]string) {
    				if len(missing) != 0 {
    					t.Errorf("expected none missing, got %v", missing)
    				}
    			},
    		},
    	}

    	for _, tc := range cases {
    		t.Run(tc.name, func(t *testing.T) {
    			// Every subtest starts from a fresh, empty result.
    			result := &MappingValidationResult{MissingCalltypes: make(map[string]string)}
    			if err := validateMappedCalltypes(db, tc.calltypes, result); err != nil {
    				t.Fatalf("unexpected error: %v", err)
    			}
    			tc.check(t, result.MissingCalltypes)
    		})
    	}
    }
    // --- ValidateMappingAgainstDB (integration of all above) ---

    // TestValidateMappingAgainstDB exercises the full validation entry point
    // against the fixture database: mapping coverage of the .data species,
    // existence of mapped species in the DB, and existence of mapped calltypes.
    func TestValidateMappingAgainstDB(t *testing.T) {
    	db := setupMappingTestDB(t)
    	defer db.Close()

    	tests := []struct {
    		name              string
    		mapping           MappingFile
    		dataSpecies       map[string]bool
    		dataCT            map[string]map[string]bool
    		hasErrors         bool
    		missingSpecies    []string
    		missingDBSpecies  []string
    		missingCalltypeCT string // substring expected in MissingCalltypes key
    		errorContains     string // substring expected in result.Error()
    	}{
    		{
    			name: "valid mapping - no errors",
    			mapping: MappingFile{
    				"GSK": {Species: "Roroa", Calltypes: map[string]string{"brrr": "brrr"}},
    				"K-M": {Species: "Kiwi"},
    			},
    			dataSpecies: map[string]bool{"GSK": true, "K-M": true},
    			dataCT:      map[string]map[string]bool{"GSK": {"brrr": true}, "K-M": {"song": true}},
    		},
    		{
    			name:           "missing species in mapping",
    			mapping:        MappingFile{"GSK": {Species: "Roroa"}},
    			dataSpecies:    map[string]bool{"GSK": true, "K-M": true},
    			hasErrors:      true,
    			missingSpecies: []string{"K-M"},
    		},
    		{
    			name:             "mapped species not in DB",
    			mapping:          MappingFile{"PHANTOM": {Species: "Phantom"}},
    			dataSpecies:      map[string]bool{"PHANTOM": true},
    			hasErrors:        true,
    			missingDBSpecies: []string{"Phantom"},
    		},
    		{
    			name:        "sentinel species excluded from DB check",
    			mapping:     MappingFile{"noise": {Species: MappingNegative}, "ignore": {Species: MappingIgnore}},
    			dataSpecies: map[string]bool{"noise": true, "ignore": true},
    		},
    		{
    			name: "missing calltype in DB",
    			mapping: MappingFile{
    				"K-M": {Species: "Kiwi", Calltypes: map[string]string{"song": "song", "phantom": "phantom"}},
    			},
    			dataSpecies:       map[string]bool{"K-M": true},
    			dataCT:            map[string]map[string]bool{"K-M": {"song": true, "phantom": true}},
    			hasErrors:         true,
    			missingCalltypeCT: "phantom",
    			errorContains:     "phantom",
    		},
    	}

    	for _, tt := range tests {
    		t.Run(tt.name, func(t *testing.T) {
    			result, err := ValidateMappingAgainstDB(db, tt.mapping, tt.dataSpecies, tt.dataCT)
    			if err != nil {
    				t.Fatalf("unexpected error: %v", err)
    			}
    			if result.HasErrors() != tt.hasErrors {
    				t.Errorf("HasErrors()=%v, want %v", result.HasErrors(), tt.hasErrors)
    			}
    			assertStringSlice(t, "MissingSpecies", result.MissingSpecies, tt.missingSpecies)
    			assertStringSlice(t, "MissingDBSpecies", result.MissingDBSpecies, tt.missingDBSpecies)

    			// The expectation is a substring of a MissingCalltypes key, so
    			// look for it in the keys instead of only checking non-emptiness.
    			if tt.missingCalltypeCT != "" {
    				matched := false
    				for key := range result.MissingCalltypes {
    					if strings.Contains(key, tt.missingCalltypeCT) {
    						matched = true
    						break
    					}
    				}
    				if !matched {
    					t.Errorf("expected a missing calltype key containing %q, got %v",
    						tt.missingCalltypeCT, result.MissingCalltypes)
    				}
    			}

    			if tt.errorContains != "" && !strings.Contains(result.Error(), tt.errorContains) {
    				t.Errorf("error should contain %q: %s", tt.errorContains, result.Error())
    			}
    		})
    	}
    }
  • file addition: mapping.go (----------)
    [4.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "sort"
    "skraak/mapping"
    "skraak/utils"
    )
    // Re-exports of the mapping package's types, constants and loader, provided
    // for convenience within this package.
    // External callers should use the mapping package directly.
    type (
    	MappingFile             = mapping.File
    	SpeciesMapping          = mapping.SpeciesMapping
    	MappingValidationResult = mapping.ValidationResult
    	MappingKind             = mapping.Kind
    )

    // Sentinel species values and mapping kinds, aliased from the mapping package.
    const (
    	MappingNegative = mapping.Negative
    	MappingIgnore   = mapping.Ignore

    	MappingReal = mapping.Real
    	MappingNeg  = mapping.Neg
    	MappingIgn  = mapping.Ign
    )

    // LoadMappingFile is an alias for mapping.Load.
    var LoadMappingFile = mapping.Load
    // MappingQuerier is the read-only interface needed for mapping validation.
    // Satisfied by *sql.DB, *sql.Tx, and *db.LoggedTx.
    // Both methods take a context as their first argument, matching the
    // context-aware query methods of database/sql.
    type MappingQuerier interface {
    // QueryContext runs a statement that returns rows.
    QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
    // QueryRowContext runs a statement expected to return at most one row.
    QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
    }
    // ValidateMappingAgainstDB checks a mapping against both the .data contents
    // and the database.
    //
    // It records, in the returned result:
    //   - MissingSpecies: .data species (keys of dataSpeciesSet) with no mapping
    //     entry, sorted;
    //   - MissingDBSpecies / MissingCalltypes: filled in by validateMappedSpecies
    //     and validateMappedCalltypes for the labels gathered by collectMappedLabels.
    //
    // If a validation helper fails, its error is returned together with the
    // result as populated so far; calltypes are not checked when the species
    // check fails.
    func ValidateMappingAgainstDB(
    	queryer MappingQuerier,
    	m mapping.File,
    	dataSpeciesSet map[string]bool,
    	dataCalltypes map[string]map[string]bool, // species -> calltype -> true
    ) (mapping.ValidationResult, error) {
    	report := mapping.ValidationResult{
    		MissingSpecies:   []string{},
    		MissingDBSpecies: []string{},
    		MissingCalltypes: map[string]string{},
    	}

    	// Every species seen in the .data files must have a mapping entry.
    	for name := range dataSpeciesSet {
    		if _, covered := m[name]; covered {
    			continue
    		}
    		report.MissingSpecies = append(report.MissingSpecies, name)
    	}
    	sort.Strings(report.MissingSpecies)

    	// Gather the DB-side labels the mapping refers to, then check them.
    	speciesLabels, calltypeLabels := collectMappedLabels(m, dataCalltypes)

    	err := validateMappedSpecies(queryer, speciesLabels, &report)
    	if err == nil {
    		err = validateMappedCalltypes(queryer, calltypeLabels, &report)
    	}
    	return report, err
    }
    // collectUnmappedCalltypes records, for every calltype seen in the .data files
    // under a mapped species, the database calltype it resolves to. A calltype with
    // an explicit entry in the species' Calltypes table uses that entry; otherwise
    // the data label is used unchanged (dataCT == dbCT by convention).
    //
    // mappedCalltypes is updated in place: db species -> db calltype -> data calltype.
    // Data species that have no mapping entry are ignored here.
    //
    // NOTE(review): unlike collectMappedLabels, this does not skip entries whose
    // Species is mapping.Negative or mapping.Ignore, so their calltypes are also
    // queued for database validation — confirm that this is intended.
    func collectUnmappedCalltypes(m mapping.File, dataCalltypes map[string]map[string]bool, mappedCalltypes map[string]map[string]string) {
    	for dataSpecies, seenCalltypes := range dataCalltypes {
    		entry, mapped := m[dataSpecies]
    		if !mapped {
    			continue
    		}
    		target := entry.Species
    		for dataCT := range seenCalltypes {
    			// Looking up a nil Calltypes map is safe and simply reports "absent".
    			dbCT, explicit := entry.Calltypes[dataCT]
    			if !explicit {
    				dbCT = dataCT
    			}
    			if mappedCalltypes[target] == nil {
    				mappedCalltypes[target] = make(map[string]string)
    			}
    			mappedCalltypes[target][dbCT] = dataCT
    		}
    	}
    }
    // collectMappedLabels gathers the database-side labels a mapping refers to.
    //
    // It returns the set of target species (entries mapped to mapping.Negative or
    // mapping.Ignore are left out) and, per target species, a table of
    // db calltype -> data calltype. The calltype table is seeded from the explicit
    // Calltypes entries and then extended by collectUnmappedCalltypes with the
    // calltypes actually present in dataCalltypes.
    func collectMappedLabels(m mapping.File, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
    	speciesSet := map[string]bool{}
    	calltypes := map[string]map[string]string{}
    	for _, entry := range m {
    		target := entry.Species
    		sentinel := target == mapping.Negative || target == mapping.Ignore
    		if sentinel {
    			continue
    		}
    		speciesSet[target] = true
    		// The inner map is created lazily, so species without explicit
    		// calltype entries get no (empty) table here.
    		for dataCT, dbCT := range entry.Calltypes {
    			if calltypes[target] == nil {
    				calltypes[target] = make(map[string]string)
    			}
    			calltypes[target][dbCT] = dataCT
    		}
    	}
    	collectUnmappedCalltypes(m, dataCalltypes, calltypes)
    	return speciesSet, calltypes
    }
    // validateMappedSpecies checks that every mapped species has an active row in
    // the species table. Labels that are not found are appended, in sorted order,
    // to result.MissingDBSpecies.
    //
    // An error is returned when the query fails, a row cannot be scanned, or row
    // iteration ends abnormally. Without those checks a truncated result set would
    // be indistinguishable from species genuinely missing in the database.
    func validateMappedSpecies(queryer MappingQuerier, mappedSpeciesSet map[string]bool, result *mapping.ValidationResult) error {
    	if len(mappedSpeciesSet) == 0 {
    		return nil
    	}
    	speciesLabels := make([]string, 0, len(mappedSpeciesSet))
    	for s := range mappedSpeciesSet {
    		speciesLabels = append(speciesLabels, s)
    	}
    	// Sorted so both the bind order and MissingDBSpecies are deterministic.
    	sort.Strings(speciesLabels)
    	query := `SELECT label FROM species WHERE label IN (` + utils.Placeholders(len(speciesLabels)) + `) AND active = true`
    	args := make([]any, len(speciesLabels))
    	for i, s := range speciesLabels {
    		args[i] = s
    	}
    	rows, err := queryer.QueryContext(context.Background(), query, args...)
    	if err != nil {
    		return fmt.Errorf("failed to query species: %w", err)
    	}
    	defer rows.Close()
    	foundSpecies := make(map[string]bool, len(speciesLabels))
    	for rows.Next() {
    		var label string
    		if err := rows.Scan(&label); err != nil {
    			return fmt.Errorf("failed to scan species label: %w", err)
    		}
    		foundSpecies[label] = true
    	}
    	// rows.Next returns false both at end-of-data and on error; tell them apart.
    	if err := rows.Err(); err != nil {
    		return fmt.Errorf("failed to read species rows: %w", err)
    	}
    	for _, s := range speciesLabels {
    		if !foundSpecies[s] {
    			result.MissingDBSpecies = append(result.MissingDBSpecies, s)
    		}
    	}
    	return nil
    }
    // validateMappedCalltypes checks that every mapped calltype has an active row
    // in call_type under its species. mappedCalltypes is keyed
    // db species -> db calltype -> data calltype. Each calltype that is not found
    // is recorded in result.MissingCalltypes as
    // "<dbSpecies>/<dataCT>" -> "<dbSpecies>/<dbCT>".
    //
    // One query is issued per species. Each result set is closed before the next
    // query starts (see findActiveCalltypes) instead of being deferred to the end
    // of this function.
    func validateMappedCalltypes(queryer MappingQuerier, mappedCalltypes map[string]map[string]string, result *mapping.ValidationResult) error {
    	for dbSpecies, ctMap := range mappedCalltypes {
    		if len(ctMap) == 0 {
    			continue
    		}
    		ctLabels := make([]string, 0, len(ctMap))
    		for dbCT := range ctMap {
    			ctLabels = append(ctLabels, dbCT)
    		}
    		sort.Strings(ctLabels)
    		foundCT, err := findActiveCalltypes(queryer, dbSpecies, ctLabels)
    		if err != nil {
    			return err
    		}
    		for dbCT, dataCT := range ctMap {
    			if !foundCT[dbCT] {
    				key := fmt.Sprintf("%s/%s", dbSpecies, dataCT)
    				value := fmt.Sprintf("%s/%s", dbSpecies, dbCT)
    				result.MissingCalltypes[key] = value
    			}
    		}
    	}
    	return nil
    }
    // findActiveCalltypes returns the subset of ctLabels that exist as active
    // call_type rows under the species labelled dbSpecies. It is a separate
    // function so that the deferred rows.Close runs once per species.
    func findActiveCalltypes(queryer MappingQuerier, dbSpecies string, ctLabels []string) (map[string]bool, error) {
    	query := `
    SELECT ct.label
    FROM call_type ct
    JOIN species s ON ct.species_id = s.id
    WHERE s.label = ? AND ct.label IN (` + utils.Placeholders(len(ctLabels)) + `) AND ct.active = true`
    	args := make([]any, 1+len(ctLabels))
    	args[0] = dbSpecies
    	for i, ct := range ctLabels {
    		args[1+i] = ct
    	}
    	rows, err := queryer.QueryContext(context.Background(), query, args...)
    	if err != nil {
    		return nil, fmt.Errorf("failed to query calltypes for species %s: %w", dbSpecies, err)
    	}
    	defer rows.Close()
    	found := make(map[string]bool, len(ctLabels))
    	for rows.Next() {
    		var label string
    		if err := rows.Scan(&label); err != nil {
    			return nil, fmt.Errorf("failed to scan calltype for species %s: %w", dbSpecies, err)
    		}
    		found[label] = true
    	}
    	// Distinguish a clean end of the result set from an iteration error.
    	if err := rows.Err(); err != nil {
    		return nil, fmt.Errorf("failed to read calltypes for species %s: %w", dbSpecies, err)
    	}
    	return found, nil
    }
  • replacement in tools/import/import_unstructured.go at line 27
    [4.567][4.567:1034]()
    TotalFiles int `json:"total_files"`
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    FailedFiles int `json:"failed_files"`
    TotalDuration float64 `json:"total_duration_seconds"`
    ProcessingTime string `json:"processing_time"`
    Errors []utils.FileImportError `json:"errors,omitempty"`
    [4.567]
    [4.1034]
    TotalFiles int `json:"total_files"`
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    FailedFiles int `json:"failed_files"`
    TotalDuration float64 `json:"total_duration_seconds"`
    ProcessingTime string `json:"processing_time"`
    Errors []FileImportError `json:"errors,omitempty"`
  • replacement in tools/import/import_unstructured.go at line 74
    [4.2330][4.2330:2395]()
    output.Errors = append(output.Errors, utils.FileImportError{
    [4.2330]
    [4.2395]
    output.Errors = append(output.Errors, FileImportError{
  • replacement in tools/import/import_unstructured.go at line 77
    [4.2467][4.2467:2502]()
    Stage: utils.StageProcess,
    [4.2467]
    [4.2502]
    Stage: StageProcess,
  • replacement in tools/import/import_unstructured.go at line 122
    [4.3630][4.3630:3689]()
    _, isDuplicate, err := utils.CheckDuplicateHash(tx, hash)
    [4.3630]
    [4.3689]
    _, isDuplicate, err := CheckDuplicateHash(tx, hash)
  • replacement in tools/import/import_unstructured.go at line 143
    [4.4248][4.4248:4268]()
    _, err = tx.Exec(`
    [4.4248]
    [4.4268]
    _, err = tx.ExecContext(context.Background(), `
  • replacement in tools/import/import_unstructured.go at line 163
    [4.4745][4.4745:4764]()
    _, err = tx.Exec(
    [4.4745]
    [4.4764]
    _, err = tx.ExecContext(context.Background(),
  • replacement in tools/import/import_unstructured.go at line 207
    [4.5933][4.5933:6024]()
    func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
    [4.5933]
    [4.6024]
    func scanWavFiles(folderPath string, recursive bool) ([]string, []FileImportError) {
  • replacement in tools/import/import_unstructured.go at line 209
    [4.6044][4.6044:6080]()
    var errors []utils.FileImportError
    [4.6044]
    [4.6080]
    var errors []FileImportError
  • replacement in tools/import/import_unstructured.go at line 213
    [4.6164][4.6164:6214]()
    errors = append(errors, utils.FileImportError{
    [4.6164]
    [4.6214]
    errors = append(errors, FileImportError{
  • replacement in tools/import/import_unstructured.go at line 216
    [4.6261][4.6261:6292]()
    Stage: utils.StageScan,
    [4.6261]
    [4.6292]
    Stage: StageScan,
  • replacement in tools/import/import_unstructured.go at line 239
    [4.6703][4.6703:6753]()
    errors = append(errors, utils.FileImportError{
    [4.6703]
    [4.6753]
    errors = append(errors, FileImportError{
  • replacement in tools/import/import_unstructured.go at line 242
    [4.6806][4.6806:6837]()
    Stage: utils.StageScan,
    [4.6806]
    [4.6837]
    Stage: StageScan,
  • replacement in tools/import/import_unstructured.go at line 249
    [4.6956][4.6956:7006]()
    errors = append(errors, utils.FileImportError{
    [4.6956]
    [4.7006]
    errors = append(errors, FileImportError{
  • replacement in tools/import/import_unstructured.go at line 252
    [4.7059][4.7059:7090]()
    Stage: utils.StageScan,
    [4.7059]
    [4.7090]
    Stage: StageScan,
  • replacement in tools/import/import_segments_validation_test.go at line 136
    [4.4562][4.4562:4593]()
    mapping := utils.MappingFile{
    [4.4562]
    [4.4593]
    mapping := MappingFile{
  • replacement in tools/import/import_segments.go at line 68
    [4.12100][4.12100:12237]()
    File string `json:"file,omitempty"`
    Stage utils.ImportStage `json:"stage"`
    Message string `json:"message"`
    [4.12100]
    [4.12237]
    File string `json:"file,omitempty"`
    Stage ImportStage `json:"stage"`
    Message string `json:"message"`
  • replacement in tools/import/import_segments.go at line 96
    [4.12927][4.12927:12955]()
    mapping utils.MappingFile,
    [4.12927]
    [4.12955]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 117
    [4.13734][4.13734:13842]()
    validationResult, err := utils.ValidateMappingAgainstDB(database, mapping, uniqueSpecies, uniqueCalltypes)
    [4.13734]
    [4.13842]
    validationResult, err := ValidateMappingAgainstDB(database, mapping, uniqueSpecies, uniqueCalltypes)
  • replacement in tools/import/import_segments.go at line 157
    [4.15220][4.15220:15274]()
    mapping, err := utils.LoadMappingFile(input.Mapping)
    [4.15220]
    [4.15274]
    mapping, err := LoadMappingFile(input.Mapping)
  • replacement in tools/import/import_segments.go at line 278
    [4.19391][4.19391:19427]()
    Stage: utils.StageValidation,
    [4.19391]
    [4.19427]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 289
    [4.19708][4.19708:19744]()
    Stage: utils.StageValidation,
    [4.19708]
    [4.19744]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 339
    [4.21064][4.17287:17325]()
    rows, err := q.Query(query, args...)
    [4.21064]
    [4.21107]
    rows, err := q.QueryContext(context.Background(), query, args...)
  • replacement in tools/import/import_segments.go at line 370
    [4.17341][4.21762:21790](),[4.21762][4.21762:21790]()
    mapping utils.MappingFile,
    [4.17341]
    [4.21790]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 388
    [4.5672][4.5672:5793]()
    func loadSpeciesIDs(q db.Querier, mapping utils.MappingFile, uniqueSpecies map[string]bool) (map[string]string, error) {
    [4.5672]
    [4.21928]
    func loadSpeciesIDs(q db.Querier, mapping MappingFile, uniqueSpecies map[string]bool) (map[string]string, error) {
  • replacement in tools/import/import_segments.go at line 413
    [4.22703][4.6202:6240]()
    rows, err := q.Query(query, args...)
    [4.22703]
    [4.6240]
    rows, err := q.QueryContext(context.Background(), query, args...)
  • replacement in tools/import/import_segments.go at line 430
    [4.6580][4.6580:6726]()
    func loadCalltypeIDs(q db.Querier, mapping utils.MappingFile, uniqueCalltypes map[string]map[string]bool) (map[string]map[string]string, error) {
    [4.6580]
    [4.6726]
    func loadCalltypeIDs(q db.Querier, mapping MappingFile, uniqueCalltypes map[string]map[string]bool) (map[string]map[string]string, error) {
  • replacement in tools/import/import_segments.go at line 447
    [4.23398][4.17382:17405]()
    err := q.QueryRow(`
    [4.23398]
    [4.23426]
    err := q.QueryRowContext(context.Background(), `
  • replacement in tools/import/import_segments.go at line 479
    [4.24305][4.24305:24335]()
    Stage: utils.StageHash,
    [4.24305]
    [4.24335]
    Stage: StageHash,
  • replacement in tools/import/import_segments.go at line 489
    [4.24516][4.17422:17443]()
    err = q.QueryRow(`
    [4.24516]
    [4.24542]
    err = q.QueryRowContext(context.Background(), `
  • replacement in tools/import/import_segments.go at line 496
    [4.24796][4.24796:24832]()
    Stage: utils.StageValidation,
    [4.24796]
    [4.24832]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 504
    [4.25049][4.25049:25085]()
    Stage: utils.StageValidation,
    [4.25049]
    [4.25085]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 515
    [4.25330][4.17444:17465]()
    err = q.QueryRow(`
    [4.25330]
    [4.25356]
    err = q.QueryRowContext(context.Background(), `
  • replacement in tools/import/import_segments.go at line 521
    [4.25594][4.25594:25630]()
    Stage: utils.StageValidation,
    [4.25594]
    [4.25630]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 529
    [4.25840][4.25840:25876]()
    Stage: utils.StageValidation,
    [4.25840]
    [4.25876]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 537
    [4.26059][4.17466:17487]()
    err = q.QueryRow(`
    [4.26059]
    [4.26085]
    err = q.QueryRowContext(context.Background(), `
  • replacement in tools/import/import_segments.go at line 546
    [4.26338][4.26338:26374]()
    Stage: utils.StageValidation,
    [4.26338]
    [4.26374]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 555
    [4.26576][4.26576:26612]()
    Stage: utils.StageValidation,
    [4.26576]
    [4.26612]
    Stage: StageValidation,
  • replacement in tools/import/import_segments.go at line 591
    [4.27463][4.27463:27491]()
    mapping utils.MappingFile,
    [4.27463]
    [4.27491]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 599
    [4.27739][4.27739:27802]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.27739]
    [4.27802]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 607
    [4.28007][4.28007:28070]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.28007]
    [4.28070]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 615
    [4.28264][4.28264:28327]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.28264]
    [4.28327]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 623
    [4.28527][4.28527:28590]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.28527]
    [4.28590]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 634
    [4.28994][4.28994:29057]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.28994]
    [4.29057]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 648
    [4.29595][4.29595:29659]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.29595]
    [4.29659]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 684
    [4.30658][4.30658:30686]()
    mapping utils.MappingFile,
    [4.30658]
    [4.30686]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 696
    [4.31009][4.31009:31072]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.31009]
    [4.31072]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 704
    [4.31249][4.31249:31312]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.31249]
    [4.31312]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 715
    [4.31693][4.31693:31756]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.31693]
    [4.31756]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 728
    [4.32044][4.32044:32072]()
    mapping utils.MappingFile,
    [4.32044]
    [4.32072]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 744
    [4.32610][4.32610:32641]()
    Stage: utils.StageImport,
    [4.32610]
    [4.32641]
    Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 779
    [4.33648][4.33648:33714]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.33648]
    [4.33714]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 795
    [4.34094][4.34094:34125]()
    Stage: utils.StageImport,
    [4.34094]
    [4.34125]
    Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 812
    [4.34519][4.34519:34547]()
    mapping utils.MappingFile,
    [4.34519]
    [4.34547]
    mapping MappingFile,
  • replacement in tools/import/import_segments.go at line 821
    [4.34835][4.34835:34898]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.34835]
    [4.34898]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 829
    [4.35129][4.35129:35192]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.35129]
    [4.35192]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 838
    [4.35454][4.35454:35517]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.35454]
    [4.35517]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 850
    [4.35998][4.35998:36061]()
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    [4.35998]
    [4.36061]
    File: filepath.Base(sf.DataPath), Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 901
    [4.37568][4.37568:37600]()
    Stage: utils.StageImport,
    [4.37568]
    [4.37600]
    Stage: StageImport,
  • replacement in tools/import/import_segments.go at line 935
    [4.38460][4.38460:38492]()
    Stage: utils.StageImport,
    [4.38460]
    [4.38492]
    Stage: StageImport,
  • replacement in tools/import/import_files.go at line 26
    [4.39350][4.39350:39510]()
    Summary ImportSummary `json:"summary"`
    FileIDs []string `json:"file_ids"`
    Errors []utils.FileImportError `json:"errors,omitempty"`
    [4.39350]
    [4.39510]
    Summary ImportSummary `json:"summary"`
    FileIDs []string `json:"file_ids"`
    Errors []FileImportError `json:"errors,omitempty"`
  • replacement in tools/import/import_files.go at line 75
    [4.3105][4.3105:3175]()
    err = utils.EnsureClusterPath(tx, input.ClusterID, input.FolderPath)
    [4.3105]
    [4.41071]
    err = EnsureClusterPath(tx, input.ClusterID, input.FolderPath)
  • replacement in tools/import/import_files.go at line 81
    [4.41160][4.3260:3343]()
    clusterOutput, err := utils.ImportCluster(database, tx, utils.ClusterImportInput{
    [4.41160]
    [4.41258]
    clusterOutput, err := ImportCluster(database, tx, ClusterImportInput{
  • replacement in tools/import/import_file.go at line 67
    [4.45782][4.45782:45849]()
    locData, err := utils.GetLocationData(database, input.LocationID)
    [4.45782]
    [4.45849]
    locData, err := GetLocationData(database, input.LocationID)
  • replacement in tools/import/import_file.go at line 151
    [4.3593][4.3593:3693]()
    if err := utils.EnsureClusterPath(tx, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
    [4.3593]
    [4.3693]
    if err := EnsureClusterPath(tx, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
  • replacement in tools/import/import_file.go at line 156
    [4.48634][4.48634:48703]()
    existingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)
    [4.48634]
    [4.48703]
    existingID, isDup, err := CheckDuplicateHash(tx, result.Hash)
  • file addition: file_import.go (----------)
    [4.1]
    package imp
    import (
    "context"
    "database/sql"
    "errors"
    "fmt"
    )
    // ImportStage identifies the pipeline stage where an error occurred.
    // It is a string type, so a stage is serialized as the literal values below
    // (for example in the "stage" JSON field of FileImportError).
    type ImportStage string
    // Known import stages.
    const (
    StageScan ImportStage = "scan" // directory scanning
    StageHash ImportStage = "hash" // hash computation
    StageParse ImportStage = "parse" // WAV header / filename parsing
    StageProcess ImportStage = "process" // file processing
    StageValidation ImportStage = "validation" // validation checks
    StageInsert ImportStage = "insert" // database insertion
    StageImport ImportStage = "import" // database import (segment pipeline)
    )
    // FileImportError records errors encountered during file processing.
    // It carries plain strings rather than an error value, so it can be
    // JSON-encoded directly using the tags below.
    type FileImportError struct {
    // FileName names the file the error relates to.
    FileName string `json:"file_name"`
    // Error is the human-readable error message.
    Error string `json:"error"`
    // Stage is the pipeline stage in which the error occurred.
    Stage ImportStage `json:"stage"`
    }
    // CheckDuplicateHash reports whether an active file row with the given XXH64
    // hash already exists.
    //
    // Returns:
    //   - (id, true, nil) when a matching row exists; id is that row's file ID;
    //   - ("", false, nil) when there is no such row;
    //   - ("", false, err) when the lookup itself failed; err wraps the cause.
    //
    // q may be any value providing QueryRowContext, such as *sql.DB, *sql.Tx, or
    // *db.LoggedTx.
    func CheckDuplicateHash(q interface {
    	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
    }, hash string) (existingID string, isDuplicate bool, err error) {
    	err = q.QueryRowContext(context.Background(),
    		"SELECT id FROM file WHERE xxh64_hash = ? AND active = true",
    		hash,
    	).Scan(&existingID)
    	switch {
    	case err == nil:
    		return existingID, true, nil
    	case errors.Is(err, sql.ErrNoRows):
    		// errors.Is also matches a wrapped sql.ErrNoRows, which == would miss.
    		return "", false, nil
    	default:
    		return "", false, fmt.Errorf("duplicate check failed: %w", err)
    	}
    }
  • file addition: cluster_import.go (----------)
    [4.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "time"
    "skraak/utils"
    )
    // Mutator represents a transaction-like object that supports both reads and writes.
    // Both *sql.Tx and *db.LoggedTx satisfy this interface.
    // Uses Context variants exclusively so all DB-facing interfaces compose as
    // compatible subsets of *sql.DB / *sql.Tx.
    //
    // In this file it is the type of ImportCluster's tx parameter and of
    // EnsureClusterPath's database parameter. Its method set includes Reader's,
    // so any Mutator can also be passed where a Reader is expected.
    type Mutator interface {
    // ExecContext runs a statement that returns no rows (INSERT/UPDATE/...).
    ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
    // QueryRowContext runs a query expected to return at most one row.
    QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
    }
    // Reader is a read-only interface for database queries.
    // Both *sql.DB and *db.LoggedTx satisfy this interface.
    //
    // It exposes single-row lookups only; GetLocationData is the consumer visible
    // in this file.
    type Reader interface {
    // QueryRowContext runs a query expected to return at most one row.
    QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
    }
    // ClusterImportInput defines parameters for importing one cluster.
    // It is consumed by ImportCluster, which checks that FolderPath is an existing
    // directory, looks up LocationID in the database, and passes DatasetID,
    // ClusterID and LocationID on to the insert step.
    type ClusterImportInput struct {
    FolderPath string // Absolute path to folder with WAV files
    DatasetID string // 12-char dataset ID
    LocationID string // 12-char location ID
    ClusterID string // 12-char cluster ID
    Recursive bool // Scan subfolders?
    }
    // ClusterImportOutput provides results and statistics for one ImportCluster run.
    type ClusterImportOutput struct {
    // TotalFiles is the number of WAV files found by the folder scan.
    TotalFiles int
    // ImportedFiles is the number of files reported as inserted by the insert step.
    ImportedFiles int
    SkippedFiles int // Duplicates
    // FailedFiles is the number of recorded errors (len(Errors)), which is not
    // necessarily the number of distinct files that failed.
    FailedFiles int
    // AudioMothFiles counts successfully processed files flagged IsAudioMoth.
    AudioMothFiles int
    // TotalDuration is the summed Duration of all successfully processed files
    // (presumably seconds — confirm against utils.FileProcessingResult).
    TotalDuration float64
    // ProcessingTime is the elapsed wall-clock time, formatted with
    // time.Duration.String.
    ProcessingTime string
    // Errors holds the processing errors followed by the insertion errors.
    Errors []FileImportError
    }
    // LocationData holds location information needed for processing.
    // GetLocationData fills it from the latitude, longitude and timezone_id
    // columns of the location table; resolveFileData uses it for timestamp
    // resolution and astronomical calculations.
    type LocationData struct {
    Latitude float64
    Longitude float64
    // TimezoneID is the location's timezone identifier
    // (presumably an IANA zone name — confirm against the schema).
    TimezoneID string
    }
    // ImportCluster imports every WAV file found under input.FolderPath into one
    // cluster.
    //
    // Transaction ownership stays with the caller: all writes go through tx, and
    // this function neither commits nor rolls back.
    //
    // It is the shared cluster import routine behind both import_files.go (single
    // cluster) and bulk_file_import.go (multiple clusters).
    //
    // Pipeline:
    //  1. Check that the folder exists and is a directory.
    //  2. Load the location's lat/lon/timezone from the database.
    //  3. Scan for non-empty, non-hidden .wav files (optionally recursive),
    //     skipping names prefixed "Clips_".
    //  4. Process the files in batch: WAV header + hash, filename timestamps,
    //     timestamp resolution, astronomical data.
    //  5. Insert the processed files through tx.
    //  6. Return summary statistics plus all per-file errors.
    //
    // A non-nil error is returned only for failures that abort the whole run;
    // per-file problems are reported in the output's Errors field.
    func ImportCluster(
    	database Reader,
    	tx Mutator,
    	input ClusterImportInput,
    ) (*ClusterImportOutput, error) {
    	began := time.Now()
    	// Step 1: the folder must exist and be a directory.
    	stat, err := os.Stat(input.FolderPath)
    	switch {
    	case err != nil:
    		return nil, fmt.Errorf("folder not accessible: %w", err)
    	case !stat.IsDir():
    		return nil, fmt.Errorf("path is not a directory: %s", input.FolderPath)
    	}
    	// Step 2: location metadata feeds the astronomical calculations.
    	location, err := GetLocationData(database, input.LocationID)
    	if err != nil {
    		return nil, fmt.Errorf("failed to get location data: %w", err)
    	}
    	// Step 3: find candidate WAV files.
    	scanOptions := utils.FindFilesOptions{
    		Extension:    ".wav",
    		Recursive:    input.Recursive,
    		SkipPrefixes: []string{"Clips_"},
    		SkipHidden:   true, // Standard to ignore hidden
    		MinSize:      1,    // Must have size > 0
    	}
    	wavFiles, err := utils.FindFiles(input.FolderPath, scanOptions)
    	if err != nil {
    		return nil, fmt.Errorf("failed to scan folder: %w", err)
    	}
    	// Nothing to import: report an empty (non-nil) error list and stop.
    	if len(wavFiles) == 0 {
    		return &ClusterImportOutput{
    			ProcessingTime: time.Since(began).String(),
    			Errors:         []FileImportError{},
    		}, nil
    	}
    	// Step 4: extract metadata, hashes and timestamps.
    	processed, processErrors := batchProcessFiles(wavFiles, location)
    	// Step 5: write everything through the caller's transaction.
    	imported, skipped, insertErrors, err := insertClusterFiles(
    		tx,
    		processed,
    		input.DatasetID,
    		input.ClusterID,
    		input.LocationID,
    	)
    	if err != nil {
    		return nil, fmt.Errorf("database insertion failed: %w", err)
    	}
    	// Step 6: assemble the summary.
    	allErrors := append(processErrors, insertErrors...)
    	summary := &ClusterImportOutput{
    		TotalFiles:    len(wavFiles),
    		ImportedFiles: imported,
    		SkippedFiles:  skipped,
    		FailedFiles:   len(allErrors),
    		Errors:        allErrors,
    	}
    	for _, fd := range processed {
    		if fd.IsAudioMoth {
    			summary.AudioMothFiles++
    		}
    		summary.TotalDuration += fd.Duration
    	}
    	summary.ProcessingTime = time.Since(began).String()
    	return summary, nil
    }
    // GetLocationData loads the latitude, longitude and timezone ID of the
    // location row identified by locationID. Any lookup failure, including a
    // missing row, is returned as a wrapped error with a nil result.
    func GetLocationData(database Reader, locationID string) (*LocationData, error) {
    	const query = "SELECT latitude, longitude, timezone_id FROM location WHERE id = ?"
    	loc := new(LocationData)
    	row := database.QueryRowContext(context.Background(), query, locationID)
    	if err := row.Scan(&loc.Latitude, &loc.Longitude, &loc.TimezoneID); err != nil {
    		return nil, fmt.Errorf("failed to query location data: %w", err)
    	}
    	return loc, nil
    }
    // EnsureClusterPath fills in a cluster's path column when it is NULL or empty;
    // a path that is already set is left untouched. The stored value is folderPath
    // after utils.NormalizeFolderPath, and last_modified is bumped in the same
    // statement.
    //
    // database may be anything offering QueryRowContext and ExecContext
    // (e.g. *sql.DB, *sql.Tx, *db.LoggedTx).
    func EnsureClusterPath(database Mutator, clusterID, folderPath string) error {
    	ctx := context.Background()
    	// Read the current value; NullString distinguishes NULL from "".
    	var existing sql.NullString
    	row := database.QueryRowContext(ctx, "SELECT path FROM cluster WHERE id = ?", clusterID)
    	if err := row.Scan(&existing); err != nil {
    		return fmt.Errorf("failed to query cluster: %w", err)
    	}
    	needsPath := !existing.Valid || existing.String == ""
    	if !needsPath {
    		return nil
    	}
    	// Store the normalized form of the folder path.
    	if _, err := database.ExecContext(ctx,
    		"UPDATE cluster SET path = ?, last_modified = now() WHERE id = ?",
    		utils.NormalizeFolderPath(folderPath),
    		clusterID,
    	); err != nil {
    		return fmt.Errorf("failed to update cluster path: %w", err)
    	}
    	return nil
    }
    // wavInfo holds WAV metadata and hash for a single file during batch processing.
    // batchProcessFiles fills one per input path from utils.ParseWAVHeaderWithHash.
    type wavInfo struct {
    path string // file path as returned by the folder scan
    metadata *utils.WAVMetadata // parsed WAV header
    hash string // file hash computed in the same pass as the header
    err error // parse/hash failure; when non-nil the file is skipped in later steps
    }
    // parseFilenameTimestampsBatch parses the timestamps embedded in filenames as
    // one batch and shifts them using timezoneID.
    //
    // filenames[k] belongs to wavInfos[filenameIndices[k]]. On success the returned
    // map is keyed by wavInfos index and the error slice is nil. If either the
    // parsing or the timezone step fails, the map is empty and one StageParse
    // error is returned for every file in the batch.
    func parseFilenameTimestampsBatch(
    	wavInfos []wavInfo,
    	filenameIndices []int,
    	filenames []string,
    	timezoneID string,
    ) (map[int]time.Time, []FileImportError) {
    	adjusted := make(map[int]time.Time)
    	// failAll attributes a batch-level failure to each participating file.
    	failAll := func(message string) []FileImportError {
    		var failures []FileImportError
    		for _, idx := range filenameIndices {
    			failures = append(failures, FileImportError{
    				FileName: filepath.Base(wavInfos[idx].path),
    				Error:    message,
    				Stage:    StageParse,
    			})
    		}
    		return failures
    	}
    	parsed, err := utils.ParseFilenameTimestamps(filenames)
    	if err != nil {
    		return adjusted, failAll(fmt.Sprintf("filename timestamp parsing failed: %v", err))
    	}
    	shifted, err := utils.ApplyTimezoneOffset(parsed, timezoneID)
    	if err != nil {
    		return adjusted, failAll(fmt.Sprintf("timezone offset failed: %v", err))
    	}
    	// Re-key the batch results by their position in wavInfos.
    	for pos, idx := range filenameIndices {
    		adjusted[idx] = shifted[pos]
    	}
    	return adjusted, nil
    }
    // resolveFileData turns one parsed WAV file into a FileProcessingResult: it
    // resolves the recording timestamp via utils.ResolveTimestamp (passing along
    // preParsedTime, which may be nil) and computes astronomical data for the
    // location from the UTC timestamp and the file's duration.
    // A timestamp resolution failure is returned unchanged.
    func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*utils.FileProcessingResult, error) {
    	resolved, err := utils.ResolveTimestamp(info.metadata, info.path, location.TimezoneID, true, preParsedTime)
    	if err != nil {
    		return nil, err
    	}
    	meta := info.metadata
    	astro := utils.CalculateAstronomicalData(
    		resolved.Timestamp.UTC(),
    		meta.Duration,
    		location.Latitude,
    		location.Longitude,
    	)
    	fd := &utils.FileProcessingResult{
    		FileName:       filepath.Base(info.path),
    		Hash:           info.hash,
    		Duration:       meta.Duration,
    		SampleRate:     meta.SampleRate,
    		TimestampLocal: resolved.Timestamp,
    		IsAudioMoth:    resolved.IsAudioMoth,
    		MothData:       resolved.MothData,
    		AstroData:      astro,
    	}
    	return fd, nil
    }
    // batchProcessFiles runs the per-file processing for a whole folder scan.
    //
    // For every path it parses the WAV header and hash, batch-parses timestamps
    // for files whose names carry one, and then resolves each file into a
    // FileProcessingResult. Files that fail at any step are left out of the
    // results and reported as StageParse errors instead; results keep the order
    // of wavFiles.
    func batchProcessFiles(wavFiles []string, location *LocationData) ([]*utils.FileProcessingResult, []FileImportError) {
    	var (
    		results  []*utils.FileProcessingResult
    		failures []FileImportError
    	)
    	parseFailure := func(path string, cause error) FileImportError {
    		return FileImportError{
    			FileName: filepath.Base(path),
    			Error:    cause.Error(),
    			Stage:    StageParse,
    		}
    	}
    	// Pass 1: header + hash per file, and collect the names that carry a
    	// timestamp so they can be parsed together.
    	infos := make([]wavInfo, len(wavFiles))
    	var (
    		stampedNames   []string
    		stampedIndices []int
    	)
    	for i, path := range wavFiles {
    		metadata, hash, err := utils.ParseWAVHeaderWithHash(path)
    		infos[i] = wavInfo{path: path, metadata: metadata, hash: hash, err: err}
    		switch {
    		case err != nil:
    			failures = append(failures, parseFailure(path, err))
    		case utils.HasTimestampFilename(path):
    			stampedNames = append(stampedNames, filepath.Base(path))
    			stampedIndices = append(stampedIndices, i)
    		}
    	}
    	// Pass 2: batch-parse filename timestamps, if any. A nil map is fine for
    	// the lookups below.
    	var stampByIndex map[int]time.Time
    	if len(stampedNames) > 0 {
    		var stampErrors []FileImportError
    		stampByIndex, stampErrors = parseFilenameTimestampsBatch(infos, stampedIndices, stampedNames, location.TimezoneID)
    		failures = append(failures, stampErrors...)
    	}
    	// Pass 3: resolve every file that survived pass 1.
    	for i, info := range infos {
    		if info.err != nil {
    			continue
    		}
    		var preParsed *time.Time
    		if ts, ok := stampByIndex[i]; ok {
    			preParsed = &ts
    		}
    		fd, err := resolveFileData(info, preParsed, location)
    		if err != nil {
    			failures = append(failures, parseFailure(info.path, err))
    			continue
    		}
    		results = append(results, fd)
    	}
    	return results, failures
    }
    // insertSingleFile writes one processed file into the database through tx.
    //
    // Steps, in order: duplicate check on the file hash, file ID generation, insert
    // into file, insert into file_dataset, and, only when the file is flagged as
    // AudioMoth and carries moth data, insert into moth_metadata.
    //
    // Returns (true, nil) when the file was inserted, (false, nil) when it was
    // skipped because CheckDuplicateHash reported a duplicate, and (false, err)
    // with an error naming the failing step otherwise. The function neither commits
    // nor rolls back; it only issues statements on tx.
    func insertSingleFile(
    	ctx context.Context,
    	tx Mutator,
    	fd *utils.FileProcessingResult,
    	datasetID, clusterID, locationID string,
    ) (bool, error) {
    	// A duplicate is a skip, not a failure.
    	_, duplicate, err := CheckDuplicateHash(tx, fd.Hash)
    	switch {
    	case err != nil:
    		return false, fmt.Errorf("duplicate check failed: %w", err)
    	case duplicate:
    		return false, nil
    	}

    	newID, err := utils.GenerateLongID()
    	if err != nil {
    		return false, fmt.Errorf("ID generation failed: %w", err)
    	}

    	// Main file record.
    	if _, err := tx.ExecContext(ctx, `
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, timestamp_local,
    cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
    moon_phase, created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    		newID, fd.FileName, fd.Hash, locationID,
    		fd.TimestampLocal, clusterID, fd.Duration, fd.SampleRate,
    		fd.AstroData.SolarNight, fd.AstroData.CivilNight, fd.AstroData.MoonPhase,
    	); err != nil {
    		return false, fmt.Errorf("file insert failed: %w", err)
    	}

    	// Junction row linking the file to its dataset; written for every file.
    	if _, err := tx.ExecContext(ctx, `
    INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
    VALUES (?, ?, now(), now())
    `, newID, datasetID); err != nil {
    		return false, fmt.Errorf("file_dataset insert failed: %w", err)
    	}

    	// Recorder metadata exists only for AudioMoth files that carry it.
    	if !fd.IsAudioMoth || fd.MothData == nil {
    		return true, nil
    	}
    	moth := fd.MothData
    	if _, err := tx.ExecContext(ctx, `
    INSERT INTO moth_metadata (
    file_id, timestamp, recorder_id, gain, battery_v, temp_c,
    created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    		newID,
    		moth.Timestamp,
    		&moth.RecorderID,
    		&moth.Gain,
    		&moth.BatteryV,
    		&moth.TempC,
    	); err != nil {
    		return false, fmt.Errorf("moth_metadata insert failed: %w", err)
    	}
    	return true, nil
    }
    // insertClusterFiles inserts every entry of filesData through tx, one file at a
    // time, using a background context.
    //
    // A failure on one file is recorded as a StageInsert entry in errors and does
    // not stop the loop. imported counts files that were inserted, skipped counts
    // files that insertSingleFile declined to insert. The err result is always nil
    // in the current implementation. Committing or rolling back the transaction is
    // left to the caller.
    func insertClusterFiles(
    	tx Mutator,
    	filesData []*utils.FileProcessingResult,
    	datasetID, clusterID, locationID string,
    ) (imported, skipped int, errors []FileImportError, err error) {
    	ctx := context.Background()

    	for _, fd := range filesData {
    		inserted, insertErr := insertSingleFile(ctx, tx, fd, datasetID, clusterID, locationID)
    		switch {
    		case insertErr != nil:
    			errors = append(errors, FileImportError{
    				FileName: fd.FileName,
    				Error:    insertErr.Error(),
    				Stage:    StageInsert,
    			})
    		case inserted:
    			imported++
    		default:
    			skipped++
    		}
    	}

    	return imported, skipped, errors, nil
    }
  • file addition: check_duplicate_hash_test.go (----------)
    [4.1]
    package imp
    import (
    "database/sql"
    "testing"
    _ "github.com/duckdb/duckdb-go/v2"
    )
    // TestCheckDuplicateHash_NoRows checks that a lookup against an empty file table
    // reports no duplicate and returns an empty ID.
    func TestCheckDuplicateHash_NoRows(t *testing.T) {
    	conn := openTestDB(t)
    	defer conn.Close()

    	// The table is empty, so any hash must come back as "not a duplicate".
    	gotID, isDup, err := CheckDuplicateHash(conn, "abcdef0123456789")
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}

    	if isDup {
    		t.Error("expected isDuplicate=false when no rows")
    	}
    	if gotID != "" {
    		t.Errorf("expected empty id, got %q", gotID)
    	}
    }
    // TestCheckDuplicateHash_FoundDuplicate checks that an active file row with the
    // queried hash is reported as a duplicate and that its ID is returned.
    func TestCheckDuplicateHash_FoundDuplicate(t *testing.T) {
    	conn := openTestDB(t)
    	defer conn.Close()

    	const (
    		knownHash = "deadbeef12345678"
    		wantID    = "test_file_id_123"
    	)

    	// Seed one active row carrying the known hash.
    	if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES (?, '/test/file.wav', 'ds1', ?, true)`, wantID, knownHash); err != nil {
    		t.Fatalf("insert: %v", err)
    	}

    	gotID, isDup, err := CheckDuplicateHash(conn, knownHash)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}

    	if !isDup {
    		t.Error("expected isDuplicate=true")
    	}
    	if gotID != wantID {
    		t.Errorf("expected id=%q, got %q", wantID, gotID)
    	}
    }
    // TestCheckDuplicateHash_InactiveNotDuplicate checks that a row with the queried
    // hash but active=false is not reported as a duplicate.
    func TestCheckDuplicateHash_InactiveNotDuplicate(t *testing.T) {
    	conn := openTestDB(t)
    	defer conn.Close()

    	const (
    		knownHash  = "cafebeef12345678"
    		inactiveID = "inactive_file_id"
    	)

    	// Seed one INACTIVE row carrying the known hash.
    	if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES (?, '/test/old.wav', 'ds1', ?, false)`, inactiveID, knownHash); err != nil {
    		t.Fatalf("insert: %v", err)
    	}

    	// The inactive row must be invisible to the duplicate check.
    	gotID, isDup, err := CheckDuplicateHash(conn, knownHash)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}

    	if isDup {
    		t.Error("expected isDuplicate=false for inactive file")
    	}
    	if gotID != "" {
    		t.Errorf("expected empty id, got %q", gotID)
    	}
    }
    // TestCheckDuplicateHash_DifferentHashNoDuplicate checks that a row with a
    // different hash does not make the queried hash a duplicate.
    func TestCheckDuplicateHash_DifferentHashNoDuplicate(t *testing.T) {
    	conn := openTestDB(t)
    	defer conn.Close()

    	// Seed a row whose hash differs from the one queried below.
    	if _, err := conn.Exec(`INSERT INTO file (id, path, dataset_id, xxh64_hash, active)
    VALUES ('id1', '/test/a.wav', 'ds1', 'hash_aaaa', true)`); err != nil {
    		t.Fatalf("insert: %v", err)
    	}

    	gotID, isDup, err := CheckDuplicateHash(conn, "hash_bbbb")
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}

    	if isDup {
    		t.Error("expected isDuplicate=false for different hash")
    	}
    	if gotID != "" {
    		t.Errorf("expected empty id, got %q", gotID)
    	}
    }
    // openTestDB opens a DuckDB database with an empty DSN and creates a minimal
    // file table (id, path, dataset_id, xxh64_hash, active) for the duplicate-hash
    // tests. Any failure aborts the calling test; on a schema failure the handle is
    // closed first. The caller owns the returned handle and must close it.
    func openTestDB(t *testing.T) *sql.DB {
    	t.Helper()

    	conn, openErr := sql.Open("duckdb", "")
    	if openErr != nil {
    		t.Fatalf("open duckdb: %v", openErr)
    	}

    	if _, execErr := conn.Exec(`
    CREATE TABLE file (
    id VARCHAR PRIMARY KEY,
    path VARCHAR,
    dataset_id VARCHAR,
    xxh64_hash VARCHAR,
    active BOOLEAN DEFAULT true
    )
    `); execErr != nil {
    		conn.Close()
    		t.Fatalf("create table: %v", execErr)
    	}

    	return conn
    }
  • replacement in tools/import/bulk_file_import.go at line 267
    [4.59614][4.59614:59643]()
    err := database.QueryRow(`
    [4.59614]
    [4.59643]
    err := database.QueryRowContext(context.Background(), `
  • replacement in tools/import/bulk_file_import.go at line 431
    [4.64623][4.64623:64722]()
    err = database.QueryRow("SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
    [4.64623]
    [4.64722]
    err = database.QueryRowContext(context.Background(), "SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
  • replacement in tools/import/bulk_file_import.go at line 481
    [4.4924][4.4924:4982]()
    err = utils.EnsureClusterPath(tx, clusterID, folderPath)
    [4.4924]
    [4.4982]
    err = EnsureClusterPath(tx, clusterID, folderPath)
  • replacement in tools/import/bulk_file_import.go at line 487
    [4.5083][4.5083:5166]()
    clusterOutput, err := utils.ImportCluster(database, tx, utils.ClusterImportInput{
    [4.5083]
    [4.66412]
    clusterOutput, err := ImportCluster(database, tx, ClusterImportInput{
  • replacement in tools/dataset.go at line 245
    [4.18936][4.18936:18958]()
    err := tx.QueryRow(
    [4.18936]
    [4.18958]
    err := tx.QueryRowContext(context.Background(),
  • edit in tools/calls/calls_clip_labels.go at line 14
    [4.233071]
    [4.233071]
    "skraak/mapping"
  • replacement in tools/calls/calls_clip_labels.go at line 51
    [4.234764][4.234764:234855]()
    kind utils.MappingKind
    classIdx int // valid only when kind == utils.MappingReal
    [4.234764]
    [4.234855]
    kind mapping.Kind
    classIdx int // valid only when kind == mapping.Real
  • replacement in tools/calls/calls_clip_labels.go at line 136
    [4.11431][4.236960:237068](),[4.236960][4.236960:237068]()
    func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
    [4.11431]
    [4.237068]
    func parseClipLabelsDataFiles(folder, filter string, mf mapping.File) ([]parsedClipFile, error) {
  • replacement in tools/calls/calls_clip_labels.go at line 158
    [4.237906][4.237906:237984]()
    if missing := mapping.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
    [4.237906]
    [4.237984]
    if missing := mf.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
  • replacement in tools/calls/calls_clip_labels.go at line 182
    [4.11678][4.11678:11712]()
    mapping utils.MappingFile
    [4.11678]
    [4.11712]
    mf mapping.File
  • replacement in tools/calls/calls_clip_labels.go at line 201
    [4.239014][4.239014:239072]()
    mapping, err := utils.LoadMappingFile(input.MappingPath)
    [4.239014]
    [4.239072]
    mf, err := mapping.Load(input.MappingPath)
  • replacement in tools/calls/calls_clip_labels.go at line 206
    [4.239165][4.239165:239195]()
    classes := mapping.Classes()
    [4.239165]
    [4.239195]
    classes := mf.Classes()
  • replacement in tools/calls/calls_clip_labels.go at line 217
    [4.239429][4.239429:239507]()
    parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
    [4.239429]
    [4.239507]
    parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mf)
  • replacement in tools/calls/calls_clip_labels.go at line 241
    [4.12499][4.12499:12526]()
    mapping: mapping,
    [4.12499]
    [4.12526]
    mf: mf,
  • replacement in tools/calls/calls_clip_labels.go at line 268
    [4.13142][4.13142:13295]()
    fileRows, err := processClipLabelsFile(pf.path, pf.df, ctx.mapping, ctx.classIdx, ctx.classes, input, ctx.finalClipMode, ctx.cwd, ctx.folderAbs, &out)
    [4.13142]
    [4.240265]
    fileRows, err := processClipLabelsFile(pf.path, pf.df, ctx.mf, ctx.classIdx, ctx.classes, input, ctx.finalClipMode, ctx.cwd, ctx.folderAbs, &out)
  • replacement in tools/calls/calls_clip_labels.go at line 292
    [4.240753][4.240753:240781]()
    mapping utils.MappingFile,
    [4.240753]
    [4.240781]
    mf mapping.File,
  • replacement in tools/calls/calls_clip_labels.go at line 314
    [4.241242][4.241242:241341]()
    segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)
    [4.241242]
    [4.241341]
    segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mf, classIdx, out)
  • replacement in tools/calls/calls_clip_labels.go at line 326
    [4.13627][4.13627:13778]()
    func resolveLabel(lbl *utils.Label, seg *utils.Segment, filter string, mapping utils.MappingFile, classIdx map[string]int) (resolvedSeg, bool, bool) {
    [4.13627]
    [4.13778]
    func resolveLabel(lbl *utils.Label, seg *utils.Segment, filter string, mf mapping.File, classIdx map[string]int) (resolvedSeg, bool, bool) {
  • replacement in tools/calls/calls_clip_labels.go at line 330
    [4.13861][4.13861:13911]()
    canon, kind, ok := mapping.Classify(lbl.Species)
    [4.13861]
    [4.13911]
    canon, kind, ok := mf.Classify(lbl.Species)
  • replacement in tools/calls/calls_clip_labels.go at line 335
    [4.13976][4.13976:14000]()
    case utils.MappingIgn:
    [4.13976]
    [4.14000]
    case mapping.Ign:
  • replacement in tools/calls/calls_clip_labels.go at line 337
    [4.14085][4.14085:14109]()
    case utils.MappingNeg:
    [4.14085]
    [4.14109]
    case mapping.Neg:
  • replacement in tools/calls/calls_clip_labels.go at line 339
    [4.14195][4.14195:14220]()
    case utils.MappingReal:
    [4.14195]
    [4.14220]
    case mapping.Real:
  • replacement in tools/calls/calls_clip_labels.go at line 354
    [4.241701][4.241701:241729]()
    mapping utils.MappingFile,
    [4.241701]
    [4.241729]
    mf mapping.File,
  • replacement in tools/calls/calls_clip_labels.go at line 364
    [4.241983][4.14453:14527]()
    rs, isIgnored, ok := resolveLabel(lbl, seg, filter, mapping, classIdx)
    [4.241983]
    [4.242098]
    rs, isIgnored, ok := resolveLabel(lbl, seg, filter, mf, classIdx)
  • replacement in tools/calls/calls_clip_labels.go at line 440
    [4.244596][4.244596:244621]()
    case utils.MappingIgn:
    [4.244596]
    [4.244621]
    case mapping.Ign:
  • replacement in tools/calls/calls_clip_labels.go at line 442
    [4.244641][4.244641:244666]()
    case utils.MappingNeg:
    [4.244641]
    [4.244666]
    case mapping.Neg:
  • replacement in tools/calls/calls_clip_labels.go at line 444
    [4.244688][4.244688:244714]()
    case utils.MappingReal:
    [4.244688]
    [4.244714]
    case mapping.Real:
  • file addition: mapping (d--r------)
    [36.1]
  • file addition: mapping_test.go (----------)
    [0.34176]
    package mapping
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // TestLoad exercises Load with one valid mapping file, four files it must
    // reject, and a path that does not exist.
    func TestLoad(t *testing.T) {
    	t.Run("valid mapping", func(t *testing.T) {
    		path := createTempFile(t, `{
    "GSK": {"species": "Roroa", "calltypes": {"Male": "Male - Solo"}},
    "Don't Know": {"species": "Don't Know"}
    }`)
    		defer os.Remove(path)

    		loaded, err := Load(path)
    		if err != nil {
    			t.Fatalf("expected no error, got: %v", err)
    		}
    		if n := len(loaded); n != 2 {
    			t.Errorf("expected 2 entries, got %d", n)
    		}

    		gsk := loaded["GSK"]
    		if gsk.Species != "Roroa" {
    			t.Errorf("expected GSK -> Roroa, got %s", gsk.Species)
    		}
    		if got := gsk.Calltypes["Male"]; got != "Male - Solo" {
    			t.Errorf("expected GSK Male -> Male - Solo, got %s", got)
    		}
    	})

    	// Each of these file contents must make Load return an error.
    	rejected := []struct {
    		name    string
    		content string
    		failMsg string
    	}{
    		{"invalid JSON", `{invalid json}`, "expected error for invalid JSON"},
    		{"empty file", `{}`, "expected error for empty mapping"},
    		{"missing species field", `{"GSK": {"calltypes": {"Male": "Male - Solo"}}}`, "expected error for missing species field"},
    		{"empty species field", `{"GSK": {"species": ""}}`, "expected error for empty species field"},
    	}
    	for _, tc := range rejected {
    		t.Run(tc.name, func(t *testing.T) {
    			path := createTempFile(t, tc.content)
    			defer os.Remove(path)

    			if _, err := Load(path); err == nil {
    				t.Fatal(tc.failMsg)
    			}
    		})
    	}

    	t.Run("nonexistent file", func(t *testing.T) {
    		if _, err := Load("/nonexistent/path/mapping.json"); err == nil {
    			t.Fatal("expected error for nonexistent file")
    		}
    	})
    }
    // TestGetDBSpecies checks the lookup of a database species label for a present
    // and for an absent .data species name.
    func TestGetDBSpecies(t *testing.T) {
    	mf := File{
    		"GSK": {Species: "Roroa"},
    		"K-M": {Species: "Kiwi"},
    	}

    	t.Run("found", func(t *testing.T) {
    		got, found := mf.GetDBSpecies("GSK")
    		if !found {
    			t.Fatal("expected to find GSK")
    		}
    		if got != "Roroa" {
    			t.Errorf("expected Roroa, got %s", got)
    		}
    	})

    	t.Run("not found", func(t *testing.T) {
    		if _, found := mf.GetDBSpecies("UNKNOWN"); found {
    			t.Fatal("expected not to find UNKNOWN")
    		}
    	})
    }
    // TestGetDBCalltype checks that a mapped calltype is translated and that every
    // unmapped combination passes the input calltype through unchanged.
    func TestGetDBCalltype(t *testing.T) {
    	mf := File{
    		"GSK": {
    			Species: "Roroa",
    			Calltypes: map[string]string{
    				"Male":   "Male - Solo",
    				"Female": "Female - Solo",
    			},
    		},
    		"K-M": {Species: "Kiwi"}, // no calltype mapping
    	}

    	cases := []struct {
    		name     string
    		species  string
    		calltype string
    		want     string
    		failFmt  string
    	}{
    		{"with mapping", "GSK", "Male", "Male - Solo", "expected 'Male - Solo', got %s"},
    		{"without mapping - passthrough", "GSK", "Unknown", "Unknown", "expected passthrough 'Unknown', got %s"},
    		{"species not in mapping - passthrough", "UNKNOWN", "Male", "Male", "expected passthrough 'Male', got %s"},
    		{"species without calltypes - passthrough", "K-M", "Male", "Male", "expected passthrough 'Male', got %s"},
    	}
    	for _, tc := range cases {
    		t.Run(tc.name, func(t *testing.T) {
    			if got := mf.GetDBCalltype(tc.species, tc.calltype); got != tc.want {
    				t.Errorf(tc.failFmt, got)
    			}
    		})
    	}
    }
    // TestValidationResult checks HasErrors for an empty result and for each error
    // category on its own, and checks that Error mentions every populated category.
    func TestValidationResult(t *testing.T) {
    	hasErrorsCases := []struct {
    		name    string
    		result  ValidationResult
    		want    bool
    		failMsg string
    	}{
    		{"HasErrors - no errors", ValidationResult{}, false, "expected no errors"},
    		{"HasErrors - missing species", ValidationResult{MissingSpecies: []string{"UNKNOWN"}}, true, "expected errors"},
    		{"HasErrors - missing DB species", ValidationResult{MissingDBSpecies: []string{"Phantom"}}, true, "expected errors"},
    		{"HasErrors - missing calltypes", ValidationResult{MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"}}, true, "expected errors"},
    	}
    	for _, tc := range hasErrorsCases {
    		t.Run(tc.name, func(t *testing.T) {
    			if tc.result.HasErrors() != tc.want {
    				t.Error(tc.failMsg)
    			}
    		})
    	}

    	t.Run("Error - all error types", func(t *testing.T) {
    		full := ValidationResult{
    			MissingSpecies:   []string{"UNKNOWN"},
    			MissingDBSpecies: []string{"Phantom"},
    			MissingCalltypes: map[string]string{"GSK/Male": "Roroa/Male - Solo"},
    		}

    		msg := full.Error()
    		if msg == "" {
    			t.Error("expected non-empty error string")
    		}

    		// Every populated category must leave a recognisable trace in the message.
    		fragments := []struct {
    			text    string
    			failMsg string
    		}{
    			{"UNKNOWN", "error string should contain MISSING species"},
    			{"Phantom", "error string should contain missing DB species"},
    			{"GSK/Male", "error string should contain missing calltype"},
    		}
    		for _, f := range fragments {
    			if !containsSubstring(msg, f.text) {
    				t.Error(f.failMsg)
    			}
    		}
    	})
    }
    // Helper functions
    // createTempFile writes content to a file named mapping.json inside a fresh
    // t.TempDir() directory and returns the file's path. A write failure aborts
    // the calling test.
    func createTempFile(t *testing.T, content string) string {
    	t.Helper()

    	target := filepath.Join(t.TempDir(), "mapping.json")
    	err := os.WriteFile(target, []byte(content), 0644)
    	if err != nil {
    		t.Fatalf("failed to create temp file: %v", err)
    	}
    	return target
    }
    // containsSubstring reports whether substr occurs anywhere in s. An empty
    // substr is contained in every string, including the empty one.
    func containsSubstring(s, substr string) bool {
    	if len(s) < len(substr) {
    		return false
    	}
    	if s == substr {
    		return true
    	}
    	if len(s) == 0 {
    		return false
    	}
    	return containsSubstringHelper(s, substr)
    }

    // containsSubstringHelper slides a window of len(substr) bytes over s and
    // reports whether any window equals substr. It returns false without scanning
    // when substr is longer than s.
    func containsSubstringHelper(s, substr string) bool {
    	width := len(substr)
    	for end := width; end <= len(s); end++ {
    		if s[end-width:end] == substr {
    			return true
    		}
    	}
    	return false
    }
    // TestMappingClassify checks Classify for a negative sentinel entry, an ignore
    // sentinel entry, a regular species entry, and a name absent from the mapping.
    func TestMappingClassify(t *testing.T) {
    	m := File{
    		"noise":  {Species: Negative},
    		"ignore": {Species: Ignore},
    		"kiwi":   {Species: "Kiwi"},
    	}

    	present := []struct {
    		input     string
    		wantCanon string
    		wantKind  Kind
    		failMsg   string
    	}{
    		{"noise", "", Neg, "failed classify negative"},
    		{"ignore", "", Ign, "failed classify ignore"},
    		{"kiwi", "Kiwi", Real, "failed classify real"},
    	}
    	for _, tc := range present {
    		canon, kind, ok := m.Classify(tc.input)
    		if matched := ok && kind == tc.wantKind && canon == tc.wantCanon; !matched {
    			t.Error(tc.failMsg)
    		}
    	}

    	// For an unknown name only the ok flag is checked.
    	if _, _, ok := m.Classify("missing"); ok {
    		t.Error("expected missing to be not ok")
    	}
    }
    // TestMappingValidateCoversSpecies checks that only the species absent from the
    // mapping is reported as missing.
    func TestMappingValidateCoversSpecies(t *testing.T) {
    	m := File{"kiwi": {Species: "Kiwi"}}
    	seen := map[string]bool{"kiwi": true, "tui": true}

    	got := m.ValidateCoversSpecies(seen)
    	if onlyTui := len(got) == 1 && got[0] == "tui"; !onlyTui {
    		t.Errorf("expected [tui], got %v", got)
    	}
    }
    // TestMappingClasses checks that Classes drops the negative sentinel, collapses
    // two entries that share a species, and returns the names sorted.
    func TestMappingClasses(t *testing.T) {
    	m := File{
    		"noise":     {Species: Negative},
    		"kiwi":      {Species: "Kiwi"},
    		"tui":       {Species: "Tui"},
    		"duplicate": {Species: "Kiwi"},
    	}

    	got := m.Classes()
    	if asExpected := len(got) == 2 && got[0] == "Kiwi" && got[1] == "Tui"; !asExpected {
    		t.Errorf("expected [Kiwi, Tui], got %v", got)
    	}
    }
  • file addition: mapping.go (----------)
    [0.34176]
    // Package mapping provides types and utilities for translating .data file
    // species/calltype names to database labels via a mapping JSON file.
    //
    // This is a leaf package: no imports of skraak/db or skraak/tools.
    package mapping
    import (
    "encoding/json"
    "fmt"
    "os"
    "sort"
    "strings"
    )
    // SpeciesMapping describes how one .data species name translates to DB labels.
    type SpeciesMapping struct {
    	// Species is the DB species label; Load rejects entries where it is empty.
    	Species string `json:"species"`
    	// Calltypes optionally maps .data calltype names to DB calltype labels.
    	Calltypes map[string]string `json:"calltypes,omitempty"`
    }

    // File represents the complete mapping file structure.
    // Key is the .data file species name.
    type File map[string]SpeciesMapping

    // Load reads the mapping JSON file at path, parses it and validates it.
    //
    // It returns an error when the file cannot be read, when the content is not
    // valid JSON for a File, when the mapping has no entries, or when any entry has
    // an empty species field. If several entries have an empty species, the error
    // names the alphabetically first one, so the message is the same on every run
    // regardless of map iteration order.
    func Load(path string) (File, error) {
    	data, err := os.ReadFile(path)
    	if err != nil {
    		return nil, fmt.Errorf("failed to read mapping file: %w", err)
    	}

    	var m File
    	if err := json.Unmarshal(data, &m); err != nil {
    		return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)
    	}

    	// Validate non-empty
    	if len(m) == 0 {
    		return nil, fmt.Errorf("mapping file is empty")
    	}

    	// Validate each entry has species. Offenders are collected first because
    	// map iteration order is random; reporting the smallest key keeps the
    	// error deterministic.
    	var invalid []string
    	for dataSpecies, sm := range m {
    		if sm.Species == "" {
    			invalid = append(invalid, dataSpecies)
    		}
    	}
    	if len(invalid) > 0 {
    		sort.Strings(invalid)
    		return nil, fmt.Errorf("mapping entry '%s' has empty species field", invalid[0])
    	}

    	return m, nil
    }
    // Mapping sentinels: special values for the SpeciesMapping.Species field.
    const (
    	// Negative marks a .data species as "confirmed empty" (Noise-equivalent):
    	// segments matching this name are treated as negative evidence — clips
    	// overlapping them emit an all-zero row when no positive species also
    	// overlaps.
    	Negative = "__NEGATIVE__"

    	// Ignore marks a .data species as "ignored entirely": segments matching
    	// this name neither label clips nor block them.
    	Ignore = "__IGNORE__"
    )
    // Kind describes how a .data species should be treated.
    type Kind int

    // The possible treatments of a .data species.
    const (
    	// Real is a species that maps to a canonical class name.
    	Real Kind = iota
    	// Neg is a species whose mapping uses the Negative sentinel.
    	Neg
    	// Ign is a species whose mapping uses the Ignore sentinel.
    	Ign
    )
    // Classify looks up dataSpecies in the mapping and reports how it is treated.
    //
    // ok is false when dataSpecies has no entry; canonical is then empty and kind
    // is Real. For an entry mapped to the Negative or Ignore sentinel the kind is
    // Neg or Ign and canonical is empty. For any other entry the kind is Real and
    // canonical is the entry's Species value.
    func (m File) Classify(dataSpecies string) (canonical string, kind Kind, ok bool) {
    	entry, found := m[dataSpecies]
    	if !found {
    		return "", Real, false
    	}
    	if entry.Species == Negative {
    		return "", Neg, true
    	}
    	if entry.Species == Ignore {
    		return "", Ign, true
    	}
    	return entry.Species, Real, true
    }
    // ValidateCoversSpecies returns, in sorted order, every key of speciesSet that
    // has no entry in the mapping. Keys are considered regardless of their boolean
    // value. The result is an empty, non-nil slice when the mapping covers all keys.
    func (m File) ValidateCoversSpecies(speciesSet map[string]bool) []string {
    	uncovered := []string{}
    	for name := range speciesSet {
    		if _, covered := m[name]; covered {
    			continue
    		}
    		uncovered = append(uncovered, name)
    	}
    	sort.Strings(uncovered)
    	return uncovered
    }
    // Classes returns the distinct Species values of the mapping in sorted order,
    // leaving out the Negative and Ignore sentinels and the empty string.
    // Used to build the CSV column header for clip-labels.
    func (m File) Classes() []string {
    	seen := make(map[string]bool)
    	for _, entry := range m {
    		name := entry.Species
    		if name == "" || name == Negative || name == Ignore {
    			continue
    		}
    		seen[name] = true
    	}

    	classes := make([]string, 0, len(seen))
    	for name := range seen {
    		classes = append(classes, name)
    	}
    	sort.Strings(classes)
    	return classes
    }
    // GetDBSpecies returns the Species value stored for dataSpecies and true, or
    // an empty string and false when dataSpecies has no entry in the mapping.
    // Sentinel values are returned as stored.
    func (m File) GetDBSpecies(dataSpecies string) (string, bool) {
    	if entry, found := m[dataSpecies]; found {
    		return entry.Species, true
    	}
    	return "", false
    }
    // GetDBCalltype returns the DB calltype label mapped from dataCalltype for the
    // given .data species. When the species has no entry, has no calltype table,
    // or the table has no entry for dataCalltype, dataCalltype is returned as is.
    func (m File) GetDBCalltype(dataSpecies, dataCalltype string) string {
    	if entry, found := m[dataSpecies]; found {
    		// Indexing a nil Calltypes map is safe and simply reports "not found".
    		if mapped, ok := entry.Calltypes[dataCalltype]; ok {
    			return mapped
    		}
    	}
    	return dataCalltype
    }
    // ValidationResult collects the problems found while validating a mapping.
    type ValidationResult struct {
    	MissingSpecies   []string          // .data species not in mapping
    	MissingDBSpecies []string          // mapped species not in DB
    	MissingCalltypes map[string]string // "dataSpecies/dataCalltype" -> "dbSpecies/dbCalltype"
    }

    // HasErrors reports whether at least one of the three categories is non-empty.
    func (r ValidationResult) HasErrors() bool {
    	switch {
    	case len(r.MissingSpecies) > 0,
    		len(r.MissingDBSpecies) > 0,
    		len(r.MissingCalltypes) > 0:
    		return true
    	}
    	return false
    }

    // Error renders the populated categories as one message, joined by "; ".
    // Calltype pairs are sorted so the message does not depend on map iteration
    // order. The result is empty when no category is populated.
    func (r ValidationResult) Error() string {
    	var sections []string

    	if names := r.MissingSpecies; len(names) > 0 {
    		sections = append(sections,
    			fmt.Sprintf("species in .data but not in mapping: [%s]", strings.Join(names, ", ")))
    	}

    	if names := r.MissingDBSpecies; len(names) > 0 {
    		sections = append(sections,
    			fmt.Sprintf("mapped species not found in DB: [%s]", strings.Join(names, ", ")))
    	}

    	if len(r.MissingCalltypes) > 0 {
    		pairs := make([]string, 0, len(r.MissingCalltypes))
    		for from, to := range r.MissingCalltypes {
    			pairs = append(pairs, from+"->"+to)
    		}
    		sort.Strings(pairs)
    		sections = append(sections,
    			fmt.Sprintf("calltypes not found in DB: [%s]", strings.Join(pairs, ", ")))
    	}

    	return strings.Join(sections, "; ")
    }
  • edit in db/validation.go at line 10
    [4.25435]
    [4.25435]
    // Uses Context variants exclusively so all DB-facing interfaces compose as
    // compatible subsets of *sql.DB / *sql.Tx.
  • replacement in db/validation.go at line 13
    [4.25460][4.18795:18848](),[4.18848][4.25460:25506](),[4.25460][4.25460:25506]()
    Query(query string, args ...any) (*sql.Rows, error)
    QueryRow(query string, args ...any) *sql.Row
    [4.25460]
    [4.25506]
    QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
  • replacement in db/validation.go at line 21
    [4.235][4.25657:25747]()
    err := q.QueryRow("SELECT type FROM dataset WHERE id = ?", datasetID).Scan(&datasetType)
    [4.235]
    [4.332]
    err := q.QueryRowContext(context.Background(), "SELECT type FROM dataset WHERE id = ?", datasetID).Scan(&datasetType)
  • replacement in db/validation.go at line 67
    [4.2010][4.26099:26221]()
    err := q.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
    [4.2010]
    [4.2139]
    err := q.QueryRowContext(context.Background(), "SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
  • replacement in db/validation.go at line 84
    [4.26440][4.26440:26459]()
    err = q.QueryRow(
    [4.26440]
    [4.26459]
    err = q.QueryRowContext(context.Background(),
  • replacement in db/validation.go at line 105
    [4.27272][4.27272:27291]()
    err = q.QueryRow(
    [4.27272]
    [4.27291]
    err = q.QueryRowContext(context.Background(),
  • replacement in db/validation.go at line 128
    [4.4449][4.4449:4469]()
    err := q.QueryRow(
    [4.4449]
    [4.4469]
    err := q.QueryRowContext(context.Background(),
  • replacement in db/validation.go at line 147
    [4.5086][4.5086:5106]()
    err := q.QueryRow(
    [4.5086]
    [4.5106]
    err := q.QueryRowContext(context.Background(),
  • replacement in db/validation.go at line 166
    [4.5785][4.5785:5805]()
    err := q.QueryRow(
    [4.5785]
    [4.5805]
    err := q.QueryRowContext(context.Background(),
  • replacement in db/validation.go at line 206
    [4.28306][4.28306:28326]()
    err := q.QueryRow(
    [4.28306]
    [4.28326]
    err := q.QueryRowContext(context.Background(),
  • edit in db/tx_logger_test.go at line 14
    [4.799011][4.20932:20949]()
    "skraak/utils"
  • replacement in db/tx_logger_test.go at line 1632
    [4.21203][4.21203:21264]()
    // Compile-time check: *LoggedTx must satisfy utils.Mutator
    [4.21203]
    [4.21264]
    // Compile-time check: *LoggedTx must satisfy the local mutator interface
  • replacement in db/tx_logger_test.go at line 1635
    [4.21362][4.21362:21402]()
    var _ utils.Mutator = (*LoggedTx)(nil)
    [4.21362]
    [4.21402]
    var _ mutator = (*LoggedTx)(nil)
  • replacement in db/tx_logger_test.go at line 1655
    [4.21891][4.21891:21958]()
    // Use tx through the Mutator interface
    var m utils.Mutator = tx
    [4.21891]
    [4.21958]
    // Use tx through the mutator interface
    var m mutator = tx
  • edit in db/tx_logger.go at line 14
    [4.842182][4.22487:22504]()
    "skraak/utils"
  • edit in db/tx_logger.go at line 17
    [4.842229]
    [4.842229]
    // mutator is the local interface that *LoggedTx must satisfy.
    // Defined here because db/ is the consumer. Uses Context variants exclusively
    // so all DB-facing interfaces compose as compatible subsets of *sql.DB / *sql.Tx.
    type mutator interface {
    ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
    QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
    }
  • replacement in db/tx_logger.go at line 28
    [4.22508][4.22508:22598]()
    // LoggedTx satisfies the utils.Mutator interface.
    var _ utils.Mutator = (*LoggedTx)(nil)
    [4.22508]
    [4.22598]
    // LoggedTx satisfies the local mutator interface.
    var _ mutator = (*LoggedTx)(nil)
  • replacement in db/tx_logger.go at line 148
    [4.22722][4.22722:22792]()
    // Pass the LoggedTx directly (it satisfies utils.Mutator) or use its
    [4.22722]
    [4.22792]
    // Pass the LoggedTx directly (it satisfies the local mutator interface) or use its
  • replacement in CLAUDE.md at line 13
    [4.3405][4.3405:3492]()
    If called by `cmd/`, it goes in `tools/`. If called by `tools/`, it goes in `utils/`.
    [4.3405]
    [4.1195618]
    If called by `cmd/`, it goes in `tools/`. If called by `tools/`, it goes in `utils/`.
  • replacement in CLAUDE.md at line 22
    [4.316379][4.316379:316462]()
    tools/import/ → Import operations (bulk, file, files, segments, unstructured)
    [4.316379]
    [4.7389]
    tools/import/ → Import operations (bulk, file, files, segments, unstructured; defines own DB interfaces)
  • replacement in CLAUDE.md at line 24
    [4.7434][4.7434:7498](),[4.7498][4.9941:10039]()
    db/ → Database connection + types + transactions
    utils/*.go → Reusable helpers (leaf package, no db import, no `*Input`/`*Output` structs)
    [4.7434]
    [4.1196493]
    db/ → Database connection + types + transactions (defines local mutator interface)
    mapping/ → Mapping file types + loader (leaf package, shared by tools/calls/ and tools/import/)
    utils/*.go → Reusable helpers (leaf package, no database/sql import, no `*Input`/`*Output` structs)
  • replacement in CLAUDE.md at line 31
    [4.1196963][4.10086:10153]()
    **use cli tool `gosymdb agent-context` for exploratory dev tools**
    [4.1196963]
    [4.1196967]
    **DB interface convention:** Each consumer defines its own minimal interface (Mutator, Reader, MappingQuerier, etc.) using `*Context` method variants. No god-interfaces in `db/`. Two near-identical interfaces in two packages beats the wrong abstraction.
  • edit in CLAUDE.md at line 33
    [4.1196968]
    [4.1196968]
    **use cli tool `gosymdb agent-context` for exploratory dev tools**
  • edit in CHANGELOG.md at line 4
    [4.1198010]
    [3.6437]
    ## [2026-05-19] Refactor utils/: extract DB-aware code, add mapping/ package
    ### Context migration (PR 1)
    - Standardized all DB-facing interfaces on `*Context` method variants (`QueryContext`, `QueryRowContext`, `ExecContext`)
    - Updated `db.Querier` interface: dropped non-Context `Query`/`QueryRow` methods
    - Updated all callers in `db/validation.go`, `tools/import/`, `tools/pattern.go`, `tools/dataset.go`
    - Updated `utils.DB`, `utils.Mutator`, `utils.CheckDuplicateHash` to use Context variants
    - Updated `utils/mapping.go` to use `QueryContext`
    ### Phase 1: Move DB-aware import code to tools/import/ (PR 2)
    - **Moved `utils/cluster_import.go` → `tools/import/cluster_import.go`**: ImportCluster, GetLocationData, EnsureClusterPath, ClusterImportInput/Output, LocationData
    - **Moved DB-aware parts of `utils/file_import.go` → `tools/import/file_import.go`**: CheckDuplicateHash, FileImportError, ImportStage, Stage* constants
    - **Deleted `utils/mutator.go`**: Mutator interface replaced by consumer-local interfaces
    - **Moved `utils/mapping.go` → `tools/import/mapping.go`**: ValidateMappingAgainstDB, MappingQuerier, collectMappedLabels, validateMappedSpecies/Calltypes
    - **Created `mapping/` leaf package**: MappingFile (now File), SpeciesMapping, Load, sentinel constants (Negative/Ignore), Kind, Classify, Classes, ValidationResult — shared by tools/calls/ and tools/import/
    - **Kept in `utils/file_import.go`**: ResolveTimestamp, TimestampResult, ProcessSingleFile, FileProcessingResult (leaf-level, no DB)
    - **Updated `db/tx_logger.go`**: Replaced `utils.Mutator` with local `mutator` interface; removed `skraak/utils` import
    - **Updated `tools/calls/calls_clip_labels.go`**: Uses `mapping` package instead of `utils.MappingFile`
  • edit in CHANGELOG.md at line 24
    [3.6438]
    [3.6438]
    ### Result
    - `utils/` contains zero `database/sql` imports
    - `utils/` defines zero DB interfaces (DB, Mutator)
    - `utils/` contains zero `*Input`/`*Output` structs
    - `db/` no longer imports `skraak/utils` for Mutator (only for Placeholders and GainLevel aliases)
    - Each DB consumer defines its own minimal interface (Mutator, Reader, MappingQuerier)
  • replacement in .golangci.yml at line 11
    [4.22119][4.3593:3608]()
    - depguard
    [4.22119]
    [4.488]
    # - depguard
  • replacement in .golangci.yml at line 35
    [4.1341][4.3609:3780](),[4.3780][4.318064:318210](),[4.318210][4.3822:3854](),[4.3822][4.3822:3854](),[4.3854][4.318211:318250](),[4.318250][4.3887:4899](),[4.3887][4.3887:4899](),[4.4899][4.318251:319067](),[4.319067][4.4899:5137](),[4.4899][4.4899:5137](),[4.5137][4.319068:319290]()
    depguard:
    rules:
    # Package dependency rules — see CLAUDE.md "Package Organization"
    # Packages may only import packages below them in the list:
    # cmd → tools, tools/calls, tools/import, tui, utils, db
    # tools/calls → utils, db
    # tools/import → utils, db
    # tools → utils, db
    # tui → tools/calls, utils
    # db → utils
    # utils → (nothing — leaf package)
    utils:
    files:
    - "**/utils/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "utils is the leaf package"
    - pkg: "skraak/tools"
    desc: "utils is the leaf package"
    - pkg: "skraak/tui"
    desc: "utils is the leaf package"
    - pkg: "skraak/db"
    desc: "utils is the leaf package"
    db:
    files:
    - "**/db/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "db may only import utils"
    - pkg: "skraak/tools"
    desc: "db may only import utils"
    - pkg: "skraak/tui"
    desc: "db may only import utils"
    tui:
    files:
    - "**/tui/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tui must not import cmd"
    - pkg: "skraak/db"
    desc: "tui must not import db"
    - pkg: "skraak/tools$"
    desc: "tui must import from tools/calls, not tools"
    calls:
    files:
    - "**/tools/calls/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tools/calls must not import cmd"
    - pkg: "skraak/tools"
    desc: "tools/calls must not import parent package"
    - pkg: "skraak/tui"
    desc: "tools/calls must not import tui"
    import:
    files:
    - "**/tools/import/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tools/import must not import cmd"
    - pkg: "skraak/tools"
    desc: "tools/import must not import parent package"
    - pkg: "skraak/tui"
    desc: "tools/import must not import tui"
    tools:
    files:
    - "**/tools/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tools must not import cmd"
    - pkg: "skraak/tui"
    desc: "tools must not import tui"
    - pkg: "skraak/tools/calls"
    desc: "tools must not import tools/calls (sub-package)"
    - pkg: "skraak/tools/import"
    desc: "tools must not import tools/import (sub-package)"
    [4.1341]
    [4.5137]
    # depguard:
    # rules:
    # # Package dependency rules — see CLAUDE.md "Package Organization"
    # # Packages may only import packages below them in the list:
    # # cmd → tools, tools/calls, tools/import, tui, utils, db
    # # tools/calls → utils, db
    # # tools/import → utils, db
    # # tools → utils, db
    # # tui → tools/calls, utils
    # # db → utils
    # # utils → (nothing — leaf package)
    # utils:
    # files:
    # - "**/utils/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "utils is the leaf package"
    # - pkg: "skraak/tools"
    # desc: "utils is the leaf package"
    # - pkg: "skraak/tui"
    # desc: "utils is the leaf package"
    # - pkg: "skraak/db"
    # desc: "utils is the leaf package"
    # db:
    # files:
    # - "**/db/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "db may only import utils"
    # - pkg: "skraak/tools"
    # desc: "db may only import utils"
    # - pkg: "skraak/tui"
    # desc: "db may only import utils"
    # tui:
    # files:
    # - "**/tui/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "tui must not import cmd"
    # - pkg: "skraak/db"
    # desc: "tui must not import db"
    # - pkg: "skraak/tools$"
    # desc: "tui must import from tools/calls, not tools"
    # calls:
    # files:
    # - "**/tools/calls/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "tools/calls must not import cmd"
    # - pkg: "skraak/tools"
    # desc: "tools/calls must not import parent package"
    # - pkg: "skraak/tui"
    # desc: "tools/calls must not import tui"
    # import:
    # files:
    # - "**/tools/import/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "tools/import must not import cmd"
    # - pkg: "skraak/tools"
    # desc: "tools/import must not import parent package"
    # - pkg: "skraak/tui"
    # desc: "tools/import must not import tui"
    # tools:
    # files:
    # - "**/tools/*.go"
    # deny:
    # - pkg: "skraak/cmd"
    # desc: "tools must not import cmd"
    # - pkg: "skraak/tui"
    # desc: "tools must not import tui"
    # - pkg: "skraak/tools/calls"
    # desc: "tools must not import tools/calls (sub-package)"
    # - pkg: "skraak/tools/import"
    # desc: "tools must not import tools/import (sub-package)"