new import tests

quietlight
May 19, 2026, 1:24 AM
JMDW37LVYJAWULK4RREBRC6OLNVMA5PO5TLN6JIZCTZELL7YF4MQC

Dependencies

  • [2] TSOJUMHV more tests
  • [3] V2HX6HEB claude going nuts all over the place
  • [4] NQPVZ3PP first phase of utils refactor, all related to db interfaces
  • [*] 3DVPQOKB big tidy up of tools/

Change contents

  • file addition: test_helpers.go (----------)
    [6.1]
    package imp
    import (
    "context"
    "database/sql"
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"
    "testing"
    "time"
    "skraak/datafile"
    "skraak/db"
    "skraak/utils"
    )
    // setupImportTestDB seeds the database handed back by db.SetupTestDB with the
    // fixture rows shared by the import tests and returns the open handle.
    //
    // Fixture rows:
    //   - datasets:  dstest000001 (structured), dstest000002 (unstructured)
    //   - locations: loctest00001 (active), loctest00002 (inactive), both in dstest000001
    //   - clusters:  cltest000001 (active), cltest000002 (inactive), both in loctest00001
    //   - species:   Kiwi (sptest000001), Roroa (sptest000002)
    //   - calltypes: Kiwi/song (cttest000001), Kiwi/duet (cttest000002)
    //   - filters:   kiwi.txt (fitest000001), test.txt (fitest000002)
    //
    // NOTE(review): the schema and storage mode (in-memory vs. file) are decided
    // by db.SetupTestDB, which is not visible from this file.
    func setupImportTestDB(t *testing.T) *sql.DB {
    	t.Helper()

    	conn := db.SetupTestDB(t)

    	// One structured and one unstructured dataset.
    	db.InsertTestDatasetWithType(t, conn, "dstest000001", "Test Structured", "structured")
    	db.InsertTestDatasetWithType(t, conn, "dstest000002", "Test Unstructured", "unstructured")

    	// Two locations in the structured dataset; the second is switched off.
    	db.InsertTestLocation(t, conn, "loctest00001", "dstest000001", "Test Location Active")
    	db.InsertTestLocation(t, conn, "loctest00002", "dstest000001", "Test Location Inactive")
    	mustExec(t, conn, "UPDATE location SET active = false WHERE id = 'loctest00002'")

    	// Two clusters in the active location; the second is switched off.
    	db.InsertTestCluster(t, conn, "cltest000001", "dstest000001", "loctest00001", "Test Cluster Active")
    	db.InsertTestCluster(t, conn, "cltest000002", "dstest000001", "loctest00001", "Test Cluster Inactive")
    	mustExec(t, conn, "UPDATE cluster SET active = false WHERE id = 'cltest000002'")

    	// Species, followed by the call types that hang off Kiwi.
    	db.InsertTestSpecies(t, conn, "sptest000001", "Kiwi")
    	db.InsertTestSpecies(t, conn, "sptest000002", "Roroa")
    	db.InsertTestCallType(t, conn, "cttest000001", "sptest000001", "song")
    	db.InsertTestCallType(t, conn, "cttest000002", "sptest000001", "duet")

    	// Filters referenced by label fixtures.
    	db.InsertTestFilter(t, conn, "fitest000001", "kiwi.txt")
    	db.InsertTestFilter(t, conn, "fitest000002", "test.txt")

    	return conn
    }
    // setupFileBasedTestDB creates a file-based DuckDB for tests that need to
    // open multiple connections to the same database (e.g., ImportAudioFiles).
    //
    // The file lives in a t.TempDir() directory, receives the schema returned by
    // db.ReadSchemaSQL, and is seeded with the same fixture rows as
    // setupImportTestDB. The handle is closed before returning so that callers
    // can open their own connections; the path to the database file is returned.
    //
    // Any failure (open, schema, seeding, or the final close) aborts the test.
    func setupFileBasedTestDB(t *testing.T) string {
    	t.Helper()

    	dbPath := filepath.Join(t.TempDir(), "test.duckdb")

    	database, err := sql.Open("duckdb", dbPath)
    	if err != nil {
    		t.Fatalf("failed to open database: %v", err)
    	}

    	// t.Fatalf unwinds through runtime.Goexit, which still runs deferred
    	// calls, so this guard releases the handle on every failure path below,
    	// including failures raised inside the seeding helpers that receive t.
    	closed := false
    	defer func() {
    		if !closed {
    			_ = database.Close() // best effort: the test is already failing
    		}
    	}()

    	// Apply schema
    	schema, err := db.ReadSchemaSQL()
    	if err != nil {
    		t.Fatalf("failed to read schema: %v", err)
    	}
    	if _, err = database.Exec(schema); err != nil {
    		t.Fatalf("failed to create schema: %v", err)
    	}

    	// Insert test data - same as setupImportTestDB
    	db.InsertTestDatasetWithType(t, database, "dstest000001", "Test Structured", "structured")
    	db.InsertTestDatasetWithType(t, database, "dstest000002", "Test Unstructured", "unstructured")
    	db.InsertTestLocation(t, database, "loctest00001", "dstest000001", "Test Location Active")
    	db.InsertTestLocation(t, database, "loctest00002", "dstest000001", "Test Location Inactive")
    	mustExec(t, database, "UPDATE location SET active = false WHERE id = 'loctest00002'")
    	db.InsertTestCluster(t, database, "cltest000001", "dstest000001", "loctest00001", "Test Cluster Active")
    	db.InsertTestCluster(t, database, "cltest000002", "dstest000001", "loctest00001", "Test Cluster Inactive")
    	mustExec(t, database, "UPDATE cluster SET active = false WHERE id = 'cltest000002'")
    	db.InsertTestSpecies(t, database, "sptest000001", "Kiwi")
    	db.InsertTestSpecies(t, database, "sptest000002", "Roroa")
    	db.InsertTestCallType(t, database, "cttest000001", "sptest000001", "song")
    	db.InsertTestCallType(t, database, "cttest000002", "sptest000001", "duet")
    	db.InsertTestFilter(t, database, "fitest000001", "kiwi.txt")
    	db.InsertTestFilter(t, database, "fitest000002", "test.txt")

    	// Close the database so tests can open their own connections. A failed
    	// close could leave the file unusable for them, so it is reported here
    	// rather than surfacing later as an unrelated error.
    	closed = true
    	if err := database.Close(); err != nil {
    		t.Fatalf("failed to close database: %v", err)
    	}
    	return dbPath
    }
    // mustExec runs query with args against database and aborts the calling test
    // via t.Fatalf if the statement returns an error. The result is discarded.
    func mustExec(t *testing.T, database *sql.DB, query string, args ...any) {
    	t.Helper()

    	_, err := database.Exec(query, args...)
    	if err == nil {
    		return
    	}
    	t.Fatalf("exec: %v", err)
    }
    // createTestWAV writes a silent PCM WAV file to path and returns the value
    // utils.ComputeXXH64 reports for that file.
    //
    // Layout: a 44-byte RIFF/WAVE header followed by 32000 zero bytes, i.e. one
    // second of 16 kHz, mono, 16-bit audio. Any write or hashing error aborts
    // the test.
    func createTestWAV(t *testing.T, path string) string {
    	t.Helper()

    	const (
    		headerLen      = 44
    		sampleRate     = 16000
    		bytesPerSample = 2
    		numSamples     = sampleRate // one second of audio
    		dataSize       = numSamples * bytesPerSample
    		byteRate       = sampleRate * bytesPerSample
    		riffSize       = headerLen + dataSize - 8 // RIFF size excludes "RIFF" and the size field
    	)

    	wav := make([]byte, headerLen+dataSize)

    	// Little-endian field writers; WAV headers are little-endian throughout.
    	le16 := func(off int, v uint16) {
    		wav[off] = byte(v)
    		wav[off+1] = byte(v >> 8)
    	}
    	le32 := func(off int, v uint32) {
    		le16(off, uint16(v))
    		le16(off+2, uint16(v>>16))
    	}

    	// RIFF container header.
    	copy(wav[0:4], "RIFF")
    	le32(4, riffSize)
    	copy(wav[8:12], "WAVE")

    	// "fmt " chunk describing the PCM stream.
    	copy(wav[12:16], "fmt ")
    	le32(16, 16)             // fmt chunk size
    	le16(20, 1)              // PCM format
    	le16(22, 1)              // mono
    	le32(24, sampleRate)     // samples per second
    	le32(28, byteRate)       // bytes per second
    	le16(32, bytesPerSample) // block align
    	le16(34, 16)             // bits per sample

    	// "data" chunk; the samples stay zero from make, i.e. silence.
    	copy(wav[36:40], "data")
    	le32(40, dataSize)

    	if err := os.WriteFile(path, wav, 0644); err != nil {
    		t.Fatalf("failed to create test WAV: %v", err)
    	}

    	sum, err := utils.ComputeXXH64(path)
    	if err != nil {
    		t.Fatalf("failed to compute hash: %v", err)
    	}
    	return sum
    }
    // createTestWAVWithMetadata writes a test WAV named filename into a fresh
    // temporary directory and inserts a matching active row into the file table
    // for the given cluster and location.
    //
    // It returns the generated file ID and the hash reported by createTestWAV.
    // Any failure aborts the test.
    //
    // NOTE(review): the row is stored with duration 0.0005 although
    // createTestWAV produces one second of audio — confirm this is intended.
    func createTestWAVWithMetadata(t *testing.T, database *sql.DB, clusterID, locationID, filename string) (fileID, hash string) {
    	t.Helper()

    	hash = createTestWAV(t, filepath.Join(t.TempDir(), filename))

    	id, err := utils.GenerateLongID()
    	if err != nil {
    		t.Fatalf("failed to generate file ID: %v", err)
    	}

    	const insertFile = `
    INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)
    VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, 0.0005, 16000, true)
    `
    	if _, err := database.ExecContext(context.Background(), insertFile, id, filename, hash, locationID, clusterID); err != nil {
    		t.Fatalf("failed to insert file: %v", err)
    	}
    	return id, hash
    }
    // createTestDataFile writes a .data file next to wavPath (the WAV path with
    // ".data" appended) containing the given segments plus fixed metadata
    // (operator "test", duration 0.0005). It returns the path written and aborts
    // the test if the write fails.
    func createTestDataFile(t *testing.T, wavPath string, segments []*datafile.Segment) string {
    	t.Helper()

    	meta := &datafile.DataMeta{
    		Operator: "test",
    		Duration: 0.0005,
    	}
    	doc := &datafile.DataFile{Meta: meta, Segments: segments}

    	target := wavPath + ".data"
    	err := doc.Write(target)
    	if err != nil {
    		t.Fatalf("failed to write test .data file: %v", err)
    	}
    	return target
    }
    // createTestMappingFile writes mapping.json into dir and returns its path.
    //
    // The mapping has two entries: "Kiwi" (species "Kiwi" with call types song
    // and duet mapped to themselves) and "Roroa" (species "Roroa", no call
    // types). Marshal or write errors abort the test.
    func createTestMappingFile(t *testing.T, dir string) string {
    	t.Helper()

    	kiwi := map[string]any{
    		"species": "Kiwi",
    		"calltypes": map[string]string{
    			"song": "song",
    			"duet": "duet",
    		},
    	}
    	roroa := map[string]any{"species": "Roroa"}

    	encoded, err := json.Marshal(map[string]any{
    		"Kiwi":  kiwi,
    		"Roroa": roroa,
    	})
    	if err != nil {
    		t.Fatalf("failed to marshal mapping: %v", err)
    	}

    	target := filepath.Join(dir, "mapping.json")
    	if writeErr := os.WriteFile(target, encoded, 0644); writeErr != nil {
    		t.Fatalf("failed to write mapping file: %v", writeErr)
    	}
    	return target
    }
    // createTestCSVFile creates import.csv in dir for bulk import testing and
    // returns its path.
    //
    // Columns: location_name, location_id, directory_path, date_range, sample_rate, file_count
    //
    // Each row must supply at least six fields, in column order; fields beyond
    // the sixth are ignored. A shorter row aborts the test with a message naming
    // the row instead of panicking with an index-out-of-range error. Fields are
    // written verbatim (no CSV quoting), so they must not contain commas or
    // newlines.
    //
    // The content is assembled in memory and written with a single os.WriteFile
    // call, so a failed write or close is always reported. The file is created
    // with mode 0644, matching the other helpers in this file.
    func createTestCSVFile(t *testing.T, dir string, rows [][]string) string {
    	t.Helper()

    	const columns = 6

    	content := []byte("location_name,location_id,directory_path,date_range,sample_rate,file_count\n")
    	for i, row := range rows {
    		if len(row) < columns {
    			t.Fatalf("CSV row %d has %d fields, want at least %d", i, len(row), columns)
    		}
    		content = fmt.Appendf(content, "%s,%s,%s,%s,%s,%s\n", row[0], row[1], row[2], row[3], row[4], row[5])
    	}

    	path := filepath.Join(dir, "import.csv")
    	if err := os.WriteFile(path, content, 0644); err != nil {
    		t.Fatalf("failed to create CSV: %v", err)
    	}
    	return path
    }
    // createTestLogFile creates an empty import.log in dir for bulk import
    // testing and returns its path. A write error aborts the test.
    func createTestLogFile(t *testing.T, dir string) string {
    	t.Helper()

    	logPath := filepath.Join(dir, "import.log")
    	// Writing a nil payload creates (or truncates) the file with zero length.
    	err := os.WriteFile(logPath, nil, 0644)
    	if err != nil {
    		t.Fatalf("failed to create log file: %v", err)
    	}
    	return logPath
    }
    // assertActiveRowCount reports a test error unless table contains exactly
    // expected rows with active = true; a failing query aborts the test.
    //
    // table is spliced into the SQL text, so it must be a fixed table name
    // supplied by the wrappers below, never external input. Messages use the
    // table name with an "s" appended ("files", "segments", "labels").
    func assertActiveRowCount(t *testing.T, database *sql.DB, table string, expected int) {
    	t.Helper()
    	var count int
    	query := "SELECT COUNT(*) FROM " + table + " WHERE active = true"
    	if err := database.QueryRow(query).Scan(&count); err != nil {
    		t.Fatalf("failed to count %ss: %v", table, err)
    	}
    	if count != expected {
    		t.Errorf("expected %d %ss, got %d", expected, table, count)
    	}
    }

    // assertFileCount queries the database and asserts the expected number of
    // active rows in the file table.
    func assertFileCount(t *testing.T, database *sql.DB, expected int) {
    	t.Helper()
    	assertActiveRowCount(t, database, "file", expected)
    }

    // assertSegmentCount queries the database and asserts the expected number of
    // active rows in the segment table.
    func assertSegmentCount(t *testing.T, database *sql.DB, expected int) {
    	t.Helper()
    	assertActiveRowCount(t, database, "segment", expected)
    }

    // assertLabelCount queries the database and asserts the expected number of
    // active rows in the label table.
    func assertLabelCount(t *testing.T, database *sql.DB, expected int) {
    	t.Helper()
    	assertActiveRowCount(t, database, "label", expected)
    }
    // getTestLocationData wraps GetLocationData for tests: it returns the data
    // for locationID and aborts the test if the lookup reports an error.
    func getTestLocationData(t *testing.T, database *sql.DB, locationID string) *LocationData {
    	t.Helper()

    	loc, err := GetLocationData(database, locationID)
    	if err == nil {
    		return loc
    	}
    	t.Fatalf("failed to get location data: %v", err)
    	return nil // not reached: t.Fatalf stops the test goroutine
    }
    // beginTestTx starts a logged transaction on database through
    // db.BeginLoggedTx, tagged "test", and aborts the test if it cannot be
    // started.
    func beginTestTx(t *testing.T, ctx context.Context, database *sql.DB) *db.LoggedTx {
    	t.Helper()

    	logged, err := db.BeginLoggedTx(ctx, database, "test")
    	if err == nil {
    		return logged
    	}
    	t.Fatalf("failed to begin transaction: %v", err)
    	return nil // not reached: t.Fatalf stops the test goroutine
    }
    // waitForAsync blocks the caller for 100 milliseconds, giving asynchronous
    // work started by a test a short window to finish.
    func waitForAsync() {
    	const settle = 100 * time.Millisecond
    	<-time.After(settle)
    }
  • file addition: import_unstructured_test.go (----------)
    [6.1]
    package imp
    import (
    "context"
    "database/sql"
    "os"
    "path/filepath"
    "testing"
    )
    // TestImportUnstructured exercises ImportUnstructured against a file-based
    // test database: a successful single-file import, duplicate detection on a
    // repeated import, an empty folder, and three inputs that must be rejected
    // (structured dataset, unknown dataset ID, missing folder).
    func TestImportUnstructured(t *testing.T) {
    	ctx := context.Background()

    	// openForVerification reopens the database file for assertions and closes
    	// the handle when the calling subtest finishes.
    	openForVerification := func(t *testing.T, dbPath string) *sql.DB {
    		t.Helper()
    		conn, err := sql.Open("duckdb", dbPath)
    		if err != nil {
    			t.Fatalf("failed to open database for verification: %v", err)
    		}
    		t.Cleanup(func() { conn.Close() })
    		return conn
    	}

    	// activeFilesWithHash counts active file rows carrying the given hash.
    	activeFilesWithHash := func(t *testing.T, conn *sql.DB, hash string) int {
    		t.Helper()
    		var n int
    		err := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&n)
    		if err != nil {
    			t.Fatalf("failed to query file: %v", err)
    		}
    		return n
    	}

    	t.Run("happy path - import single WAV file", func(t *testing.T) {
    		dbPath := setupFileBasedTestDB(t)

    		// One WAV file in an otherwise empty folder.
    		folder := t.TempDir()
    		hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

    		result, err := ImportUnstructured(ctx, ImportUnstructuredInput{
    			DBPath:     dbPath,
    			DatasetID:  "dstest000002", // unstructured dataset
    			FolderPath: folder,
    			Recursive:  new(true),
    		})
    		if err != nil {
    			t.Fatalf("ImportUnstructured failed: %v", err)
    		}

    		// Summary counters.
    		if got := result.TotalFiles; got != 1 {
    			t.Errorf("expected 1 total file, got %d", got)
    		}
    		if got := result.ImportedFiles; got != 1 {
    			t.Errorf("expected 1 imported file, got %d", got)
    		}
    		if got := result.SkippedFiles; got != 0 {
    			t.Errorf("expected 0 skipped files, got %d", got)
    		}
    		if len(result.Errors) != 0 {
    			t.Errorf("unexpected errors: %v", result.Errors)
    		}

    		// The file row itself.
    		conn := openForVerification(t, dbPath)
    		if n := activeFilesWithHash(t, conn, hash); n != 1 {
    			t.Errorf("expected 1 file in database, got %d", n)
    		}

    		// The link between the file and the target dataset.
    		var links int
    		err = conn.QueryRow(`
    SELECT COUNT(*) FROM file_dataset fd
    JOIN file f ON fd.file_id = f.id
    WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000002'
    `, hash).Scan(&links)
    		if err != nil {
    			t.Fatalf("failed to query file_dataset: %v", err)
    		}
    		if links != 1 {
    			t.Errorf("expected 1 file_dataset link, got %d", links)
    		}

    		// Unstructured files carry neither a location nor a cluster.
    		var locationID, clusterID sql.NullString
    		err = conn.QueryRow("SELECT location_id, cluster_id FROM file WHERE xxh64_hash = ?", hash).Scan(&locationID, &clusterID)
    		if err != nil {
    			t.Fatalf("failed to query file: %v", err)
    		}
    		if locationID.Valid {
    			t.Errorf("expected NULL location_id for unstructured file, got %s", locationID.String)
    		}
    		if clusterID.Valid {
    			t.Errorf("expected NULL cluster_id for unstructured file, got %s", clusterID.String)
    		}
    	})

    	t.Run("duplicate handling - skip file with existing hash", func(t *testing.T) {
    		dbPath := setupFileBasedTestDB(t)

    		folder := t.TempDir()
    		hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

    		// Both runs use the same settings; each call gets a fresh input value.
    		newInput := func() ImportUnstructuredInput {
    			return ImportUnstructuredInput{
    				DBPath:     dbPath,
    				DatasetID:  "dstest000002",
    				FolderPath: folder,
    				Recursive:  new(true),
    			}
    		}

    		if _, err := ImportUnstructured(ctx, newInput()); err != nil {
    			t.Fatalf("first import failed: %v", err)
    		}

    		// The second pass sees the same file and must skip it as a duplicate.
    		result, err := ImportUnstructured(ctx, newInput())
    		if err != nil {
    			t.Fatalf("second import failed: %v", err)
    		}

    		if got := result.TotalFiles; got != 1 {
    			t.Errorf("expected 1 total file, got %d", got)
    		}
    		if got := result.ImportedFiles; got != 0 {
    			t.Errorf("expected 0 imported files (duplicate), got %d", got)
    		}
    		if got := result.SkippedFiles; got != 1 {
    			t.Errorf("expected 1 skipped file (duplicate), got %d", got)
    		}

    		// Still exactly one row for this hash.
    		conn := openForVerification(t, dbPath)
    		if n := activeFilesWithHash(t, conn, hash); n != 1 {
    			t.Errorf("expected 1 file in database (not duplicated), got %d", n)
    		}
    	})

    	t.Run("empty folder returns empty output", func(t *testing.T) {
    		dbPath := setupFileBasedTestDB(t)
    		folder := t.TempDir()

    		result, err := ImportUnstructured(ctx, ImportUnstructuredInput{
    			DBPath:     dbPath,
    			DatasetID:  "dstest000002",
    			FolderPath: folder,
    			Recursive:  new(true),
    		})
    		if err != nil {
    			t.Fatalf("ImportUnstructured failed: %v", err)
    		}
    		if got := result.TotalFiles; got != 0 {
    			t.Errorf("expected 0 total files, got %d", got)
    		}
    	})

    	// Inputs that must be rejected with an error. folder prepares the folder
    	// for the case and returns the path to import from.
    	rejected := []struct {
    		name    string
    		input   ImportUnstructuredInput
    		folder  func(t *testing.T) string
    		failMsg string
    	}{
    		{
    			name:  "structured dataset rejected",
    			input: ImportUnstructuredInput{DatasetID: "dstest000001"}, // structured dataset
    			folder: func(t *testing.T) string {
    				dir := t.TempDir()
    				createTestWAV(t, filepath.Join(dir, "test.wav"))
    				return dir
    			},
    			failMsg: "expected error for structured dataset",
    		},
    		{
    			name:    "invalid dataset ID",
    			input:   ImportUnstructuredInput{DatasetID: "invalid_id"},
    			folder:  func(t *testing.T) string { return t.TempDir() },
    			failMsg: "expected error for invalid dataset ID",
    		},
    		{
    			name:    "folder does not exist",
    			input:   ImportUnstructuredInput{DatasetID: "dstest000002"},
    			folder:  func(t *testing.T) string { return "/nonexistent/path" },
    			failMsg: "expected error for nonexistent folder",
    		},
    	}
    	for _, tc := range rejected {
    		t.Run(tc.name, func(t *testing.T) {
    			in := tc.input
    			in.DBPath = setupFileBasedTestDB(t)
    			in.FolderPath = tc.folder(t)

    			if _, err := ImportUnstructured(ctx, in); err == nil {
    				t.Error(tc.failMsg)
    			}
    		})
    	}
    }
    // TestScanWavFiles checks scanWavFiles against small temporary directory
    // trees: a plain .wav file, an upper-case .WAV extension, and a tree with a
    // subdirectory scanned both non-recursively and recursively.
    func TestScanWavFiles(t *testing.T) {
    	nested := filepath.Join("subdir", "sub.wav")

    	cases := []struct {
    		name      string
    		subdir    string   // created under the temp dir when non-empty
    		wavs      []string // WAV files to create, relative to the temp dir
    		recursive bool
    		want      int
    		wantMsg   string // failure format, takes the actual count
    	}{
    		{
    			name:    "finds WAV files",
    			wavs:    []string{"test.wav"},
    			want:    1,
    			wantMsg: "expected 1 file, got %d",
    		},
    		{
    			name:    "case insensitive extension",
    			wavs:    []string{"test.WAV"},
    			want:    1,
    			wantMsg: "expected 1 file, got %d",
    		},
    		{
    			name:    "non-recursive ignores subdirectories",
    			subdir:  "subdir",
    			wavs:    []string{"root.wav", nested},
    			want:    1,
    			wantMsg: "expected 1 file (non-recursive), got %d",
    		},
    		{
    			name:      "recursive finds all files",
    			subdir:    "subdir",
    			wavs:      []string{"root.wav", nested},
    			recursive: true,
    			want:      2,
    			wantMsg:   "expected 2 files (recursive), got %d",
    		},
    	}

    	for _, tc := range cases {
    		t.Run(tc.name, func(t *testing.T) {
    			root := t.TempDir()

    			if tc.subdir != "" {
    				if err := os.Mkdir(filepath.Join(root, tc.subdir), 0755); err != nil {
    					t.Fatalf("failed to create subdir: %v", err)
    				}
    			}
    			for _, rel := range tc.wavs {
    				createTestWAV(t, filepath.Join(root, rel))
    			}

    			found, scanErrs := scanWavFiles(root, tc.recursive)
    			if len(scanErrs) > 0 {
    				t.Errorf("unexpected errors: %v", scanErrs)
    			}
    			if len(found) != tc.want {
    				t.Errorf(tc.wantMsg, len(found))
    			}
    		})
    	}
    }
  • edit in tools/import/import_segments_validation_test.go at line 4
    [2.24][2.24:46]()
    "database/sql"
    "os"
  • edit in tools/import/import_segments_validation_test.go at line 9
    [2.82][2.82:98]()
    "skraak/utils"
  • edit in tools/import/import_segments_validation_test.go at line 11
    [2.101][2.101:2141]()
    // setupImportTestDB creates a DB with the full schema + test data for import validation.
    // The handle comes from db.SetupTestDB; this function only adds the rows
    // listed below and returns the same handle.
    //
    // dataset (structured): ds_imptest00001
    // dataset (unstructured): ds_imptest00002
    // location in ds1: loc_imptest0001
    // location (inactive): loc_imptest0002 (in ds1)
    // cluster in loc1: cl_imptest00001
    // cluster (inactive): cl_imptest00002 (in loc1)
    // species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000)
    // calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002)
    // filter: kiwi.txt (fi_kiwi0000001), tomtit.txt (fi_tomtit000001)
    func setupImportTestDB(t *testing.T) *sql.DB {
    t.Helper()
    database := db.SetupTestDB(t)
    // Datasets: one structured, one unstructured.
    db.InsertTestDatasetWithType(t, database, "ds_imptest00001", "Imp Structured", "structured")
    db.InsertTestDatasetWithType(t, database, "ds_imptest00002", "Imp Unstructured", "unstructured")
    // Locations: both in the structured dataset; the second is then deactivated.
    db.InsertTestLocation(t, database, "loc_imptest0001", "ds_imptest00001", "Loc Active")
    db.InsertTestLocation(t, database, "loc_imptest0002", "ds_imptest00001", "Loc Inactive")
    mustExecImport(t, database, "UPDATE location SET active = false WHERE id = 'loc_imptest0002'")
    // Clusters: both in the active location; the second is then deactivated.
    db.InsertTestCluster(t, database, "cl_imptest00001", "ds_imptest00001", "loc_imptest0001", "Cl Active")
    db.InsertTestCluster(t, database, "cl_imptest00002", "ds_imptest00001", "loc_imptest0001", "Cl Inactive")
    mustExecImport(t, database, "UPDATE cluster SET active = false WHERE id = 'cl_imptest00002'")
    // Species and the call types attached to Kiwi.
    db.InsertTestSpecies(t, database, "sp_kiwi000000", "Kiwi")
    db.InsertTestSpecies(t, database, "sp_roroa00000", "Roroa")
    db.InsertTestCallType(t, database, "ct_kiwi000001", "sp_kiwi000000", "song")
    db.InsertTestCallType(t, database, "ct_kiwi000002", "sp_kiwi000000", "duet")
    // Filters.
    db.InsertTestFilter(t, database, "fi_kiwi0000001", "kiwi.txt")
    db.InsertTestFilter(t, database, "fi_tomtit000001", "tomtit.txt")
    return database
    }
    // mustExecImport executes query with args on database and aborts the test
    // via t.Fatalf if the statement returns an error. The result is discarded.
    func mustExecImport(t *testing.T, database *sql.DB, query string, args ...any) {
    t.Helper()
    if _, err := database.Exec(query, args...); err != nil {
    t.Fatalf("exec: %v", err)
    }
    }
  • replacement in tools/import/import_segments_validation_test.go at line 18
    [2.2334][2.2334:2435]()
    err := validateSegmentHierarchy(database, "ds_imptest00001", "loc_imptest0001", "cl_imptest00001")
    [2.2334]
    [2.2435]
    err := validateSegmentHierarchy(database, "dstest000001", "loctest00001", "cltest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 25
    [2.2564][2.2564:2665]()
    err := validateSegmentHierarchy(database, "ds_imptest00002", "loc_imptest0001", "cl_imptest00001")
    [2.2564]
    [2.2665]
    err := validateSegmentHierarchy(database, "dstest000002", "loctest00001", "cltest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 32
    [2.2858][2.2858:2959]()
    err := validateSegmentHierarchy(database, "ds_imptest00001", "loc_imptest0002", "cl_imptest00001")
    [2.2858]
    [2.2959]
    err := validateSegmentHierarchy(database, "dstest000001", "loctest00002", "cltest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 39
    [2.3094][2.3094:3195]()
    err := validateSegmentHierarchy(database, "ds_imptest00001", "loc_imptest0001", "cl_imptest00002")
    [2.3094]
    [2.3195]
    err := validateSegmentHierarchy(database, "dstest000001", "loctest00001", "cltest000002")
  • replacement in tools/import/import_segments_validation_test.go at line 53
    [2.3461][2.3461:3567]()
    filterMap, err := validateFiltersExist(database, map[string]bool{"kiwi.txt": true, "tomtit.txt": true})
    [2.3461]
    [2.3567]
    filterMap, err := validateFiltersExist(database, map[string]bool{"kiwi.txt": true, "test.txt": true})
  • replacement in tools/import/import_segments_validation_test.go at line 60
    [2.3719][2.3719:3844]()
    if filterMap["kiwi.txt"] != "fi_kiwi0000001" {
    t.Errorf("kiwi.txt ID = %q, want fi_kiwi0000001", filterMap["kiwi.txt"])
    [2.3719]
    [2.3844]
    if filterMap["kiwi.txt"] != "fitest000001" {
    t.Errorf("kiwi.txt ID = %q, want fitest000001", filterMap["kiwi.txt"])
  • replacement in tools/import/import_segments_validation_test.go at line 103
    [2.5009][2.5009:5122]()
    if speciesMap["Kiwi"] != "sp_kiwi000000" {
    t.Errorf("Kiwi ID = %q, want sp_kiwi000000", speciesMap["Kiwi"])
    [2.5009]
    [2.5122]
    if speciesMap["Kiwi"] != "sptest000001" {
    t.Errorf("Kiwi ID = %q, want sptest000001", speciesMap["Kiwi"])
  • replacement in tools/import/import_segments_validation_test.go at line 106
    [2.5126][2.5126:5242]()
    if speciesMap["Roroa"] != "sp_roroa00000" {
    t.Errorf("Roroa ID = %q, want sp_roroa00000", speciesMap["Roroa"])
    [2.5126]
    [2.5242]
    if speciesMap["Roroa"] != "sptest000002" {
    t.Errorf("Roroa ID = %q, want sptest000002", speciesMap["Roroa"])
  • replacement in tools/import/import_segments_validation_test.go at line 120
    [2.5540][2.5540:5676]()
    if calltypeMap["Kiwi"]["song"] != "ct_kiwi000001" {
    t.Errorf("Kiwi/song ID = %q, want ct_kiwi000001", calltypeMap["Kiwi"]["song"])
    [2.5540]
    [2.5676]
    if calltypeMap["Kiwi"]["song"] != "cttest000001" {
    t.Errorf("Kiwi/song ID = %q, want cttest000001", calltypeMap["Kiwi"]["song"])
  • replacement in tools/import/import_segments_validation_test.go at line 123
    [2.5680][2.5680:5816]()
    if calltypeMap["Kiwi"]["duet"] != "ct_kiwi000002" {
    t.Errorf("Kiwi/duet ID = %q, want ct_kiwi000002", calltypeMap["Kiwi"]["duet"])
    [2.5680]
    [2.5816]
    if calltypeMap["Kiwi"]["duet"] != "cttest000002" {
    t.Errorf("Kiwi/duet ID = %q, want cttest000002", calltypeMap["Kiwi"]["duet"])
  • replacement in tools/import/import_segments_validation_test.go at line 149
    [2.6417][2.6417:6510]()
    fileMap, errors := validateAndMapFiles(database, nil, "cl_imptest00001", "ds_imptest00001")
    [2.6417]
    [2.6510]
    fileMap, errors := validateAndMapFiles(database, nil, "cltest000001", "dstest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 165
    [2.6910][2.6910:6995]()
    if err := createFakeWAV(wavPath); err != nil {
    t.Fatalf("create WAV: %v", err)
    }
    [2.6910]
    [2.6995]
    hash := createTestWAV(t, wavPath)
  • edit in tools/import/import_segments_validation_test.go at line 167
    [2.6996][2.6996:7146]()
    // Compute the actual hash of the file we created
    hash, err := utils.ComputeXXH64(wavPath)
    if err != nil {
    t.Fatalf("compute hash: %v", err)
    }
  • replacement in tools/import/import_segments_validation_test.go at line 168
    [2.7185][2.7185:7308]()
    db.InsertTestFileForCluster(t, database, "fi_test00000001", "cl_imptest00001", "loc_imptest0001", "test.wav", hash, 60.0)
    [2.7185]
    [2.7308]
    db.InsertTestFileForCluster(t, database, "fitest000004", "cltest000001", "loctest00001", "test.wav", hash, 60.0)
  • replacement in tools/import/import_segments_validation_test.go at line 171
    [2.7334][2.7334:7506]()
    mustExecImport(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fi_test00000001", "ds_imptest00001")
    [2.7334]
    [2.7506]
    mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitest000004", "dstest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 180
    [2.7706][2.7706:7803]()
    fileMap, errors := validateAndMapFiles(database, scanned, "cl_imptest00001", "ds_imptest00001")
    [2.7706]
    [2.7803]
    fileMap, errors := validateAndMapFiles(database, scanned, "cltest000001", "dstest000001")
  • replacement in tools/import/import_segments_validation_test.go at line 195
    [2.8163][2.8163:8248]()
    if err := createFakeWAV(wavPath); err != nil {
    t.Fatalf("create WAV: %v", err)
    }
    [2.8163]
    [2.8248]
    createTestWAV(t, wavPath)
  • replacement in tools/import/import_segments_validation_test.go at line 204
    [2.8448][2.8448:8545]()
    fileMap, errors := validateAndMapFiles(database, scanned, "cl_imptest00001", "ds_imptest00001")
    [2.8448]
    [2.8545]
    fileMap, errors := validateAndMapFiles(database, scanned, "cltest000001", "dstest000001")
  • edit in tools/import/import_segments_validation_test.go at line 212
    [2.8702][2.8702:9549]()
    }
    // createFakeWAV creates a minimal WAV file (44-byte header + 1 sample)
    func createFakeWAV(path string) error {
    // Minimal valid WAV: 44-byte header + 4 bytes of data
    header := make([]byte, 44)
    copy(header[0:4], "RIFF")
    copy(header[4:8], "\x24\x00\x00\x00") // file size - 8
    copy(header[8:12], "WAVE")
    copy(header[12:16], "fmt ")
    copy(header[16:20], "\x10\x00\x00\x00") // chunk size
    copy(header[20:22], "\x01\x00") // PCM
    copy(header[22:24], "\x01\x00") // mono
    copy(header[24:28], "\x80\x3E\x00\x00") // 16000 sample rate
    copy(header[28:32], "\x00\x7D\x00\x00") // byte rate
    copy(header[32:34], "\x02\x00") // block align
    copy(header[34:36], "\x10\x00") // bits per sample
    copy(header[36:40], "data")
    copy(header[40:44], "\x00\x00\x00\x00") // data size
    return os.WriteFile(path, header, 0644)
  • file addition: import_segments_full_test.go (----------)
    [6.1]
    package imp
    import (
    "context"
    "database/sql"
    "path/filepath"
    "testing"
    "skraak/datafile"
    "skraak/db"
    )
    // TestImportSegments drives ImportSegments against a file-based test
    // database. It covers a successful import, a .data file whose WAV is not
    // registered in the database, a folder without any .data files, and an
    // invalid dataset ID.
    func TestImportSegments(t *testing.T) {
        ctx := context.Background()

        t.Run("happy path - import segments from .data file", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)

            // Create temp folder with a WAV file and .data file
            tmpDir := t.TempDir()
            wavPath := filepath.Join(tmpDir, "test.wav")
            hash := createTestWAV(t, wavPath)

            // Insert the file into database first (simulating pre-imported audio).
            // Seeding runs in its own function so the deferred Close fires before
            // ImportSegments is handed dbPath, and still fires if a helper aborts
            // the subtest (t.Fatal unwinds through deferred calls).
            func() {
                database, err := sql.Open("duckdb", dbPath)
                if err != nil {
                    t.Fatalf("failed to open database: %v", err)
                }
                defer func() {
                    if err := database.Close(); err != nil {
                        t.Errorf("failed to close database after seeding: %v", err)
                    }
                }()
                db.InsertTestFileForCluster(t, database, "fitestseg001", "cltest000001", "loctest00001", "test.wav", hash, 1.0)
                mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitestseg001", "dstest000001")
            }()

            // Create .data file with segments
            segments := []*datafile.Segment{
                {
                    StartTime: 0.1,
                    EndTime:   0.5,
                    Labels: []*datafile.Label{
                        {Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
                    },
                },
            }
            // Only the file on disk matters here; the returned path is not needed.
            _ = createTestDataFile(t, wavPath, segments)

            // Create mapping file
            mappingPath := createTestMappingFile(t, tmpDir)

            // Import segments
            output, err := ImportSegments(ctx, ImportSegmentsInput{
                DBPath:     dbPath,
                Folder:     tmpDir,
                Mapping:    mappingPath,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            })
            if err != nil {
                t.Fatalf("ImportSegments failed: %v", err)
            }

            // Verify output
            if output.Summary.DataFilesFound != 1 {
                t.Errorf("expected 1 data file found, got %d", output.Summary.DataFilesFound)
            }
            if output.Summary.ImportedSegments != 1 {
                t.Errorf("expected 1 imported segment, got %d", output.Summary.ImportedSegments)
            }
            if output.Summary.ImportedLabels != 1 {
                t.Errorf("expected 1 imported label, got %d", output.Summary.ImportedLabels)
            }

            // Verify segment in database
            database, err := sql.Open("duckdb", dbPath)
            if err != nil {
                t.Fatalf("failed to open database for verification: %v", err)
            }
            defer database.Close()

            var segmentCount int
            err = database.QueryRow("SELECT COUNT(*) FROM segment WHERE active = true").Scan(&segmentCount)
            if err != nil {
                t.Fatalf("failed to query segments: %v", err)
            }
            if segmentCount != 1 {
                t.Errorf("expected 1 segment in database, got %d", segmentCount)
            }

            var labelCount int
            err = database.QueryRow("SELECT COUNT(*) FROM label WHERE active = true").Scan(&labelCount)
            if err != nil {
                t.Fatalf("failed to query labels: %v", err)
            }
            if labelCount != 1 {
                t.Errorf("expected 1 label in database, got %d", labelCount)
            }
        })

        t.Run("file not in database - error", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)

            // Create temp folder with a WAV file and .data file
            tmpDir := t.TempDir()
            wavPath := filepath.Join(tmpDir, "test.wav")
            createTestWAV(t, wavPath)

            // Create .data file
            segments := []*datafile.Segment{
                {StartTime: 0.1, EndTime: 0.5, Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}}},
            }
            createTestDataFile(t, wavPath, segments)

            // Create mapping file
            mappingPath := createTestMappingFile(t, tmpDir)

            // Import segments - the call itself is expected to succeed and report
            // the unregistered file through output.Errors instead.
            output, err := ImportSegments(ctx, ImportSegmentsInput{
                DBPath:     dbPath,
                Folder:     tmpDir,
                Mapping:    mappingPath,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            })
            if err != nil {
                t.Fatalf("ImportSegments should not return error: %v", err)
            }

            // Should have errors about file not found
            if len(output.Errors) == 0 {
                t.Error("expected errors for file not in database")
            }
        })

        t.Run("no .data files - error", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            tmpDir := t.TempDir()
            mappingPath := createTestMappingFile(t, tmpDir)

            _, err := ImportSegments(ctx, ImportSegmentsInput{
                DBPath:     dbPath,
                Folder:     tmpDir,
                Mapping:    mappingPath,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            })
            if err == nil {
                t.Error("expected error for no .data files")
            }
        })

        t.Run("invalid dataset ID", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            tmpDir := t.TempDir()

            _, err := ImportSegments(ctx, ImportSegmentsInput{
                DBPath:     dbPath,
                Folder:     tmpDir,
                Mapping:    filepath.Join(tmpDir, "mapping.json"),
                DatasetID:  "invalid_id",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            })
            if err == nil {
                t.Error("expected error for invalid dataset ID")
            }
        })
    }
    // TestWriteIDsToDataFiles checks that writeIDsToDataFiles stores the WAV hash
    // and label IDs in an existing .data file, and that it reports an error for
    // a .data path that does not exist.
    func TestWriteIDsToDataFiles(t *testing.T) {
        t.Run("writes hash and label IDs to .data file", func(t *testing.T) {
            wavPath := filepath.Join(t.TempDir(), "test.wav")
            createTestWAV(t, wavPath)

            // One segment carrying one label is enough to address index (0, 0).
            kiwi := &datafile.Label{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}
            fixture := []*datafile.Segment{
                {StartTime: 0.1, EndTime: 0.5, Labels: []*datafile.Label{kiwi}},
            }
            dataPath := createTestDataFile(t, wavPath, fixture)

            // Pending update: a hash for the file plus an ID for segment 0 / label 0.
            pending := []dataFileUpdate{{
                DataPath: dataPath,
                WavHash:  "test_hash_123",
                LabelIDs: map[int]map[int]string{0: {0: "label_id_001"}},
            }}

            if errs := writeIDsToDataFiles(pending); len(errs) > 0 {
                t.Errorf("unexpected errors: %v", errs)
            }

            // Parse the file again and inspect what was written.
            parsed, err := datafile.ParseDataFile(dataPath)
            if err != nil {
                t.Fatalf("failed to parse .data file: %v", err)
            }

            // The hash lives in the file-level extra metadata.
            if parsed.Meta.Extra == nil {
                t.Fatal("expected extra metadata")
            }
            if got := parsed.Meta.Extra["skraak_hash"]; got != "test_hash_123" {
                t.Errorf("expected skraak_hash, got %v", parsed.Meta.Extra["skraak_hash"])
            }

            // The label ID lives in the extra data of the first label.
            if len(parsed.Segments) == 0 || len(parsed.Segments[0].Labels) == 0 {
                t.Fatal("expected segments and labels")
            }
            first := parsed.Segments[0].Labels[0]
            if first.Extra == nil || first.Extra["skraak_label_id"] != "label_id_001" {
                t.Errorf("expected skraak_label_id, got %v", first.Extra)
            }
        })

        t.Run("handles non-existent file", func(t *testing.T) {
            missing := dataFileUpdate{
                DataPath: "/nonexistent/path/test.data",
                WavHash:  "test_hash",
                LabelIDs: map[int]map[int]string{},
            }

            errs := writeIDsToDataFiles([]dataFileUpdate{missing})
            if len(errs) == 0 {
                t.Error("expected error for non-existent file")
            }
        })
    }
    // TestImportSegmentsIntoDB calls importSegmentsIntoDB directly with one
    // scanned file holding a single labelled segment and checks the returned
    // segment, label, subtype and update counts.
    func TestImportSegmentsIntoDB(t *testing.T) {
        ctx := context.Background()
        database := setupImportTestDB(t)
        defer database.Close()

        t.Run("imports segments within transaction", func(t *testing.T) {
            // A WAV on disk provides the hash recorded for the file row.
            wavPath := filepath.Join(t.TempDir(), "test.wav")
            wavHash := createTestWAV(t, wavPath)

            // Register the file and link it to the structured test dataset.
            const fileID = "fitestimp001"
            db.InsertTestFileForCluster(t, database, fileID, "cltest000001", "loctest00001", "test.wav", wavHash, 1.0)
            mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", fileID, "dstest000001")

            // The scanned .data file: one segment with one Kiwi label.
            kiwiSegment := &datafile.Segment{
                StartTime: 0.1,
                EndTime:   0.5,
                Labels: []*datafile.Label{
                    {Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
                },
            }
            scanned := scannedDataFile{
                DataPath: wavPath + ".data",
                WavPath:  wavPath,
                WavHash:  wavHash,
                FileID:   fileID,
                Duration: 1.0,
                Segments: []*datafile.Segment{kiwiSegment},
            }

            // Lookup tables handed to the importer.
            byFileID := map[string]scannedDataFile{fileID: scanned}
            mapping := MappingFile{"Kiwi": {Species: "Kiwi"}}
            filterIDs := map[string]string{"kiwi.txt": "fitest000001"}
            speciesIDs := map[string]string{"Kiwi": "sptest000001"}
            calltypeIDs := map[string]map[string]string{}

            importedSegments, labelCount, subtypeCount, fileUpdates, errs := importSegmentsIntoDB(
                ctx, database, byFileID, []scannedDataFile{scanned},
                mapping, filterIDs, speciesIDs, calltypeIDs,
                "dstest000001", nil,
            )

            if len(errs) > 0 {
                t.Errorf("unexpected errors: %v", errs)
            }
            if got := len(importedSegments); got != 1 {
                t.Errorf("expected 1 segment, got %d", got)
            }
            if labelCount != 1 {
                t.Errorf("expected 1 label, got %d", labelCount)
            }
            if subtypeCount != 0 {
                t.Errorf("expected 0 subtypes, got %d", subtypeCount)
            }
            if got := len(fileUpdates); got != 1 {
                t.Errorf("expected 1 update, got %d", got)
            }
        })
    }
  • file addition: import_files_test.go (----------)
    [6.1]
    package imp
    import (
    "context"
    "database/sql"
    "path/filepath"
    "testing"
    "skraak/db"
    )
    // TestImportAudioFiles runs ImportAudioFiles against a file-based test
    // database. It covers a single-file import, a repeated import of the same
    // file, and several inputs that must be rejected.
    func TestImportAudioFiles(t *testing.T) {
        ctx := context.Background()

        t.Run("happy path - import single WAV file", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            // No handle is held open here - ImportAudioFiles manages its own connections

            // One WAV file in a fresh folder
            folder := t.TempDir()
            wavHash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

            result, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
                Recursive:  new(true),
            })
            if err != nil {
                t.Fatalf("ImportAudioFiles failed: %v", err)
            }

            // Summary counters
            summary := result.Summary
            if summary.TotalFiles != 1 {
                t.Errorf("expected 1 total file, got %d", summary.TotalFiles)
            }
            if summary.ImportedFiles != 1 {
                t.Errorf("expected 1 imported file, got %d", summary.ImportedFiles)
            }
            if summary.SkippedFiles != 0 {
                t.Errorf("expected 0 skipped files, got %d", summary.SkippedFiles)
            }
            if len(result.Errors) != 0 {
                t.Errorf("unexpected errors: %v", result.Errors)
            }

            // Inspect the database through a fresh connection
            verifyDB, err := sql.Open("duckdb", dbPath)
            if err != nil {
                t.Fatalf("failed to open database for verification: %v", err)
            }
            defer verifyDB.Close()

            var activeFiles int
            if err := verifyDB.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash).Scan(&activeFiles); err != nil {
                t.Fatalf("failed to query file: %v", err)
            }
            if activeFiles != 1 {
                t.Errorf("expected 1 file in database, got %d", activeFiles)
            }

            // The file must also be linked to the dataset
            var datasetLinks int
            linkRow := verifyDB.QueryRow(`
    SELECT COUNT(*) FROM file_dataset fd
    JOIN file f ON fd.file_id = f.id
    WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000001'
    `, wavHash)
            if err := linkRow.Scan(&datasetLinks); err != nil {
                t.Fatalf("failed to query file_dataset: %v", err)
            }
            if datasetLinks != 1 {
                t.Errorf("expected 1 file_dataset link, got %d", datasetLinks)
            }
        })

        t.Run("duplicate handling - skip file with existing hash", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            // No handle is held open here - ImportAudioFiles manages its own connections

            // One WAV file in a fresh folder
            folder := t.TempDir()
            wavHash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

            // First pass imports the file
            if _, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
                Recursive:  new(true),
            }); err != nil {
                t.Fatalf("first import failed: %v", err)
            }

            // Second pass sees the same hash and should skip it
            result, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
                Recursive:  new(true),
            })
            if err != nil {
                t.Fatalf("second import failed: %v", err)
            }

            // Summary counters
            summary := result.Summary
            if summary.TotalFiles != 1 {
                t.Errorf("expected 1 total file, got %d", summary.TotalFiles)
            }
            if summary.ImportedFiles != 0 {
                t.Errorf("expected 0 imported files (duplicate), got %d", summary.ImportedFiles)
            }
            if summary.SkippedFiles != 1 {
                t.Errorf("expected 1 skipped file (duplicate), got %d", summary.SkippedFiles)
            }

            // Exactly one row may exist for the hash - checked on a fresh connection
            verifyDB, err := sql.Open("duckdb", dbPath)
            if err != nil {
                t.Fatalf("failed to open database for verification: %v", err)
            }
            defer verifyDB.Close()

            var activeFiles int
            if err := verifyDB.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash).Scan(&activeFiles); err != nil {
                t.Fatalf("failed to query file: %v", err)
            }
            if activeFiles != 1 {
                t.Errorf("expected 1 file in database (not duplicated), got %d", activeFiles)
            }
        })

        t.Run("invalid dataset ID", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            folder := t.TempDir()

            if _, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "invalid_id",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            }); err == nil {
                t.Error("expected error for invalid dataset ID")
            }
        })

        t.Run("folder does not exist", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)

            if _, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: "/nonexistent/path",
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            }); err == nil {
                t.Error("expected error for nonexistent folder")
            }
        })

        t.Run("unstructured dataset rejected", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            folder := t.TempDir()
            createTestWAV(t, filepath.Join(folder, "test.wav"))

            if _, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "dstest000002", // unstructured dataset
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
            }); err == nil {
                t.Error("expected error for unstructured dataset")
            }
        })

        t.Run("inactive cluster rejected", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            folder := t.TempDir()
            createTestWAV(t, filepath.Join(folder, "test.wav"))

            if _, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
                DBPath:     dbPath,
                FolderPath: folder,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000002", // inactive cluster
            }); err == nil {
                t.Error("expected error for inactive cluster")
            }
        })
    }
    // boolPtr returns a pointer to a freshly allocated bool holding v.
    //
    // The body passes a value, not a type, to new; that form needs Go 1.26 or
    // later. NOTE(review): the directive below presumably lets `go fix` replace
    // calls with the body expression, which would explain why the tests in this
    // file call new(true) directly - confirm before changing the body.
    //
    //go:fix inline
    func boolPtr(v bool) *bool {
    return new(v)
    }
    // TestImportCluster exercises the lower-level ImportCluster function inside
    // a caller-managed transaction: once with a folder holding one WAV file and
    // once with an empty folder.
    func TestImportCluster(t *testing.T) {
        ctx := context.Background()
        database := setupImportTestDB(t)
        defer database.Close()

        // inputFor builds the request used by both subtests; only the folder varies.
        inputFor := func(folder string) ClusterImportInput {
            return ClusterImportInput{
                FolderPath: folder,
                DatasetID:  "dstest000001",
                LocationID: "loctest00001",
                ClusterID:  "cltest000001",
                Recursive:  true,
            }
        }

        t.Run("happy path", func(t *testing.T) {
            folder := t.TempDir()
            wavHash := createTestWAV(t, filepath.Join(folder, "test.wav"))

            tx := beginTestTx(t, ctx, database)
            defer tx.Rollback()

            result, err := ImportCluster(database, tx, inputFor(folder))
            if err != nil {
                t.Fatalf("ImportCluster failed: %v", err)
            }
            if got := result.TotalFiles; got != 1 {
                t.Errorf("expected 1 total file, got %d", got)
            }
            if got := result.ImportedFiles; got != 1 {
                t.Errorf("expected 1 imported file, got %d", got)
            }

            // Commit so the row can be read back outside the transaction.
            if err := tx.Commit(); err != nil {
                t.Fatalf("commit failed: %v", err)
            }

            var activeFiles int
            row := database.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash)
            if err := row.Scan(&activeFiles); err != nil {
                t.Fatalf("query failed: %v", err)
            }
            if activeFiles != 1 {
                t.Errorf("expected 1 file in database, got %d", activeFiles)
            }
        })

        t.Run("empty folder returns empty output", func(t *testing.T) {
            folder := t.TempDir()

            tx := beginTestTx(t, ctx, database)
            defer tx.Rollback()

            result, err := ImportCluster(database, tx, inputFor(folder))
            if err != nil {
                t.Fatalf("ImportCluster failed: %v", err)
            }
            if got := result.TotalFiles; got != 0 {
                t.Errorf("expected 0 total files, got %d", got)
            }
        })
    }
    // TestCheckDuplicateHash verifies CheckDuplicateHash for a hash with no
    // matching file row and for a hash that belongs to a file inserted by the
    // test.
    func TestCheckDuplicateHash(t *testing.T) {
        database := setupImportTestDB(t)
        defer database.Close()

        t.Run("no duplicate found", func(t *testing.T) {
            fileID, found, err := CheckDuplicateHash(database, "nonexistent_hash_12345")
            if err != nil {
                t.Fatalf("CheckDuplicateHash failed: %v", err)
            }
            if found {
                t.Error("expected no duplicate for nonexistent hash")
            }
            if fileID != "" {
                t.Errorf("expected empty id, got %q", fileID)
            }
        })

        t.Run("duplicate found", func(t *testing.T) {
            // Seed a file row whose hash is known up front.
            const (
                knownHash = "dup_test_hash_001"
                knownID   = "fitest000003"
            )
            db.InsertTestFileForCluster(t, database, knownID, "cltest000001", "loctest00001", "dup_test.wav", knownHash, 1.0)

            fileID, found, err := CheckDuplicateHash(database, knownHash)
            if err != nil {
                t.Fatalf("CheckDuplicateHash failed: %v", err)
            }
            if !found {
                t.Error("expected duplicate to be found")
            }
            if fileID != knownID {
                t.Errorf("expected id fitest000003, got %q", fileID)
            }
        })
    }
    // TestEnsureClusterPath tests the cluster path setting function: a cluster
    // whose path is NULL must receive one, and a cluster that already has a path
    // must keep it unchanged.
    func TestEnsureClusterPath(t *testing.T) {
        database := setupImportTestDB(t)
        defer database.Close()

        // clusterPath reads the stored path of the given cluster and fails the
        // calling subtest when the row cannot be scanned.
        clusterPath := func(t *testing.T, clusterID string) string {
            t.Helper()
            var path string
            if err := database.QueryRow("SELECT path FROM cluster WHERE id = ?", clusterID).Scan(&path); err != nil {
                t.Fatalf("query failed: %v", err)
            }
            return path
        }

        t.Run("set empty path", func(t *testing.T) {
            // Create a cluster with empty path
            db.InsertTestCluster(t, database, "clptest00001", "dstest000001", "loctest00001", "Path Test")
            mustExec(t, database, "UPDATE cluster SET path = NULL WHERE id = 'clptest00001'")

            if err := EnsureClusterPath(database, "clptest00001", "/test/path"); err != nil {
                t.Fatalf("EnsureClusterPath failed: %v", err)
            }

            // Path is normalized by utils.NormalizeFolderPath which may remove leading slash,
            // so only its presence is asserted, not its exact value.
            if clusterPath(t, "clptest00001") == "" {
                t.Error("expected path to be set")
            }
        })

        t.Run("do not overwrite existing path", func(t *testing.T) {
            // cltest000001 already has a path from setup. Capture it first: the
            // stored value may be normalized, so comparing against the literal
            // "/new/path" alone could miss an overwrite.
            before := clusterPath(t, "cltest000001")
            if before == "" {
                t.Fatal("precondition failed: expected cltest000001 to have a path from setup")
            }

            if err := EnsureClusterPath(database, "cltest000001", "/new/path"); err != nil {
                t.Fatalf("EnsureClusterPath failed: %v", err)
            }

            // Path should NOT have been changed
            if after := clusterPath(t, "cltest000001"); after != before {
                t.Errorf("path should not have been overwritten: was %q, now %q", before, after)
            }
        })
    }
  • file addition: bulk_file_import_test.go (----------)
    [6.1]
    package imp
    import (
    "context"
    "database/sql"
    "os"
    "path/filepath"
    "testing"
    )
    // TestBulkFileImport runs BulkFileImport against a file-based test database:
    // one CSV-driven import that should succeed, followed by inputs that must
    // produce an error.
    func TestBulkFileImport(t *testing.T) {
        ctx := context.Background()

        t.Run("happy path - import from CSV", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)

            // A "recordings" folder holding a single WAV file
            workDir := t.TempDir()
            recordings := filepath.Join(workDir, "recordings")
            if err := os.Mkdir(recordings, 0755); err != nil {
                t.Fatalf("failed to create wav dir: %v", err)
            }
            createTestWAV(t, filepath.Join(recordings, "test_recording.wav"))

            // CSV row pointing at that folder, plus a log file
            rows := [][]string{
                {"Test Location", "loctest00001", recordings, "2024-01", "16000", "1"},
            }
            csvPath := createTestCSVFile(t, workDir, rows)
            logPath := createTestLogFile(t, workDir)

            result, err := BulkFileImport(ctx, BulkFileImportInput{
                DBPath:      dbPath,
                DatasetID:   "dstest000001",
                CSVPath:     csvPath,
                LogFilePath: logPath,
            })
            if err != nil {
                t.Fatalf("BulkFileImport failed: %v", err)
            }

            // Reported totals
            if got := result.TotalLocations; got != 1 {
                t.Errorf("expected 1 location, got %d", got)
            }
            if result.TotalFilesScanned == 0 {
                t.Error("expected some files scanned")
            }
            if result.FilesImported == 0 {
                t.Error("expected some files imported")
            }

            // At least one active cluster must exist afterwards
            verifyDB, err := sql.Open("duckdb", dbPath)
            if err != nil {
                t.Fatalf("failed to open database: %v", err)
            }
            defer verifyDB.Close()

            var activeClusters int
            if err := verifyDB.QueryRow("SELECT COUNT(*) FROM cluster WHERE active = true").Scan(&activeClusters); err != nil {
                t.Fatalf("failed to query clusters: %v", err)
            }
            if activeClusters < 1 {
                t.Errorf("expected at least 1 cluster, got %d", activeClusters)
            }
        })

        t.Run("missing CSV file - error", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            workDir := t.TempDir()
            logPath := createTestLogFile(t, workDir)

            if _, err := BulkFileImport(ctx, BulkFileImportInput{
                DBPath:      dbPath,
                DatasetID:   "dstest000001",
                CSVPath:     filepath.Join(workDir, "nonexistent.csv"),
                LogFilePath: logPath,
            }); err == nil {
                t.Error("expected error for missing CSV file")
            }
        })

        t.Run("invalid dataset ID", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            workDir := t.TempDir()
            csvPath := createTestCSVFile(t, workDir, [][]string{})
            logPath := createTestLogFile(t, workDir)

            if _, err := BulkFileImport(ctx, BulkFileImportInput{
                DBPath:      dbPath,
                DatasetID:   "invalid_id",
                CSVPath:     csvPath,
                LogFilePath: logPath,
            }); err == nil {
                t.Error("expected error for invalid dataset ID")
            }
        })

        t.Run("location not in dataset - error", func(t *testing.T) {
            dbPath := setupFileBasedTestDB(t)
            workDir := t.TempDir()
            recordings := filepath.Join(workDir, "recordings")
            if err := os.Mkdir(recordings, 0755); err != nil {
                t.Fatalf("failed to create wav dir: %v", err)
            }
            createTestWAV(t, filepath.Join(recordings, "test.wav"))

            // The CSV row names a location ID that is not valid
            rows := [][]string{
                {"Test Location", "invalid_loc!", recordings, "2024-01", "16000", "1"},
            }
            csvPath := createTestCSVFile(t, workDir, rows)
            logPath := createTestLogFile(t, workDir)

            if _, err := BulkFileImport(ctx, BulkFileImportInput{
                DBPath:      dbPath,
                DatasetID:   "dstest000001",
                CSVPath:     csvPath,
                LogFilePath: logPath,
            }); err == nil {
                t.Error("expected error for location not in dataset")
            }
        })
    }
    // TestParseBulkCSVRow checks parseBulkCSVRow on one well-formed row and on a
    // set of malformed rows that must each be rejected with an error.
    func TestParseBulkCSVRow(t *testing.T) {
        t.Run("valid row", func(t *testing.T) {
            parsed, err := parseBulkCSVRow([]string{"Test Loc", "loctest00001", "/path/to/dir", "2024-01", "16000", "10"})
            if err != nil {
                t.Fatalf("unexpected error: %v", err)
            }
            if parsed.LocationName != "Test Loc" {
                t.Errorf("expected Test Loc, got %s", parsed.LocationName)
            }
            if parsed.LocationID != "loctest00001" {
                t.Errorf("expected loctest00001, got %s", parsed.LocationID)
            }
            if parsed.SampleRate != 16000 {
                t.Errorf("expected 16000, got %d", parsed.SampleRate)
            }
            if parsed.FileCount != 10 {
                t.Errorf("expected 10, got %d", parsed.FileCount)
            }
        })

        // Rows that must be rejected. Each subtest name doubles as the tail of
        // its failure message.
        rejected := []struct {
            name string
            row  []string
        }{
            {"insufficient columns", []string{"a", "b", "c"}},
            {"empty location_name", []string{"", "loctest00001", "/path", "2024-01", "16000", "10"}},
            {"empty directory_path", []string{"Test Loc", "loctest00001", "", "2024-01", "16000", "10"}},
            {"invalid sample_rate", []string{"Test Loc", "loctest00001", "/path", "2024-01", "notanumber", "10"}},
            {"invalid location_id format", []string{"Test Loc", "badid", "/path", "2024-01", "16000", "10"}},
        }
        for _, tc := range rejected {
            t.Run(tc.name, func(t *testing.T) {
                if _, err := parseBulkCSVRow(tc.row); err == nil {
                    t.Error("expected error for " + tc.name)
                }
            })
        }
    }
    // TestBulkReadCSV checks bulkReadCSV with a two-row CSV produced by the test
    // helper and with a zero-byte file, which must yield an error.
    func TestBulkReadCSV(t *testing.T) {
        t.Run("reads valid CSV", func(t *testing.T) {
            rows := [][]string{
                {"Loc1", "loctest00001", "/path1", "2024-01", "16000", "10"},
                {"Loc2", "loctest00002", "/path2", "2024-02", "48000", "20"},
            }
            csvPath := createTestCSVFile(t, t.TempDir(), rows)

            parsed, err := bulkReadCSV(csvPath)
            if err != nil {
                t.Fatalf("unexpected error: %v", err)
            }
            if got := len(parsed); got != 2 {
                t.Errorf("expected 2 locations, got %d", got)
            }
        })

        t.Run("empty CSV - error", func(t *testing.T) {
            csvPath := filepath.Join(t.TempDir(), "empty.csv")

            // Zero bytes on disk: not even a header line.
            if err := os.WriteFile(csvPath, []byte(""), 0644); err != nil {
                t.Fatalf("failed to write CSV: %v", err)
            }

            if _, err := bulkReadCSV(csvPath); err == nil {
                t.Error("expected error for empty CSV")
            }
        })
    }