JMDW37LVYJAWULK4RREBRC6OLNVMA5PO5TLN6JIZCTZELL7YF4MQC package impimport ("context""database/sql""encoding/json""fmt""os""path/filepath""testing""time""skraak/datafile""skraak/db""skraak/utils")// setupImportTestDB creates an in-memory DuckDB with the full schema and test data.//// Test data:// - Structured dataset: dstest000001// - Unstructured dataset: dstest000002// - Location (active): loctest00001 (in dstest000001)// - Location (inactive): loctest00002 (in dstest000001)// - Cluster (active): cltest000001 (in loctest00001)// - Cluster (inactive): cltest000002 (in loctest00001)// - Species: Kiwi (sptest000001), Roroa (sptest000002)// - Calltypes: Kiwi/song (cttest000001), Kiwi/duet (cttest000002)// - Filters: kiwi.txt (fitest000001), test.txt (fitest000002)func setupImportTestDB(t *testing.T) *sql.DB {t.Helper()database := db.SetupTestDB(t)// Datasetsdb.InsertTestDatasetWithType(t, database, "dstest000001", "Test Structured", "structured")db.InsertTestDatasetWithType(t, database, "dstest000002", "Test Unstructured", "unstructured")// Locationsdb.InsertTestLocation(t, database, "loctest00001", "dstest000001", "Test Location Active")db.InsertTestLocation(t, database, "loctest00002", "dstest000001", "Test Location Inactive")mustExec(t, database, "UPDATE location SET active = false WHERE id = 'loctest00002'")// Clustersdb.InsertTestCluster(t, database, "cltest000001", "dstest000001", "loctest00001", "Test Cluster Active")db.InsertTestCluster(t, database, "cltest000002", "dstest000001", "loctest00001", "Test Cluster Inactive")mustExec(t, database, "UPDATE cluster SET active = false WHERE id = 'cltest000002'")// Speciesdb.InsertTestSpecies(t, database, "sptest000001", "Kiwi")db.InsertTestSpecies(t, database, "sptest000002", "Roroa")// Calltypesdb.InsertTestCallType(t, database, "cttest000001", "sptest000001", "song")db.InsertTestCallType(t, database, "cttest000002", "sptest000001", "duet")// Filtersdb.InsertTestFilter(t, database, "fitest000001", 
"kiwi.txt")db.InsertTestFilter(t, database, "fitest000002", "test.txt")return database}// setupFileBasedTestDB creates a file-based DuckDB for tests that need to// open multiple connections to the same database (e.g., ImportAudioFiles).// Returns the path to the database file. The database is closed after setup.func setupFileBasedTestDB(t *testing.T) string {t.Helper()// Create temp file for databasetmpDir := t.TempDir()dbPath := filepath.Join(tmpDir, "test.duckdb")// Open databasedatabase, err := sql.Open("duckdb", dbPath)if err != nil {t.Fatalf("failed to open database: %v", err)}// Apply schemaschema, err := db.ReadSchemaSQL()if err != nil {database.Close()t.Fatalf("failed to read schema: %v", err)}if _, err = database.Exec(schema); err != nil {database.Close()t.Fatalf("failed to create schema: %v", err)}// Insert test data - same as setupImportTestDBdb.InsertTestDatasetWithType(t, database, "dstest000001", "Test Structured", "structured")db.InsertTestDatasetWithType(t, database, "dstest000002", "Test Unstructured", "unstructured")db.InsertTestLocation(t, database, "loctest00001", "dstest000001", "Test Location Active")db.InsertTestLocation(t, database, "loctest00002", "dstest000001", "Test Location Inactive")mustExec(t, database, "UPDATE location SET active = false WHERE id = 'loctest00002'")db.InsertTestCluster(t, database, "cltest000001", "dstest000001", "loctest00001", "Test Cluster Active")db.InsertTestCluster(t, database, "cltest000002", "dstest000001", "loctest00001", "Test Cluster Inactive")mustExec(t, database, "UPDATE cluster SET active = false WHERE id = 'cltest000002'")db.InsertTestSpecies(t, database, "sptest000001", "Kiwi")db.InsertTestSpecies(t, database, "sptest000002", "Roroa")db.InsertTestCallType(t, database, "cttest000001", "sptest000001", "song")db.InsertTestCallType(t, database, "cttest000002", "sptest000001", "duet")db.InsertTestFilter(t, database, "fitest000001", "kiwi.txt")db.InsertTestFilter(t, database, "fitest000002", "test.txt")// 
Close the database so tests can open their own connectionsdatabase.Close()return dbPath}// mustExec executes a SQL statement, failing the test on error.func mustExec(t *testing.T, database *sql.DB, query string, args ...any) {t.Helper()if _, err := database.Exec(query, args...); err != nil {t.Fatalf("exec: %v", err)}}// createTestWAV creates a minimal valid WAV file at the given path.// Returns the XXH64 hash of the file.func createTestWAV(t *testing.T, path string) string {t.Helper()// Create a 1-second WAV file at 16kHz mono 16-bit// 44-byte header + 32000 bytes of data (16000 samples * 2 bytes)const sampleRate = 16000const numSamples = sampleRate // 1 secondconst dataSize = numSamples * 2 // 2 bytes per sampleconst fileSize = 44 + dataSize - 8data := make([]byte, 44+dataSize)// RIFF headercopy(data[0:4], "RIFF")data[4] = byte(fileSize & 0xFF)data[5] = byte((fileSize >> 8) & 0xFF)data[6] = byte((fileSize >> 16) & 0xFF)data[7] = byte((fileSize >> 24) & 0xFF)copy(data[8:12], "WAVE")// fmt chunkcopy(data[12:16], "fmt ")data[16] = 16 // fmt chunk sizedata[20] = 1 // PCM formatdata[22] = 1 // monodata[24] = byte(sampleRate & 0xFF)data[25] = byte((sampleRate >> 8) & 0xFF)const byteRate = sampleRate * 2data[28] = byte(byteRate & 0xFF)data[29] = byte((byteRate >> 8) & 0xFF)data[32] = 2 // block aligndata[34] = 16 // bits per sample// data chunkcopy(data[36:40], "data")data[40] = byte(dataSize & 0xFF)data[41] = byte((dataSize >> 8) & 0xFF)data[42] = byte((dataSize >> 16) & 0xFF)data[43] = byte((dataSize >> 24) & 0xFF)// Audio data is already zerosif err := os.WriteFile(path, data, 0644); err != nil {t.Fatalf("failed to create test WAV: %v", err)}hash, err := utils.ComputeXXH64(path)if err != nil {t.Fatalf("failed to compute hash: %v", err)}return hash}// createTestWAVWithMetadata creates a WAV file and inserts it into the database.// Returns the file ID and hash.func createTestWAVWithMetadata(t *testing.T, database *sql.DB, clusterID, locationID, filename string) (fileID, 
hash string) {t.Helper()// Create temp filetmpDir := t.TempDir()wavPath := filepath.Join(tmpDir, filename)hash = createTestWAV(t, wavPath)// Generate file IDfileID, err := utils.GenerateLongID()if err != nil {t.Fatalf("failed to generate file ID: %v", err)}// Insert file record_, err = database.ExecContext(context.Background(), `INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id, timestamp_local, duration, sample_rate, active)VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, 0.0005, 16000, true)`, fileID, filename, hash, locationID, clusterID)if err != nil {t.Fatalf("failed to insert file: %v", err)}return fileID, hash}// createTestDataFile creates a minimal .data file with the given segments.func createTestDataFile(t *testing.T, wavPath string, segments []*datafile.Segment) string {t.Helper()dataPath := wavPath + ".data"df := &datafile.DataFile{Meta: &datafile.DataMeta{Operator: "test",Duration: 0.0005,},Segments: segments,}if err := df.Write(dataPath); err != nil {t.Fatalf("failed to write test .data file: %v", err)}return dataPath}// createTestMappingFile creates a minimal mapping.json file.func createTestMappingFile(t *testing.T, dir string) string {t.Helper()mapping := map[string]any{"Kiwi": map[string]any{"species": "Kiwi","calltypes": map[string]string{"song": "song","duet": "duet",},},"Roroa": map[string]any{"species": "Roroa",},}data, err := json.Marshal(mapping)if err != nil {t.Fatalf("failed to marshal mapping: %v", err)}path := filepath.Join(dir, "mapping.json")if err := os.WriteFile(path, data, 0644); err != nil {t.Fatalf("failed to write mapping file: %v", err)}return path}// createTestCSVFile creates a CSV file for bulk import testing.// Columns: location_name, location_id, directory_path, date_range, sample_rate, file_countfunc createTestCSVFile(t *testing.T, dir string, rows [][]string) string {t.Helper()path := filepath.Join(dir, "import.csv")file, err := os.Create(path)if err != nil {t.Fatalf("failed to create CSV: %v", err)}defer 
file.Close()// Write headerif _, err := file.WriteString("location_name,location_id,directory_path,date_range,sample_rate,file_count\n"); err != nil {t.Fatalf("failed to write CSV header: %v", err)}// Write rowsfor _, row := range rows {line := fmt.Sprintf("%s,%s,%s,%s,%s,%s\n", row[0], row[1], row[2], row[3], row[4], row[5])if _, err := file.WriteString(line); err != nil {t.Fatalf("failed to write CSV row: %v", err)}}return path}// createTestLogFile creates a log file path for bulk import testing.func createTestLogFile(t *testing.T, dir string) string {t.Helper()path := filepath.Join(dir, "import.log")// Create empty fileif err := os.WriteFile(path, []byte{}, 0644); err != nil {t.Fatalf("failed to create log file: %v", err)}return path}// assertFileCount queries the database and asserts the expected number of files.func assertFileCount(t *testing.T, database *sql.DB, expected int) {t.Helper()var count intif err := database.QueryRow("SELECT COUNT(*) FROM file WHERE active = true").Scan(&count); err != nil {t.Fatalf("failed to count files: %v", err)}if count != expected {t.Errorf("expected %d files, got %d", expected, count)}}// assertSegmentCount queries the database and asserts the expected number of segments.func assertSegmentCount(t *testing.T, database *sql.DB, expected int) {t.Helper()var count intif err := database.QueryRow("SELECT COUNT(*) FROM segment WHERE active = true").Scan(&count); err != nil {t.Fatalf("failed to count segments: %v", err)}if count != expected {t.Errorf("expected %d segments, got %d", expected, count)}}// assertLabelCount queries the database and asserts the expected number of labels.func assertLabelCount(t *testing.T, database *sql.DB, expected int) {t.Helper()var count intif err := database.QueryRow("SELECT COUNT(*) FROM label WHERE active = true").Scan(&count); err != nil {t.Fatalf("failed to count labels: %v", err)}if count != expected {t.Errorf("expected %d labels, got %d", expected, count)}}// getTestLocationData returns location 
data for testing.func getTestLocationData(t *testing.T, database *sql.DB, locationID string) *LocationData {t.Helper()data, err := GetLocationData(database, locationID)if err != nil {t.Fatalf("failed to get location data: %v", err)}return data}// beginTestTx begins a logged transaction for testing.func beginTestTx(t *testing.T, ctx context.Context, database *sql.DB) *db.LoggedTx {t.Helper()tx, err := db.BeginLoggedTx(ctx, database, "test")if err != nil {t.Fatalf("failed to begin transaction: %v", err)}return tx}// waitForAsync waits for a short duration to allow async operations to complete.func waitForAsync() {time.Sleep(100 * time.Millisecond)}
package imp

import (
	"context"
	"database/sql"
	"os"
	"path/filepath"
	"testing"
)

// TestImportUnstructured exercises ImportUnstructured against a file-based
// test database: a successful import, duplicate detection by hash, an empty
// folder, and the rejected inputs (structured dataset, unknown dataset ID,
// missing folder).
func TestImportUnstructured(t *testing.T) {
	ctx := context.Background()

	// recursiveInput builds a fresh recursive import request for the
	// unstructured fixture dataset.
	recursiveInput := func(dbPath, folder string) ImportUnstructuredInput {
		return ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "dstest000002", // unstructured dataset
			FolderPath: folder,
			Recursive:  new(true),
		}
	}

	// openForVerification opens a new connection to inspect the import result.
	openForVerification := func(t *testing.T, dbPath string) *sql.DB {
		t.Helper()
		conn, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		return conn
	}

	// activeFilesWithHash counts the active file rows carrying the given hash.
	activeFilesWithHash := func(t *testing.T, conn *sql.DB, sum string) int {
		t.Helper()
		var n int
		row := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", sum)
		if err := row.Scan(&n); err != nil {
			t.Fatalf("failed to query file: %v", err)
		}
		return n
	}

	t.Run("happy path - import single WAV file", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// One WAV file in an otherwise empty folder
		folder := t.TempDir()
		sum := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

		// Import into the unstructured dataset
		res, err := ImportUnstructured(ctx, recursiveInput(dbPath, folder))
		if err != nil {
			t.Fatalf("ImportUnstructured failed: %v", err)
		}

		// Check the reported counters
		if res.TotalFiles != 1 {
			t.Errorf("expected 1 total file, got %d", res.TotalFiles)
		}
		if res.ImportedFiles != 1 {
			t.Errorf("expected 1 imported file, got %d", res.ImportedFiles)
		}
		if res.SkippedFiles != 0 {
			t.Errorf("expected 0 skipped files, got %d", res.SkippedFiles)
		}
		if len(res.Errors) != 0 {
			t.Errorf("unexpected errors: %v", res.Errors)
		}

		// Check that the file row exists
		conn := openForVerification(t, dbPath)
		defer conn.Close()

		if n := activeFilesWithHash(t, conn, sum); n != 1 {
			t.Errorf("expected 1 file in database, got %d", n)
		}

		// Check the file_dataset link
		var links int
		err = conn.QueryRow(`
			SELECT COUNT(*) FROM file_dataset fd
			JOIN file f ON fd.file_id = f.id
			WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000002'
		`, sum).Scan(&links)
		if err != nil {
			t.Fatalf("failed to query file_dataset: %v", err)
		}
		if links != 1 {
			t.Errorf("expected 1 file_dataset link, got %d", links)
		}

		// Unstructured files carry neither a location nor a cluster
		var location, cluster sql.NullString
		err = conn.QueryRow("SELECT location_id, cluster_id FROM file WHERE xxh64_hash = ?", sum).Scan(&location, &cluster)
		if err != nil {
			t.Fatalf("failed to query file: %v", err)
		}
		if location.Valid {
			t.Errorf("expected NULL location_id for unstructured file, got %s", location.String)
		}
		if cluster.Valid {
			t.Errorf("expected NULL cluster_id for unstructured file, got %s", cluster.String)
		}
	})

	t.Run("duplicate handling - skip file with existing hash", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// One WAV file in an otherwise empty folder
		folder := t.TempDir()
		sum := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

		// First pass imports the file
		if _, err := ImportUnstructured(ctx, recursiveInput(dbPath, folder)); err != nil {
			t.Fatalf("first import failed: %v", err)
		}

		// Second pass sees the same hash and must skip it
		res, err := ImportUnstructured(ctx, recursiveInput(dbPath, folder))
		if err != nil {
			t.Fatalf("second import failed: %v", err)
		}

		// Check the reported counters
		if res.TotalFiles != 1 {
			t.Errorf("expected 1 total file, got %d", res.TotalFiles)
		}
		if res.ImportedFiles != 0 {
			t.Errorf("expected 0 imported files (duplicate), got %d", res.ImportedFiles)
		}
		if res.SkippedFiles != 1 {
			t.Errorf("expected 1 skipped file (duplicate), got %d", res.SkippedFiles)
		}

		// The database must still hold a single row for this hash
		conn := openForVerification(t, dbPath)
		defer conn.Close()

		if n := activeFilesWithHash(t, conn, sum); n != 1 {
			t.Errorf("expected 1 file in database (not duplicated), got %d", n)
		}
	})

	t.Run("empty folder returns empty output", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		res, err := ImportUnstructured(ctx, recursiveInput(dbPath, t.TempDir()))
		if err != nil {
			t.Fatalf("ImportUnstructured failed: %v", err)
		}
		if res.TotalFiles != 0 {
			t.Errorf("expected 0 total files, got %d", res.TotalFiles)
		}
	})

	t.Run("structured dataset rejected", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		folder := t.TempDir()
		createTestWAV(t, filepath.Join(folder, "test.wav"))

		in := ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "dstest000001", // structured dataset
			FolderPath: folder,
		}
		if _, err := ImportUnstructured(ctx, in); err == nil {
			t.Error("expected error for structured dataset")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		in := ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "invalid_id",
			FolderPath: t.TempDir(),
		}
		if _, err := ImportUnstructured(ctx, in); err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})

	t.Run("folder does not exist", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		in := ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "dstest000002",
			FolderPath: "/nonexistent/path",
		}
		if _, err := ImportUnstructured(ctx, in); err == nil {
			t.Error("expected error for nonexistent folder")
		}
	})
}

// TestScanWavFiles checks that scanWavFiles finds WAV files regardless of
// extension case and honours its recursive flag.
func TestScanWavFiles(t *testing.T) {
	// Both single-file cases expect exactly one hit from a non-recursive scan.
	singleFileCases := []struct {
		name     string
		fileName string
	}{
		{name: "finds WAV files", fileName: "test.wav"},
		{name: "case insensitive extension", fileName: "test.WAV"},
	}
	for _, tc := range singleFileCases {
		t.Run(tc.name, func(t *testing.T) {
			root := t.TempDir()
			createTestWAV(t, filepath.Join(root, tc.fileName))

			found, scanErrs := scanWavFiles(root, false)
			if len(scanErrs) > 0 {
				t.Errorf("unexpected errors: %v", scanErrs)
			}
			if len(found) != 1 {
				t.Errorf("expected 1 file, got %d", len(found))
			}
		})
	}

	// makeTree builds a folder holding root.wav plus subdir/sub.wav and
	// returns the folder path.
	makeTree := func(t *testing.T) string {
		t.Helper()
		root := t.TempDir()
		nested := filepath.Join(root, "subdir")
		if err := os.Mkdir(nested, 0755); err != nil {
			t.Fatalf("failed to create subdir: %v", err)
		}
		createTestWAV(t, filepath.Join(root, "root.wav"))
		createTestWAV(t, filepath.Join(nested, "sub.wav"))
		return root
	}

	t.Run("non-recursive ignores subdirectories", func(t *testing.T) {
		found, scanErrs := scanWavFiles(makeTree(t), false)
		if len(scanErrs) > 0 {
			t.Errorf("unexpected errors: %v", scanErrs)
		}
		if len(found) != 1 {
			t.Errorf("expected 1 file (non-recursive), got %d", len(found))
		}
	})

	t.Run("recursive finds all files", func(t *testing.T) {
		found, scanErrs := scanWavFiles(makeTree(t), true)
		if len(scanErrs) > 0 {
			t.Errorf("unexpected errors: %v", scanErrs)
		}
		if len(found) != 2 {
			t.Errorf("expected 2 files (recursive), got %d", len(found))
		}
	})
}
// setupImportTestDB creates a DB with the full schema + test data for import validation.//// dataset (structured): ds_imptest00001// dataset (unstructured): ds_imptest00002// location in ds1: loc_imptest0001// location (inactive): loc_imptest0002 (in ds1)// cluster in loc1: cl_imptest00001// cluster (inactive): cl_imptest00002 (in loc1)// species: Kiwi (sp_kiwi000000), Roroa (sp_roroa00000)// calltypes: Kiwi/song (ct_kiwi000001), Kiwi/duet (ct_kiwi000002)// filter: kiwi.txt (fi_kiwi0000001), tomtit.txt (fi_tomtit000001)func setupImportTestDB(t *testing.T) *sql.DB {t.Helper()database := db.SetupTestDB(t)db.InsertTestDatasetWithType(t, database, "ds_imptest00001", "Imp Structured", "structured")db.InsertTestDatasetWithType(t, database, "ds_imptest00002", "Imp Unstructured", "unstructured")db.InsertTestLocation(t, database, "loc_imptest0001", "ds_imptest00001", "Loc Active")db.InsertTestLocation(t, database, "loc_imptest0002", "ds_imptest00001", "Loc Inactive")mustExecImport(t, database, "UPDATE location SET active = false WHERE id = 'loc_imptest0002'")db.InsertTestCluster(t, database, "cl_imptest00001", "ds_imptest00001", "loc_imptest0001", "Cl Active")db.InsertTestCluster(t, database, "cl_imptest00002", "ds_imptest00001", "loc_imptest0001", "Cl Inactive")mustExecImport(t, database, "UPDATE cluster SET active = false WHERE id = 'cl_imptest00002'")db.InsertTestSpecies(t, database, "sp_kiwi000000", "Kiwi")db.InsertTestSpecies(t, database, "sp_roroa00000", "Roroa")db.InsertTestCallType(t, database, "ct_kiwi000001", "sp_kiwi000000", "song")db.InsertTestCallType(t, database, "ct_kiwi000002", "sp_kiwi000000", "duet")db.InsertTestFilter(t, database, "fi_kiwi0000001", "kiwi.txt")db.InsertTestFilter(t, database, "fi_tomtit000001", "tomtit.txt")return database}func mustExecImport(t *testing.T, database *sql.DB, query string, args ...any) {t.Helper()if _, err := database.Exec(query, args...); err != nil {t.Fatalf("exec: %v", err)}}
if calltypeMap["Kiwi"]["song"] != "ct_kiwi000001" {t.Errorf("Kiwi/song ID = %q, want ct_kiwi000001", calltypeMap["Kiwi"]["song"])
if calltypeMap["Kiwi"]["song"] != "cttest000001" {t.Errorf("Kiwi/song ID = %q, want cttest000001", calltypeMap["Kiwi"]["song"])
if calltypeMap["Kiwi"]["duet"] != "ct_kiwi000002" {t.Errorf("Kiwi/duet ID = %q, want ct_kiwi000002", calltypeMap["Kiwi"]["duet"])
if calltypeMap["Kiwi"]["duet"] != "cttest000002" {t.Errorf("Kiwi/duet ID = %q, want cttest000002", calltypeMap["Kiwi"]["duet"])
mustExecImport(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fi_test00000001", "ds_imptest00001")
mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitest000004", "dstest000001")
}// createFakeWAV creates a minimal WAV file (44-byte header + 1 sample)func createFakeWAV(path string) error {// Minimal valid WAV: 44-byte header + 4 bytes of dataheader := make([]byte, 44)copy(header[0:4], "RIFF")copy(header[4:8], "\x24\x00\x00\x00") // file size - 8copy(header[8:12], "WAVE")copy(header[12:16], "fmt ")copy(header[16:20], "\x10\x00\x00\x00") // chunk sizecopy(header[20:22], "\x01\x00") // PCMcopy(header[22:24], "\x01\x00") // monocopy(header[24:28], "\x80\x3E\x00\x00") // 16000 sample ratecopy(header[28:32], "\x00\x7D\x00\x00") // byte ratecopy(header[32:34], "\x02\x00") // block aligncopy(header[34:36], "\x10\x00") // bits per samplecopy(header[36:40], "data")copy(header[40:44], "\x00\x00\x00\x00") // data sizereturn os.WriteFile(path, header, 0644)
package imp

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"

	"skraak/datafile"
	"skraak/db"
)

// TestImportSegments exercises ImportSegments against a file-based test
// database: a successful import, a .data file whose WAV is not registered,
// a folder without .data files, and an unknown dataset ID.
func TestImportSegments(t *testing.T) {
	ctx := context.Background()

	// kiwiSegments returns one 0.1-0.5s segment carrying a single Kiwi label.
	kiwiSegments := func() []*datafile.Segment {
		return []*datafile.Segment{
			{
				StartTime: 0.1,
				EndTime:   0.5,
				Labels: []*datafile.Label{
					{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
				},
			},
		}
	}

	// fixtureInput targets the active structured dataset, location and cluster.
	fixtureInput := func(dbPath, folder, mapping string) ImportSegmentsInput {
		return ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     folder,
			Mapping:    mapping,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		}
	}

	t.Run("happy path - import segments from .data file", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// WAV file that the .data file will sit next to
		folder := t.TempDir()
		wav := filepath.Join(folder, "test.wav")
		sum := createTestWAV(t, wav)

		// Register the WAV in the database first (simulating pre-imported audio)
		conn, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database: %v", err)
		}
		db.InsertTestFileForCluster(t, conn, "fitestseg001", "cltest000001", "loctest00001", "test.wav", sum, 1.0)
		mustExec(t, conn, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitestseg001", "dstest000001")
		conn.Close()

		// .data file and mapping file
		_ = createTestDataFile(t, wav, kiwiSegments())
		mapping := createTestMappingFile(t, folder)

		// Run the import
		res, err := ImportSegments(ctx, fixtureInput(dbPath, folder, mapping))
		if err != nil {
			t.Fatalf("ImportSegments failed: %v", err)
		}

		// Check the summary counters
		if res.Summary.DataFilesFound != 1 {
			t.Errorf("expected 1 data file found, got %d", res.Summary.DataFilesFound)
		}
		if res.Summary.ImportedSegments != 1 {
			t.Errorf("expected 1 imported segment, got %d", res.Summary.ImportedSegments)
		}
		if res.Summary.ImportedLabels != 1 {
			t.Errorf("expected 1 imported label, got %d", res.Summary.ImportedLabels)
		}

		// Check the rows written to the database
		conn, err = sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		defer conn.Close()

		var segmentRows int
		if err := conn.QueryRow("SELECT COUNT(*) FROM segment WHERE active = true").Scan(&segmentRows); err != nil {
			t.Fatalf("failed to query segments: %v", err)
		}
		if segmentRows != 1 {
			t.Errorf("expected 1 segment in database, got %d", segmentRows)
		}

		var labelRows int
		if err := conn.QueryRow("SELECT COUNT(*) FROM label WHERE active = true").Scan(&labelRows); err != nil {
			t.Fatalf("failed to query labels: %v", err)
		}
		if labelRows != 1 {
			t.Errorf("expected 1 label in database, got %d", labelRows)
		}
	})

	t.Run("file not in database - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// WAV and .data file exist on disk, but the WAV is never registered
		folder := t.TempDir()
		wav := filepath.Join(folder, "test.wav")
		createTestWAV(t, wav)
		createTestDataFile(t, wav, kiwiSegments())
		mapping := createTestMappingFile(t, folder)

		// The call itself succeeds; the problem is reported in the output
		res, err := ImportSegments(ctx, fixtureInput(dbPath, folder, mapping))
		if err != nil {
			t.Fatalf("ImportSegments should not return error: %v", err)
		}
		if len(res.Errors) == 0 {
			t.Error("expected errors for file not in database")
		}
	})

	t.Run("no .data files - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		folder := t.TempDir()
		mapping := createTestMappingFile(t, folder)

		if _, err := ImportSegments(ctx, fixtureInput(dbPath, folder, mapping)); err == nil {
			t.Error("expected error for no .data files")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		folder := t.TempDir()

		// The mapping path is never created on disk in this case
		in := fixtureInput(dbPath, folder, filepath.Join(folder, "mapping.json"))
		in.DatasetID = "invalid_id"

		if _, err := ImportSegments(ctx, in); err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})
}

// TestWriteIDsToDataFiles checks that writeIDsToDataFiles stores the WAV hash
// and label IDs in a .data file and reports an error for a missing file.
func TestWriteIDsToDataFiles(t *testing.T) {
	t.Run("writes hash and label IDs to .data file", func(t *testing.T) {
		wav := filepath.Join(t.TempDir(), "test.wav")
		createTestWAV(t, wav)

		// .data file with a single labelled segment
		dataPath := createTestDataFile(t, wav, []*datafile.Segment{
			{
				StartTime: 0.1,
				EndTime:   0.5,
				Labels: []*datafile.Label{
					{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
				},
			},
		})

		// One pending update: segment 0, label 0
		pending := []dataFileUpdate{
			{
				DataPath: dataPath,
				WavHash:  "test_hash_123",
				LabelIDs: map[int]map[int]string{
					0: {0: "label_id_001"},
				},
			},
		}

		if writeErrs := writeIDsToDataFiles(pending); len(writeErrs) > 0 {
			t.Errorf("unexpected errors: %v", writeErrs)
		}

		// Read the file back
		parsed, err := datafile.ParseDataFile(dataPath)
		if err != nil {
			t.Fatalf("failed to parse .data file: %v", err)
		}

		// Hash is expected in the metadata extras
		if parsed.Meta.Extra == nil {
			t.Fatal("expected extra metadata")
		}
		if got := parsed.Meta.Extra["skraak_hash"]; got != "test_hash_123" {
			t.Errorf("expected skraak_hash, got %v", got)
		}

		// Label ID is expected in the label extras
		if len(parsed.Segments) == 0 || len(parsed.Segments[0].Labels) == 0 {
			t.Fatal("expected segments and labels")
		}
		first := parsed.Segments[0].Labels[0]
		if first.Extra == nil || first.Extra["skraak_label_id"] != "label_id_001" {
			t.Errorf("expected skraak_label_id, got %v", first.Extra)
		}
	})

	t.Run("handles non-existent file", func(t *testing.T) {
		pending := []dataFileUpdate{
			{
				DataPath: "/nonexistent/path/test.data",
				WavHash:  "test_hash",
				LabelIDs: map[int]map[int]string{},
			},
		}

		if writeErrs := writeIDsToDataFiles(pending); len(writeErrs) == 0 {
			t.Error("expected error for non-existent file")
		}
	})
}

// TestImportSegmentsIntoDB calls importSegmentsIntoDB directly on the database
// returned by setupImportTestDB, with one registered file and one labelled
// segment.
func TestImportSegmentsIntoDB(t *testing.T) {
	ctx := context.Background()
	conn := setupImportTestDB(t)
	defer conn.Close()

	t.Run("imports segments within transaction", func(t *testing.T) {
		const fileID = "fitestimp001"

		// Temp WAV file
		wav := filepath.Join(t.TempDir(), "test.wav")
		sum := createTestWAV(t, wav)

		// Register the file and link it to the structured dataset
		db.InsertTestFileForCluster(t, conn, fileID, "cltest000001", "loctest00001", "test.wav", sum, 1.0)
		mustExec(t, conn, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", fileID, "dstest000001")

		// Scanned file carrying one labelled segment
		scan := scannedDataFile{
			DataPath: wav + ".data",
			WavPath:  wav,
			WavHash:  sum,
			FileID:   fileID,
			Duration: 1.0,
			Segments: []*datafile.Segment{
				{
					StartTime: 0.1,
					EndTime:   0.5,
					Labels: []*datafile.Label{
						{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
					},
				},
			},
		}

		// Lookup tables handed to the importer
		byFileID := map[string]scannedDataFile{fileID: scan}
		speciesMapping := MappingFile{"Kiwi": {Species: "Kiwi"}}
		filterIDs := map[string]string{"kiwi.txt": "fitest000001"}
		speciesIDs := map[string]string{"Kiwi": "sptest000001"}
		calltypeIDs := map[string]map[string]string{}

		segs, labelCount, subtypeCount, fileUpdates, importErrs := importSegmentsIntoDB(
			ctx, conn, byFileID, []scannedDataFile{scan},
			speciesMapping, filterIDs, speciesIDs, calltypeIDs,
			"dstest000001", nil,
		)

		if len(importErrs) > 0 {
			t.Errorf("unexpected errors: %v", importErrs)
		}
		if len(segs) != 1 {
			t.Errorf("expected 1 segment, got %d", len(segs))
		}
		if labelCount != 1 {
			t.Errorf("expected 1 label, got %d", labelCount)
		}
		if subtypeCount != 0 {
			t.Errorf("expected 0 subtypes, got %d", subtypeCount)
		}
		if len(fileUpdates) != 1 {
			t.Errorf("expected 1 update, got %d", len(fileUpdates))
		}
	})
}
package imp

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"

	"skraak/db"
)

// TestImportAudioFiles exercises ImportAudioFiles against a file-based test
// database: a successful single-file import, duplicate detection on a second
// import, and the rejection of an invalid dataset ID, a missing folder, an
// unstructured dataset and an inactive cluster.
func TestImportAudioFiles(t *testing.T) {
	ctx := context.Background()

	t.Run("happy path - import single WAV file", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		// Don't keep database open - ImportAudioFiles manages its own connections

		// Create temp folder with a WAV file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test_recording.wav")
		hash := createTestWAV(t, wavPath)

		// Import
		output, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  new(true),
		})
		if err != nil {
			t.Fatalf("ImportAudioFiles failed: %v", err)
		}

		// Verify output
		if output.Summary.TotalFiles != 1 {
			t.Errorf("expected 1 total file, got %d", output.Summary.TotalFiles)
		}
		if output.Summary.ImportedFiles != 1 {
			t.Errorf("expected 1 imported file, got %d", output.Summary.ImportedFiles)
		}
		if output.Summary.SkippedFiles != 0 {
			t.Errorf("expected 0 skipped files, got %d", output.Summary.SkippedFiles)
		}
		if len(output.Errors) != 0 {
			t.Errorf("unexpected errors: %v", output.Errors)
		}

		// Verify file was inserted into database - open new connection
		database, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		defer database.Close()

		var fileCount int
		err = database.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&fileCount)
		if err != nil {
			t.Fatalf("failed to query file: %v", err)
		}
		if fileCount != 1 {
			t.Errorf("expected 1 file in database, got %d", fileCount)
		}

		// Verify file_dataset link. The clauses must stay separated by
		// whitespace; without it the statement is not valid SQL.
		var linkCount int
		err = database.QueryRow(`SELECT COUNT(*) FROM file_dataset fd
			JOIN file f ON fd.file_id = f.id
			WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000001'`, hash).Scan(&linkCount)
		if err != nil {
			t.Fatalf("failed to query file_dataset: %v", err)
		}
		if linkCount != 1 {
			t.Errorf("expected 1 file_dataset link, got %d", linkCount)
		}
	})

	t.Run("duplicate handling - skip file with existing hash", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		// Don't keep database open - ImportAudioFiles manages its own connections

		// Create temp folder with a WAV file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test_recording.wav")
		hash := createTestWAV(t, wavPath)

		// First import
		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  new(true),
		})
		if err != nil {
			t.Fatalf("first import failed: %v", err)
		}

		// Second import of same file (should be skipped as duplicate)
		output, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  new(true),
		})
		if err != nil {
			t.Fatalf("second import failed: %v", err)
		}

		// Verify output
		if output.Summary.TotalFiles != 1 {
			t.Errorf("expected 1 total file, got %d", output.Summary.TotalFiles)
		}
		if output.Summary.ImportedFiles != 0 {
			t.Errorf("expected 0 imported files (duplicate), got %d", output.Summary.ImportedFiles)
		}
		if output.Summary.SkippedFiles != 1 {
			t.Errorf("expected 1 skipped file (duplicate), got %d", output.Summary.SkippedFiles)
		}

		// Verify only one file in database (not duplicated) - open new connection
		database, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		defer database.Close()

		var fileCount int
		err = database.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&fileCount)
		if err != nil {
			t.Fatalf("failed to query file: %v", err)
		}
		if fileCount != 1 {
			t.Errorf("expected 1 file in database (not duplicated), got %d", fileCount)
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "invalid_id",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})

	t.Run("folder does not exist", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: "/nonexistent/path",
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for nonexistent folder")
		}
	})

	t.Run("unstructured dataset rejected", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000002", // unstructured dataset
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for unstructured dataset")
		}
	})

	t.Run("inactive cluster rejected", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000002", // inactive cluster
		})
		if err == nil {
			t.Error("expected error for inactive cluster")
		}
	})
}

// boolPtr returns a pointer to the bool value.
//
//go:fix inline
func boolPtr(v bool) *bool {
	return new(v)
}

// TestImportCluster tests the lower-level cluster import function.
//
// Both subtests share one in-memory database and run ImportCluster inside a
// transaction obtained from beginTestTx.
func TestImportCluster(t *testing.T) {
	ctx := context.Background()
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("happy path", func(t *testing.T) {
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		hash := createTestWAV(t, wavPath)

		tx := beginTestTx(t, ctx, database)
		// Safety net for early exits; its result is deliberately ignored
		// because the transaction is committed below on the success path.
		defer tx.Rollback()

		output, err := ImportCluster(database, tx, ClusterImportInput{
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  true,
		})
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}
		if output.TotalFiles != 1 {
			t.Errorf("expected 1 total file, got %d", output.TotalFiles)
		}
		if output.ImportedFiles != 1 {
			t.Errorf("expected 1 imported file, got %d", output.ImportedFiles)
		}

		// Commit and verify
		if err := tx.Commit(); err != nil {
			t.Fatalf("commit failed: %v", err)
		}

		var count int
		if err := database.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&count); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		if count != 1 {
			t.Errorf("expected 1 file in database, got %d", count)
		}
	})

	t.Run("empty folder returns empty output", func(t *testing.T) {
		tmpDir := t.TempDir()

		tx := beginTestTx(t, ctx, database)
		defer tx.Rollback()

		output, err := ImportCluster(database, tx, ClusterImportInput{
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  true,
		})
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}
		if output.TotalFiles != 0 {
			t.Errorf("expected 0 total files, got %d", output.TotalFiles)
		}
	})
}

// TestCheckDuplicateHash tests the duplicate hash checking function.
//
// It covers a hash that is absent from the database and a hash that belongs
// to a file inserted by the test itself.
func TestCheckDuplicateHash(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("no duplicate found", func(t *testing.T) {
		id, isDupe, err := CheckDuplicateHash(database, "nonexistent_hash_12345")
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}
		if isDupe {
			t.Error("expected no duplicate for nonexistent hash")
		}
		if id != "" {
			t.Errorf("expected empty id, got %q", id)
		}
	})

	t.Run("duplicate found", func(t *testing.T) {
		// Insert a file with a known hash
		const testHash = "dup_test_hash_001"
		db.InsertTestFileForCluster(t, database, "fitest000003", "cltest000001", "loctest00001", "dup_test.wav", testHash, 1.0)

		id, isDupe, err := CheckDuplicateHash(database, testHash)
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}
		if !isDupe {
			t.Error("expected duplicate to be found")
		}
		if id != "fitest000003" {
			t.Errorf("expected id fitest000003, got %q", id)
		}
	})
}

// TestEnsureClusterPath tests the cluster path setting function.
//
// It checks that a NULL path gets populated and that a path which is already
// present is left exactly as it was.
func TestEnsureClusterPath(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("set empty path", func(t *testing.T) {
		// Create a cluster with empty path
		db.InsertTestCluster(t, database, "clptest00001", "dstest000001", "loctest00001", "Path Test")
		mustExec(t, database, "UPDATE cluster SET path = NULL WHERE id = 'clptest00001'")

		err := EnsureClusterPath(database, "clptest00001", "/test/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var path string
		if err := database.QueryRow("SELECT path FROM cluster WHERE id = 'clptest00001'").Scan(&path); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Path is normalized by utils.NormalizeFolderPath which may remove leading slash
		if path == "" {
			t.Error("expected path to be set")
		}
	})

	t.Run("do not overwrite existing path", func(t *testing.T) {
		// cltest000001 already has a path from setup. Record it first so the
		// check below does not depend on how the new path would be normalized.
		var before string
		if err := database.QueryRow("SELECT path FROM cluster WHERE id = 'cltest000001'").Scan(&before); err != nil {
			t.Fatalf("query failed: %v", err)
		}

		err := EnsureClusterPath(database, "cltest000001", "/new/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var path string
		if err := database.QueryRow("SELECT path FROM cluster WHERE id = 'cltest000001'").Scan(&path); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Path should NOT have been changed. Comparing with the earlier value
		// also catches an overwrite that stored a normalized form of the new
		// path instead of the literal "/new/path".
		if path == "/new/path" || path != before {
			t.Errorf("path should not have been overwritten: was %q, now %q", before, path)
		}
	})
}
package imp

import (
	"context"
	"database/sql"
	"os"
	"path/filepath"
	"testing"
)

// TestBulkFileImport runs BulkFileImport against a file-based test database.
// It covers a successful CSV-driven import plus three failure modes: a CSV
// file that does not exist, an invalid dataset ID, and a CSV row whose
// location ID is not valid for the dataset.
func TestBulkFileImport(t *testing.T) {
	ctx := context.Background()

	t.Run("happy path - import from CSV", func(t *testing.T) {
		dbFile := setupFileBasedTestDB(t)

		// Layout: <root>/recordings/test_recording.wav
		root := t.TempDir()
		recordings := filepath.Join(root, "recordings")
		if mkErr := os.Mkdir(recordings, 0755); mkErr != nil {
			t.Fatalf("failed to create wav dir: %v", mkErr)
		}
		createTestWAV(t, filepath.Join(recordings, "test_recording.wav"))

		// One CSV row pointing at the recordings folder, plus a log file.
		rows := [][]string{
			{"Test Location", "loctest00001", recordings, "2024-01", "16000", "1"},
		}
		csvFile := createTestCSVFile(t, root, rows)
		logFile := createTestLogFile(t, root)

		result, err := BulkFileImport(ctx, BulkFileImportInput{
			DBPath:      dbFile,
			DatasetID:   "dstest000001",
			CSVPath:     csvFile,
			LogFilePath: logFile,
		})
		if err != nil {
			t.Fatalf("BulkFileImport failed: %v", err)
		}

		// Summary counters reported by the import.
		if got := result.TotalLocations; got != 1 {
			t.Errorf("expected 1 location, got %d", got)
		}
		if result.TotalFilesScanned == 0 {
			t.Error("expected some files scanned")
		}
		if result.FilesImported == 0 {
			t.Error("expected some files imported")
		}

		// Reopen the database to confirm that an active cluster exists.
		conn, err := sql.Open("duckdb", dbFile)
		if err != nil {
			t.Fatalf("failed to open database: %v", err)
		}
		defer conn.Close()

		var activeClusters int
		row := conn.QueryRow("SELECT COUNT(*) FROM cluster WHERE active = true")
		if err = row.Scan(&activeClusters); err != nil {
			t.Fatalf("failed to query clusters: %v", err)
		}
		if activeClusters < 1 {
			t.Errorf("expected at least 1 cluster, got %d", activeClusters)
		}
	})

	t.Run("missing CSV file - error", func(t *testing.T) {
		dbFile := setupFileBasedTestDB(t)
		root := t.TempDir()
		logFile := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbFile,
			DatasetID:   "dstest000001",
			CSVPath:     filepath.Join(root, "nonexistent.csv"),
			LogFilePath: logFile,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for missing CSV file")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbFile := setupFileBasedTestDB(t)
		root := t.TempDir()
		csvFile := createTestCSVFile(t, root, [][]string{})
		logFile := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbFile,
			DatasetID:   "invalid_id",
			CSVPath:     csvFile,
			LogFilePath: logFile,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})

	t.Run("location not in dataset - error", func(t *testing.T) {
		dbFile := setupFileBasedTestDB(t)
		root := t.TempDir()
		recordings := filepath.Join(root, "recordings")
		if mkErr := os.Mkdir(recordings, 0755); mkErr != nil {
			t.Fatalf("failed to create wav dir: %v", mkErr)
		}
		createTestWAV(t, filepath.Join(recordings, "test.wav"))

		// The location ID in this row is deliberately invalid.
		rows := [][]string{
			{"Test Location", "invalid_loc!", recordings, "2024-01", "16000", "1"},
		}
		csvFile := createTestCSVFile(t, root, rows)
		logFile := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbFile,
			DatasetID:   "dstest000001",
			CSVPath:     csvFile,
			LogFilePath: logFile,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for location not in dataset")
		}
	})
}

// TestParseBulkCSVRow checks parseBulkCSVRow on one well-formed row and on a
// table of malformed rows that must each be rejected with an error.
func TestParseBulkCSVRow(t *testing.T) {
	t.Run("valid row", func(t *testing.T) {
		parsed, err := parseBulkCSVRow([]string{"Test Loc", "loctest00001", "/path/to/dir", "2024-01", "16000", "10"})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if got := parsed.LocationName; got != "Test Loc" {
			t.Errorf("expected Test Loc, got %s", got)
		}
		if got := parsed.LocationID; got != "loctest00001" {
			t.Errorf("expected loctest00001, got %s", got)
		}
		if got := parsed.SampleRate; got != 16000 {
			t.Errorf("expected 16000, got %d", got)
		}
		if got := parsed.FileCount; got != 10 {
			t.Errorf("expected 10, got %d", got)
		}
	})

	// Each entry is a row that must fail to parse; failMsg is reported when
	// parseBulkCSVRow accepts it anyway.
	rejected := []struct {
		name    string
		row     []string
		failMsg string
	}{
		{
			name:    "insufficient columns",
			row:     []string{"a", "b", "c"},
			failMsg: "expected error for insufficient columns",
		},
		{
			name:    "empty location_name",
			row:     []string{"", "loctest00001", "/path", "2024-01", "16000", "10"},
			failMsg: "expected error for empty location_name",
		},
		{
			name:    "empty directory_path",
			row:     []string{"Test Loc", "loctest00001", "", "2024-01", "16000", "10"},
			failMsg: "expected error for empty directory_path",
		},
		{
			name:    "invalid sample_rate",
			row:     []string{"Test Loc", "loctest00001", "/path", "2024-01", "notanumber", "10"},
			failMsg: "expected error for invalid sample_rate",
		},
		{
			name:    "invalid location_id format",
			row:     []string{"Test Loc", "badid", "/path", "2024-01", "16000", "10"},
			failMsg: "expected error for invalid location_id format",
		},
	}
	for _, tc := range rejected {
		t.Run(tc.name, func(t *testing.T) {
			if _, err := parseBulkCSVRow(tc.row); err == nil {
				t.Error(tc.failMsg)
			}
		})
	}
}

// TestBulkReadCSV checks that bulkReadCSV returns one entry per data row of a
// valid CSV file and fails on a file with no content at all.
func TestBulkReadCSV(t *testing.T) {
	t.Run("reads valid CSV", func(t *testing.T) {
		rows := [][]string{
			{"Loc1", "loctest00001", "/path1", "2024-01", "16000", "10"},
			{"Loc2", "loctest00002", "/path2", "2024-02", "48000", "20"},
		}
		csvFile := createTestCSVFile(t, t.TempDir(), rows)

		parsed, err := bulkReadCSV(csvFile)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if n := len(parsed); n != 2 {
			t.Errorf("expected 2 locations, got %d", n)
		}
	})

	t.Run("empty CSV - error", func(t *testing.T) {
		csvFile := filepath.Join(t.TempDir(), "empty.csv")
		// A zero-byte file: not even a header line is present.
		if writeErr := os.WriteFile(csvFile, []byte(""), 0644); writeErr != nil {
			t.Fatalf("failed to write CSV: %v", writeErr)
		}

		if _, err := bulkReadCSV(csvFile); err == nil {
			t.Error("expected error for empty CSV")
		}
	})
}