Fork channel

Create a new channel as a copy of main.

Rename channel

Rename main to:

Delete channel

Delete main? This cannot be undone.

import_segments_full_test.go
package imp

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"

	"skraak/datafile"
	"skraak/db"
)

// TestImportSegments drives ImportSegments end to end against a file-based
// test database and a temporary folder of WAV/.data fixtures. Every subtest
// builds its own database and folder, so the cases do not share state.
func TestImportSegments(t *testing.T) {
	ctx := context.Background()

	t.Run("happy path - import segments from .data file", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// Create temp folder with a WAV file and .data file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		hash := createTestWAV(t, wavPath)

		// Insert the file into database first (simulating pre-imported audio).
		// The seeding runs in its own function so the handle is always closed
		// before ImportSegments opens dbPath, even if a helper aborts the
		// subtest, and so a failing Close is reported instead of dropped.
		func() {
			database, err := sql.Open("duckdb", dbPath)
			if err != nil {
				t.Fatalf("failed to open database: %v", err)
			}
			defer func() {
				if err := database.Close(); err != nil {
					t.Errorf("failed to close database: %v", err)
				}
			}()
			db.InsertTestFileForCluster(t, database, "fitestseg001", "cltest000001", "loctest00001", "test.wav", hash, 1.0)
			mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitestseg001", "dstest000001")
		}()

		// Create .data file with segments
		segments := []*datafile.Segment{
			{
				StartTime: 0.1,
				EndTime:   0.5,
				Labels: []*datafile.Label{
					{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
				},
			},
		}
		// Only the file on disk matters here; the returned path is not needed.
		_ = createTestDataFile(t, wavPath, segments)

		// Create mapping file
		mappingPath := createTestMappingFile(t, tmpDir)

		// Import segments
		output, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err != nil {
			t.Fatalf("ImportSegments failed: %v", err)
		}

		// Verify output
		if output.Summary.DataFilesFound != 1 {
			t.Errorf("expected 1 data file found, got %d", output.Summary.DataFilesFound)
		}
		if output.Summary.ImportedSegments != 1 {
			t.Errorf("expected 1 imported segment, got %d", output.Summary.ImportedSegments)
		}
		if output.Summary.ImportedLabels != 1 {
			t.Errorf("expected 1 imported label, got %d", output.Summary.ImportedLabels)
		}

		// Verify segment in database using a fresh handle opened after the import.
		verifyDB, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		defer verifyDB.Close()

		var segmentCount int
		err = verifyDB.QueryRow("SELECT COUNT(*) FROM segment WHERE active = true").Scan(&segmentCount)
		if err != nil {
			t.Fatalf("failed to query segments: %v", err)
		}
		if segmentCount != 1 {
			t.Errorf("expected 1 segment in database, got %d", segmentCount)
		}

		var labelCount int
		err = verifyDB.QueryRow("SELECT COUNT(*) FROM label WHERE active = true").Scan(&labelCount)
		if err != nil {
			t.Fatalf("failed to query labels: %v", err)
		}
		if labelCount != 1 {
			t.Errorf("expected 1 label in database, got %d", labelCount)
		}
	})

	t.Run("file not in database - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// Create temp folder with a WAV file and .data file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		// Create .data file
		segments := []*datafile.Segment{
			{StartTime: 0.1, EndTime: 0.5, Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}}},
		}
		createTestDataFile(t, wavPath, segments)

		// Create mapping file
		mappingPath := createTestMappingFile(t, tmpDir)

		// Import segments. No file row was inserted, so the call itself is
		// expected to succeed and report the problem through output.Errors.
		output, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err != nil {
			t.Fatalf("ImportSegments should not return error: %v", err)
		}

		// Should have errors about file not found
		if len(output.Errors) == 0 {
			t.Error("expected errors for file not in database")
		}
	})

	t.Run("no .data files - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		mappingPath := createTestMappingFile(t, tmpDir)

		// The folder holds only the mapping file, so the import must fail.
		_, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for no .data files")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()

		// The mapping path is never created on disk; the test only requires
		// that the call returns an error for this input.
		_, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    filepath.Join(tmpDir, "mapping.json"),
			DatasetID:  "invalid_id",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})
}

// TestWriteIDsToDataFiles exercises writeIDsToDataFiles against a real .data
// file on disk and against a path that does not exist.
func TestWriteIDsToDataFiles(t *testing.T) {
	t.Run("writes hash and label IDs to .data file", func(t *testing.T) {
		// Values handed to writeIDsToDataFiles and expected back after parsing.
		const (
			wantHash    = "test_hash_123"
			wantLabelID = "label_id_001"
		)

		wavPath := filepath.Join(t.TempDir(), "test.wav")
		createTestWAV(t, wavPath)

		// Write a .data file holding one segment with a single label.
		kiwi := &datafile.Label{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}
		seg := &datafile.Segment{
			StartTime: 0.1,
			EndTime:   0.5,
			Labels:    []*datafile.Label{kiwi},
		}
		dataPath := createTestDataFile(t, wavPath, []*datafile.Segment{seg})

		// Label 0 of segment 0 receives wantLabelID.
		pending := []dataFileUpdate{{
			DataPath: dataPath,
			WavHash:  wantHash,
			LabelIDs: map[int]map[int]string{0: {0: wantLabelID}},
		}}
		if errs := writeIDsToDataFiles(pending); len(errs) != 0 {
			t.Errorf("unexpected errors: %v", errs)
		}

		// Parse the file again and inspect what was written.
		parsed, err := datafile.ParseDataFile(dataPath)
		if err != nil {
			t.Fatalf("failed to parse .data file: %v", err)
		}

		// The hash is stored in the file-level extra metadata.
		if parsed.Meta.Extra == nil {
			t.Fatal("expected extra metadata")
		}
		if got := parsed.Meta.Extra["skraak_hash"]; got != wantHash {
			t.Errorf("expected skraak_hash, got %v", got)
		}

		// The label ID is stored in the label's own extra fields.
		segs := parsed.Segments
		if len(segs) == 0 || len(segs[0].Labels) == 0 {
			t.Fatal("expected segments and labels")
		}
		if extra := segs[0].Labels[0].Extra; extra == nil || extra["skraak_label_id"] != wantLabelID {
			t.Errorf("expected skraak_label_id, got %v", extra)
		}
	})

	t.Run("handles non-existent file", func(t *testing.T) {
		missing := dataFileUpdate{
			DataPath: "/nonexistent/path/test.data",
			WavHash:  "test_hash",
			LabelIDs: map[int]map[int]string{},
		}

		if errs := writeIDsToDataFiles([]dataFileUpdate{missing}); len(errs) == 0 {
			t.Error("expected error for non-existent file")
		}
	})
}

// TestImportSegmentsIntoDB calls importSegmentsIntoDB directly with one
// pre-registered file that holds a single labelled segment, then checks the
// counts it reports.
func TestImportSegmentsIntoDB(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()
	ctx := context.Background()

	t.Run("imports segments within transaction", func(t *testing.T) {
		const (
			fileID    = "fitestimp001"
			datasetID = "dstest000001"
		)

		// A WAV on disk supplies the hash stored with the file row.
		wavPath := filepath.Join(t.TempDir(), "test.wav")
		wavHash := createTestWAV(t, wavPath)

		// Register the file and link it to the dataset.
		db.InsertTestFileForCluster(t, database, fileID, "cltest000001", "loctest00001", "test.wav", wavHash, 1.0)
		mustExec(t, database, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", fileID, datasetID)

		// Describe the scanned .data file: one segment with one label.
		kiwi := &datafile.Label{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}
		scanned := scannedDataFile{
			DataPath: wavPath + ".data",
			WavPath:  wavPath,
			WavHash:  wavHash,
			FileID:   fileID,
			Duration: 1.0,
			Segments: []*datafile.Segment{
				{StartTime: 0.1, EndTime: 0.5, Labels: []*datafile.Label{kiwi}},
			},
		}

		// Lookup tables are passed inline; the calltype table is left empty.
		segs, labelCount, subtypeCount, fileUpdates, errs := importSegmentsIntoDB(
			ctx, database,
			map[string]scannedDataFile{fileID: scanned},
			[]scannedDataFile{scanned},
			MappingFile{"Kiwi": {Species: "Kiwi"}},
			map[string]string{"kiwi.txt": "fitest000001"},
			map[string]string{"Kiwi": "sptest000001"},
			map[string]map[string]string{},
			datasetID, nil,
		)

		if len(errs) != 0 {
			t.Errorf("unexpected errors: %v", errs)
		}
		if n := len(segs); n != 1 {
			t.Errorf("expected 1 segment, got %d", n)
		}
		if labelCount != 1 {
			t.Errorf("expected 1 label, got %d", labelCount)
		}
		if subtypeCount != 0 {
			t.Errorf("expected 0 subtypes, got %d", subtypeCount)
		}
		if n := len(fileUpdates); n != 1 {
			t.Errorf("expected 1 update, got %d", n)
		}
	})
}