// import_segments_full_test.go
package imp
import (
"context"
"database/sql"
"path/filepath"
"testing"
"skraak/datafile"
"skraak/db"
)
// TestImportSegments exercises ImportSegments against a file-based test
// database. It covers a successful import, a .data file whose WAV was never
// inserted into the database, a folder containing no .data files, and an
// invalid dataset ID.
func TestImportSegments(t *testing.T) {
	ctx := context.Background()

	t.Run("happy path - import segments from .data file", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// Create temp folder with a WAV file and .data file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		hash := createTestWAV(t, wavPath)

		// Insert the file into the database first (simulating pre-imported audio).
		// Seeding runs in its own function so the handle is released by defer
		// even if one of the helpers taking t aborts the test, and so it is
		// always closed before ImportSegments opens the same database file.
		func() {
			seedDB, err := sql.Open("duckdb", dbPath)
			if err != nil {
				t.Fatalf("failed to open database: %v", err)
			}
			defer func() {
				// A failed Close is reported rather than ignored: the rows
				// seeded here are what ImportSegments is expected to find.
				if cerr := seedDB.Close(); cerr != nil {
					t.Errorf("failed to close seed database: %v", cerr)
				}
			}()

			db.InsertTestFileForCluster(t, seedDB, "fitestseg001", "cltest000001", "loctest00001", "test.wav", hash, 1.0)
			mustExec(t, seedDB, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", "fitestseg001", "dstest000001")
		}()

		// Create .data file with segments
		segments := []*datafile.Segment{
			{
				StartTime: 0.1,
				EndTime:   0.5,
				Labels: []*datafile.Label{
					{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"},
				},
			},
		}
		// The returned path is not needed; the importer is pointed at the folder.
		_ = createTestDataFile(t, wavPath, segments)

		// Create mapping file
		mappingPath := createTestMappingFile(t, tmpDir)

		// Import segments
		output, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err != nil {
			t.Fatalf("ImportSegments failed: %v", err)
		}

		// Verify output
		if output.Summary.DataFilesFound != 1 {
			t.Errorf("expected 1 data file found, got %d", output.Summary.DataFilesFound)
		}
		if output.Summary.ImportedSegments != 1 {
			t.Errorf("expected 1 imported segment, got %d", output.Summary.ImportedSegments)
		}
		if output.Summary.ImportedLabels != 1 {
			t.Errorf("expected 1 imported label, got %d", output.Summary.ImportedLabels)
		}

		// Verify segment in database using a fresh handle.
		verifyDB, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database for verification: %v", err)
		}
		defer verifyDB.Close()

		var segmentCount int
		err = verifyDB.QueryRow("SELECT COUNT(*) FROM segment WHERE active = true").Scan(&segmentCount)
		if err != nil {
			t.Fatalf("failed to query segments: %v", err)
		}
		if segmentCount != 1 {
			t.Errorf("expected 1 segment in database, got %d", segmentCount)
		}

		var labelCount int
		err = verifyDB.QueryRow("SELECT COUNT(*) FROM label WHERE active = true").Scan(&labelCount)
		if err != nil {
			t.Fatalf("failed to query labels: %v", err)
		}
		if labelCount != 1 {
			t.Errorf("expected 1 label in database, got %d", labelCount)
		}
	})

	t.Run("file not in database - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// Create temp folder with a WAV file and .data file
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		// Create .data file
		segments := []*datafile.Segment{
			{StartTime: 0.1, EndTime: 0.5, Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}}},
		}
		createTestDataFile(t, wavPath, segments)

		// Create mapping file
		mappingPath := createTestMappingFile(t, tmpDir)

		// Import segments. No file row was inserted for this WAV, so the test
		// expects a nil error with the problem reported in output.Errors.
		output, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err != nil {
			t.Fatalf("ImportSegments should not return error: %v", err)
		}

		// Should have errors about file not found
		if len(output.Errors) == 0 {
			t.Error("expected errors for file not in database")
		}
	})

	t.Run("no .data files - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		mappingPath := createTestMappingFile(t, tmpDir)

		// No .data file is created in this subtest.
		_, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    mappingPath,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for no .data files")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()

		// The mapping path is never created; the test only requires that the
		// call returns an error for this input.
		_, err := ImportSegments(ctx, ImportSegmentsInput{
			DBPath:     dbPath,
			Folder:     tmpDir,
			Mapping:    filepath.Join(tmpDir, "mapping.json"),
			DatasetID:  "invalid_id",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})
}
// TestWriteIDsToDataFiles checks writeIDsToDataFiles in two situations: an
// existing .data file, which is parsed back to confirm the hash and label ID
// were stored, and a path that does not exist, which must yield an error.
func TestWriteIDsToDataFiles(t *testing.T) {
	t.Run("writes hash and label IDs to .data file", func(t *testing.T) {
		dir := t.TempDir()
		wav := filepath.Join(dir, "test.wav")
		createTestWAV(t, wav)

		// Build a .data file holding one segment with one label.
		kiwi := &datafile.Label{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}
		seg := &datafile.Segment{
			StartTime: 0.1,
			EndTime:   0.5,
			Labels:    []*datafile.Label{kiwi},
		}
		dataPath := createTestDataFile(t, wav, []*datafile.Segment{seg})

		// Describe the update to apply. The nested map is presumably keyed by
		// segment index, then label index (the read-back below looks at
		// Segments[0].Labels[0]) — confirm against dataFileUpdate.
		labelIDs := map[int]map[int]string{
			0: {0: "label_id_001"},
		}
		update := dataFileUpdate{
			DataPath: dataPath,
			WavHash:  "test_hash_123",
			LabelIDs: labelIDs,
		}

		if errs := writeIDsToDataFiles([]dataFileUpdate{update}); len(errs) != 0 {
			t.Errorf("unexpected errors: %v", errs)
		}

		// Parse the file again and inspect what was written.
		parsed, err := datafile.ParseDataFile(dataPath)
		if err != nil {
			t.Fatalf("failed to parse .data file: %v", err)
		}

		// The hash is looked up in the file-level extra metadata.
		extra := parsed.Meta.Extra
		if extra == nil {
			t.Fatal("expected extra metadata")
		}
		if got := extra["skraak_hash"]; got != "test_hash_123" {
			t.Errorf("expected skraak_hash, got %v", got)
		}

		// The label ID is looked up on the first label of the first segment.
		if len(parsed.Segments) == 0 {
			t.Fatal("expected segments and labels")
		}
		labels := parsed.Segments[0].Labels
		if len(labels) == 0 {
			t.Fatal("expected segments and labels")
		}
		first := labels[0]
		if first.Extra == nil || first.Extra["skraak_label_id"] != "label_id_001" {
			t.Errorf("expected skraak_label_id, got %v", first.Extra)
		}
	})

	t.Run("handles non-existent file", func(t *testing.T) {
		missing := dataFileUpdate{
			DataPath: "/nonexistent/path/test.data",
			WavHash:  "test_hash",
			LabelIDs: map[int]map[int]string{},
		}

		errs := writeIDsToDataFiles([]dataFileUpdate{missing})
		if len(errs) == 0 {
			t.Error("expected error for non-existent file")
		}
	})
}
// TestImportSegmentsIntoDB calls importSegmentsIntoDB directly with one scanned
// file containing one labelled segment, then checks the returned segments,
// label count, subtype count, file updates and errors.
func TestImportSegmentsIntoDB(t *testing.T) {
	ctx := context.Background()
	conn := setupImportTestDB(t)
	defer conn.Close()

	t.Run("imports segments within transaction", func(t *testing.T) {
		const (
			fileID    = "fitestimp001"
			datasetID = "dstest000001"
		)

		// A WAV on disk supplies the hash stored with the file row.
		dir := t.TempDir()
		wav := filepath.Join(dir, "test.wav")
		wavHash := createTestWAV(t, wav)

		// Register the file and link it to the dataset before importing.
		db.InsertTestFileForCluster(t, conn, fileID, "cltest000001", "loctest00001", "test.wav", wavHash, 1.0)
		mustExec(t, conn, "INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified) VALUES (?, ?, now(), now())", fileID, datasetID)

		// Describe the scanned .data file: one segment with one label.
		kiwi := &datafile.Label{Species: "Kiwi", Certainty: 90, Filter: "kiwi.txt"}
		seg := &datafile.Segment{
			StartTime: 0.1,
			EndTime:   0.5,
			Labels:    []*datafile.Label{kiwi},
		}
		scan := scannedDataFile{
			DataPath: wav + ".data",
			WavPath:  wav,
			WavHash:  wavHash,
			FileID:   fileID,
			Duration: 1.0,
			Segments: []*datafile.Segment{seg},
		}

		// Lookup tables handed to the importer.
		byFileID := map[string]scannedDataFile{fileID: scan}
		speciesMapping := MappingFile{"Kiwi": {Species: "Kiwi"}}
		filterIDs := map[string]string{"kiwi.txt": "fitest000001"}
		speciesIDs := map[string]string{"Kiwi": "sptest000001"}
		calltypeIDs := map[string]map[string]string{}

		segs, labelCount, subtypeCount, fileUpdates, errs := importSegmentsIntoDB(
			ctx, conn, byFileID, []scannedDataFile{scan},
			speciesMapping, filterIDs, speciesIDs, calltypeIDs,
			datasetID, nil,
		)

		if len(errs) != 0 {
			t.Errorf("unexpected errors: %v", errs)
		}
		if n := len(segs); n != 1 {
			t.Errorf("expected 1 segment, got %d", n)
		}
		if labelCount != 1 {
			t.Errorf("expected 1 label, got %d", labelCount)
		}
		if subtypeCount != 0 {
			t.Errorf("expected 0 subtypes, got %d", subtypeCount)
		}
		if n := len(fileUpdates); n != 1 {
			t.Errorf("expected 1 update, got %d", n)
		}
	})
}