import_files_test.go
package imp
import (
"context"
"database/sql"
"path/filepath"
"testing"
"skraak/db"
)
// TestImportAudioFiles_HappyPath imports a folder containing a single WAV file
// and verifies the reported summary as well as the rows stored in the database.
func TestImportAudioFiles_HappyPath(t *testing.T) {
	ctx := context.Background()
	dbPath := setupFileBasedTestDB(t)

	// One recording inside a fresh temporary folder. The value returned by
	// createTestWAV is later matched against file.xxh64_hash.
	folder := t.TempDir()
	hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	input := ImportAudioFilesInput{
		DBPath:     dbPath,
		FolderPath: folder,
		DatasetID:  "dstest000001",
		LocationID: "loctest00001",
		ClusterID:  "cltest000001",
		Recursive:  new(true),
	}
	output, err := ImportAudioFiles(ctx, input)
	if err != nil {
		t.Fatalf("ImportAudioFiles failed: %v", err)
	}

	// The summary must report exactly one file: imported, not skipped, no errors.
	if got := output.Summary.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := output.Summary.ImportedFiles; got != 1 {
		t.Errorf("expected 1 imported file, got %d", got)
	}
	if got := output.Summary.SkippedFiles; got != 0 {
		t.Errorf("expected 0 skipped files, got %d", got)
	}
	if problems := output.Errors; len(problems) != 0 {
		t.Errorf("unexpected errors: %v", problems)
	}

	// Inspect the stored rows through a separate connection to the same file.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	// Exactly one active file row must carry the hash.
	var files int
	fileRow := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash)
	if err = fileRow.Scan(&files); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if files != 1 {
		t.Errorf("expected 1 file in database, got %d", files)
	}

	// Exactly one file_dataset row must link that file to the target dataset.
	const linkQuery = `
SELECT COUNT(*) FROM file_dataset fd
JOIN file f ON fd.file_id = f.id
WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000001'
`
	var links int
	if err = conn.QueryRow(linkQuery, hash).Scan(&links); err != nil {
		t.Fatalf("failed to query file_dataset: %v", err)
	}
	if links != 1 {
		t.Errorf("expected 1 file_dataset link, got %d", links)
	}
}
// TestImportAudioFiles_DuplicateHandling imports the same folder twice and
// verifies that the second run reports the file as skipped and that only one
// matching row exists in the database afterwards.
func TestImportAudioFiles_DuplicateHandling(t *testing.T) {
	ctx := context.Background()
	dbPath := setupFileBasedTestDB(t)

	// One recording inside a fresh temporary folder.
	folder := t.TempDir()
	hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	// Both runs use identical parameters; build a fresh input value for each
	// call so the two runs share no state.
	newInput := func() ImportAudioFilesInput {
		return ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: folder,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  new(true),
		}
	}

	// First run: only required to succeed.
	if _, err := ImportAudioFiles(ctx, newInput()); err != nil {
		t.Fatalf("first import failed: %v", err)
	}

	// Second run over the same file: expected to be skipped as a duplicate.
	output, err := ImportAudioFiles(ctx, newInput())
	if err != nil {
		t.Fatalf("second import failed: %v", err)
	}

	if got := output.Summary.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := output.Summary.ImportedFiles; got != 0 {
		t.Errorf("expected 0 imported files (duplicate), got %d", got)
	}
	if got := output.Summary.SkippedFiles; got != 1 {
		t.Errorf("expected 1 skipped file (duplicate), got %d", got)
	}

	// Count the stored rows through a separate connection to the same file.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	var files int
	fileRow := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash)
	if err = fileRow.Scan(&files); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if files != 1 {
		t.Errorf("expected 1 file in database (not duplicated), got %d", files)
	}
}
func TestImportAudioFiles_ErrorCases(t *testing.T) {
ctx := context.Background()
t.Run("invalid dataset ID", func(t *testing.T) {
dbPath := setupFileBasedTestDB(t)
tmpDir := t.TempDir()
_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
DBPath: dbPath,
FolderPath: tmpDir,
DatasetID: "invalid_id",
LocationID: "loctest00001",
ClusterID: "cltest000001",
})
if err == nil {
t.Error("expected error for invalid dataset ID")
}
})
t.Run("folder does not exist", func(t *testing.T) {
dbPath := setupFileBasedTestDB(t)
_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
DBPath: dbPath,
FolderPath: "/nonexistent/path",
DatasetID: "dstest000001",
LocationID: "loctest00001",
ClusterID: "cltest000001",
})
if err == nil {
t.Error("expected error for nonexistent folder")
}
})
t.Run("unstructured dataset rejected", func(t *testing.T) {
dbPath := setupFileBasedTestDB(t)
tmpDir := t.TempDir()
wavPath := filepath.Join(tmpDir, "test.wav")
createTestWAV(t, wavPath)
_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
DBPath: dbPath,
FolderPath: tmpDir,
DatasetID: "dstest000002", // unstructured dataset
LocationID: "loctest00001",
ClusterID: "cltest000001",
})
if err == nil {
t.Error("expected error for unstructured dataset")
}
})
t.Run("inactive cluster rejected", func(t *testing.T) {
dbPath := setupFileBasedTestDB(t)
tmpDir := t.TempDir()
wavPath := filepath.Join(tmpDir, "test.wav")
createTestWAV(t, wavPath)
_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
DBPath: dbPath,
FolderPath: tmpDir,
DatasetID: "dstest000001",
LocationID: "loctest00001",
ClusterID: "cltest000002", // inactive cluster
})
if err == nil {
t.Error("expected error for inactive cluster")
}
})
}
// TestImportCluster exercises ImportCluster directly, running it inside a
// transaction obtained from beginTestTx on a database shared by both subtests.
func TestImportCluster(t *testing.T) {
	ctx := context.Background()
	database := setupImportTestDB(t)
	defer database.Close()

	// inputFor builds the import parameters used by every subtest; only the
	// folder differs between them.
	inputFor := func(folder string) ClusterImportInput {
		return ClusterImportInput{
			FolderPath: folder,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  true,
		}
	}

	t.Run("happy path", func(t *testing.T) {
		folder := t.TempDir()
		hash := createTestWAV(t, filepath.Join(folder, "test.wav"))

		tx := beginTestTx(t, ctx, database)
		defer tx.Rollback()

		output, err := ImportCluster(database, tx, inputFor(folder))
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}
		if got := output.TotalFiles; got != 1 {
			t.Errorf("expected 1 total file, got %d", got)
		}
		if got := output.ImportedFiles; got != 1 {
			t.Errorf("expected 1 imported file, got %d", got)
		}

		// Commit, then confirm the row through the database handle itself.
		err = tx.Commit()
		if err != nil {
			t.Fatalf("commit failed: %v", err)
		}

		var stored int
		row := database.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash)
		if err := row.Scan(&stored); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		if stored != 1 {
			t.Errorf("expected 1 file in database, got %d", stored)
		}
	})

	t.Run("empty folder returns empty output", func(t *testing.T) {
		folder := t.TempDir()

		tx := beginTestTx(t, ctx, database)
		defer tx.Rollback()

		output, err := ImportCluster(database, tx, inputFor(folder))
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}
		if got := output.TotalFiles; got != 0 {
			t.Errorf("expected 0 total files, got %d", got)
		}
	})
}
// TestCheckDuplicateHash verifies CheckDuplicateHash for a hash that is absent
// from the database and for one that was inserted just before the lookup.
func TestCheckDuplicateHash(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("no duplicate found", func(t *testing.T) {
		fileID, found, err := CheckDuplicateHash(database, "nonexistent_hash_12345")
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}

		// A miss is expected to report false together with an empty id.
		if found {
			t.Error("expected no duplicate for nonexistent hash")
		}
		if fileID != "" {
			t.Errorf("expected empty id, got %q", fileID)
		}
	})

	t.Run("duplicate found", func(t *testing.T) {
		// Seed a file row carrying a known hash, then look that hash up.
		const testHash = "dup_test_hash_001"
		db.InsertTestFileForCluster(t, database, "fitest000003", "cltest000001", "loctest00001", "dup_test.wav", testHash, 1.0)

		fileID, found, err := CheckDuplicateHash(database, testHash)
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}

		// A hit is expected to report true and the id of the seeded row.
		if !found {
			t.Error("expected duplicate to be found")
		}
		if fileID != "fitest000003" {
			t.Errorf("expected id fitest000003, got %q", fileID)
		}
	})
}
// TestEnsureClusterPath tests the cluster path setting function.
//
// Both subtests run against one database obtained from setupImportTestDB,
// which is closed when the test function returns.
func TestEnsureClusterPath(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("set empty path", func(t *testing.T) {
		// Create a cluster, then null out its path so nothing is stored yet.
		db.InsertTestCluster(t, database, "clptest00001", "dstest000001", "loctest00001", "Path Test")
		mustExec(t, database, "UPDATE cluster SET path = NULL WHERE id = 'clptest00001'")

		err := EnsureClusterPath(database, "clptest00001", "/test/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var path string
		if err := database.QueryRow("SELECT path FROM cluster WHERE id = 'clptest00001'").Scan(&path); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Path is normalized by utils.NormalizeFolderPath which may remove leading slash,
		// so only assert that something was stored rather than an exact value.
		if path == "" {
			t.Error("expected path to be set")
		}
	})

	t.Run("do not overwrite existing path", func(t *testing.T) {
		const pathQuery = "SELECT path FROM cluster WHERE id = 'cltest000001'"

		// cltest000001 is expected to already have a path from setup. Capture
		// it first: comparing the stored value with the raw "/new/path"
		// argument would pass vacuously if the stored path is normalized
		// before being written.
		var before string
		if err := database.QueryRow(pathQuery).Scan(&before); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		if before == "" {
			t.Fatal("precondition failed: cltest000001 has no existing path to preserve")
		}

		err := EnsureClusterPath(database, "cltest000001", "/new/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var after string
		if err := database.QueryRow(pathQuery).Scan(&after); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Path should NOT have been changed in any way.
		if after != before {
			t.Errorf("path should not have been overwritten: was %q, now %q", before, after)
		}
	})
}