Fork channel

Create a new channel as a copy of main.

Rename channel

Rename main to:

Delete channel

Delete main? This cannot be undone.

import_files_test.go
package imp

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"

	"skraak/db"
)

// TestImportAudioFiles_HappyPath imports a folder containing one WAV file and
// checks the reported summary as well as the rows visible through a fresh
// database connection.
func TestImportAudioFiles_HappyPath(t *testing.T) {
	dbPath := setupFileBasedTestDB(t)

	// The input folder holds exactly one WAV file.
	folder := t.TempDir()
	wavHash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	result, err := ImportAudioFiles(context.Background(), ImportAudioFilesInput{
		DBPath:     dbPath,
		FolderPath: folder,
		DatasetID:  "dstest000001",
		LocationID: "loctest00001",
		ClusterID:  "cltest000001",
		Recursive:  new(true),
	})
	if err != nil {
		t.Fatalf("ImportAudioFiles failed: %v", err)
	}

	// Summary counters: one file seen, one imported, none skipped, no errors.
	if got := result.Summary.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := result.Summary.ImportedFiles; got != 1 {
		t.Errorf("expected 1 imported file, got %d", got)
	}
	if got := result.Summary.SkippedFiles; got != 0 {
		t.Errorf("expected 0 skipped files, got %d", got)
	}
	if len(result.Errors) != 0 {
		t.Errorf("unexpected errors: %v", result.Errors)
	}

	// Re-open the database on a new connection to inspect what was written.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	var activeFiles int
	if err := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash).Scan(&activeFiles); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if activeFiles != 1 {
		t.Errorf("expected 1 file in database, got %d", activeFiles)
	}

	// The imported file must also be linked to the dataset.
	const linkQuery = `
		SELECT COUNT(*) FROM file_dataset fd
		JOIN file f ON fd.file_id = f.id
		WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000001'
	`
	var links int
	if err := conn.QueryRow(linkQuery, wavHash).Scan(&links); err != nil {
		t.Fatalf("failed to query file_dataset: %v", err)
	}
	if links != 1 {
		t.Errorf("expected 1 file_dataset link, got %d", links)
	}
}

// TestImportAudioFiles_DuplicateHandling imports the same folder twice and
// expects the second pass to report the file as skipped rather than insert it
// a second time.
func TestImportAudioFiles_DuplicateHandling(t *testing.T) {
	ctx := context.Background()
	dbPath := setupFileBasedTestDB(t)

	folder := t.TempDir()
	wavHash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	// Both passes use the same parameters; each call gets a freshly built
	// input so no pointer field is shared between them.
	newInput := func() ImportAudioFilesInput {
		return ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: folder,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  new(true),
		}
	}

	if _, err := ImportAudioFiles(ctx, newInput()); err != nil {
		t.Fatalf("first import failed: %v", err)
	}

	// Second pass over the same file: it should be treated as a duplicate.
	second, err := ImportAudioFiles(ctx, newInput())
	if err != nil {
		t.Fatalf("second import failed: %v", err)
	}

	if got := second.Summary.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := second.Summary.ImportedFiles; got != 0 {
		t.Errorf("expected 0 imported files (duplicate), got %d", got)
	}
	if got := second.Summary.SkippedFiles; got != 1 {
		t.Errorf("expected 1 skipped file (duplicate), got %d", got)
	}

	// Inspect the database through a new connection: still a single active row.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	var activeFiles int
	if err := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash).Scan(&activeFiles); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if activeFiles != 1 {
		t.Errorf("expected 1 file in database (not duplicated), got %d", activeFiles)
	}
}

func TestImportAudioFiles_ErrorCases(t *testing.T) {
	ctx := context.Background()

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "invalid_id",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})

	t.Run("folder does not exist", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: "/nonexistent/path",
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for nonexistent folder")
		}
	})

	t.Run("unstructured dataset rejected", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000002", // unstructured dataset
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
		})
		if err == nil {
			t.Error("expected error for unstructured dataset")
		}
	})

	t.Run("inactive cluster rejected", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		tmpDir := t.TempDir()
		wavPath := filepath.Join(tmpDir, "test.wav")
		createTestWAV(t, wavPath)

		_, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
			DBPath:     dbPath,
			FolderPath: tmpDir,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000002", // inactive cluster
		})
		if err == nil {
			t.Error("expected error for inactive cluster")
		}
	})
}

// TestImportCluster exercises ImportCluster directly, passing it an open
// database handle and a transaction managed by the test itself.
func TestImportCluster(t *testing.T) {
	ctx := context.Background()
	conn := setupImportTestDB(t)
	defer conn.Close()

	// Every subtest targets the same dataset, location and cluster; only the
	// folder being imported differs.
	inputFor := func(folder string) ClusterImportInput {
		return ClusterImportInput{
			FolderPath: folder,
			DatasetID:  "dstest000001",
			LocationID: "loctest00001",
			ClusterID:  "cltest000001",
			Recursive:  true,
		}
	}

	t.Run("happy path", func(t *testing.T) {
		folder := t.TempDir()
		wavHash := createTestWAV(t, filepath.Join(folder, "test.wav"))

		tx := beginTestTx(t, ctx, conn)
		defer tx.Rollback()

		result, err := ImportCluster(conn, tx, inputFor(folder))
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}

		if got := result.TotalFiles; got != 1 {
			t.Errorf("expected 1 total file, got %d", got)
		}
		if got := result.ImportedFiles; got != 1 {
			t.Errorf("expected 1 imported file, got %d", got)
		}

		// Commit first so the row can be read back outside the transaction.
		if err := tx.Commit(); err != nil {
			t.Fatalf("commit failed: %v", err)
		}

		var activeFiles int
		row := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", wavHash)
		if err := row.Scan(&activeFiles); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		if activeFiles != 1 {
			t.Errorf("expected 1 file in database, got %d", activeFiles)
		}
	})

	t.Run("empty folder returns empty output", func(t *testing.T) {
		folder := t.TempDir()

		tx := beginTestTx(t, ctx, conn)
		defer tx.Rollback()

		result, err := ImportCluster(conn, tx, inputFor(folder))
		if err != nil {
			t.Fatalf("ImportCluster failed: %v", err)
		}

		if got := result.TotalFiles; got != 0 {
			t.Errorf("expected 0 total files, got %d", got)
		}
	})
}

// TestCheckDuplicateHash covers both outcomes of CheckDuplicateHash: a hash
// with no matching file, and a hash belonging to a file inserted by the test.
func TestCheckDuplicateHash(t *testing.T) {
	conn := setupImportTestDB(t)
	defer conn.Close()

	t.Run("no duplicate found", func(t *testing.T) {
		fileID, found, err := CheckDuplicateHash(conn, "nonexistent_hash_12345")
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}

		if found {
			t.Error("expected no duplicate for nonexistent hash")
		}
		if fileID != "" {
			t.Errorf("expected empty id, got %q", fileID)
		}
	})

	t.Run("duplicate found", func(t *testing.T) {
		// Seed a file row carrying a known hash, then look that hash up.
		const knownHash = "dup_test_hash_001"
		db.InsertTestFileForCluster(t, conn, "fitest000003", "cltest000001", "loctest00001", "dup_test.wav", knownHash, 1.0)

		fileID, found, err := CheckDuplicateHash(conn, knownHash)
		if err != nil {
			t.Fatalf("CheckDuplicateHash failed: %v", err)
		}

		if !found {
			t.Error("expected duplicate to be found")
		}
		if fileID != "fitest000003" {
			t.Errorf("expected id fitest000003, got %q", fileID)
		}
	})
}

// TestEnsureClusterPath verifies that EnsureClusterPath sets the path of a
// cluster whose path is NULL and leaves an already-populated path unchanged.
func TestEnsureClusterPath(t *testing.T) {
	database := setupImportTestDB(t)
	defer database.Close()

	t.Run("set empty path", func(t *testing.T) {
		// Create a cluster and clear its path so there is something to fill in.
		db.InsertTestCluster(t, database, "clptest00001", "dstest000001", "loctest00001", "Path Test")
		mustExec(t, database, "UPDATE cluster SET path = NULL WHERE id = 'clptest00001'")

		err := EnsureClusterPath(database, "clptest00001", "/test/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var path string
		if err := database.QueryRow("SELECT path FROM cluster WHERE id = 'clptest00001'").Scan(&path); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Path is normalized by utils.NormalizeFolderPath which may remove leading slash,
		// so only assert that some value was stored.
		if path == "" {
			t.Error("expected path to be set")
		}
	})

	t.Run("do not overwrite existing path", func(t *testing.T) {
		const selectPath = "SELECT path FROM cluster WHERE id = 'cltest000001'"

		// cltest000001 already has a path from setup; record it before the call.
		var before string
		if err := database.QueryRow(selectPath).Scan(&before); err != nil {
			t.Fatalf("query failed: %v", err)
		}

		err := EnsureClusterPath(database, "cltest000001", "/new/path")
		if err != nil {
			t.Fatalf("EnsureClusterPath failed: %v", err)
		}

		var after string
		if err := database.QueryRow(selectPath).Scan(&after); err != nil {
			t.Fatalf("query failed: %v", err)
		}
		// Compare against the previously stored value instead of the raw
		// "/new/path" input: stored paths may be normalized, so an overwrite
		// would not necessarily be equal to the string that was passed in.
		if after != before {
			t.Errorf("path should not have been overwritten: was %q, now %q", before, after)
		}
	})
}