Fork channel

Create a new channel as a copy of main.

Rename channel

Rename main to:

Delete channel

Delete main? This cannot be undone.

import_unstructured_test.go
package imp

import (
	"context"
	"database/sql"
	"os"
	"path/filepath"
	"testing"
)

// TestImportUnstructured_HappyPath imports a single WAV file into the
// unstructured test dataset and checks both the reported counters and the
// rows that end up in the file and file_dataset tables.
func TestImportUnstructured_HappyPath(t *testing.T) {
	ctx := context.Background()
	dbPath := setupFileBasedTestDB(t)

	// The whole input is one temp folder holding one WAV file.
	folder := t.TempDir()
	hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	input := ImportUnstructuredInput{
		DBPath:     dbPath,
		DatasetID:  "dstest000002", // unstructured dataset
		FolderPath: folder,
		Recursive:  new(true),
	}
	output, err := ImportUnstructured(ctx, input)
	if err != nil {
		t.Fatalf("ImportUnstructured failed: %v", err)
	}

	// Counters reported by the import.
	if got := output.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := output.ImportedFiles; got != 1 {
		t.Errorf("expected 1 imported file, got %d", got)
	}
	if got := output.SkippedFiles; got != 0 {
		t.Errorf("expected 0 skipped files, got %d", got)
	}
	if len(output.Errors) != 0 {
		t.Errorf("unexpected errors: %v", output.Errors)
	}

	// Re-open the database to inspect what was actually written.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	// Exactly one active file row must carry the hash of the test WAV.
	var activeFiles int
	if err := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&activeFiles); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if activeFiles != 1 {
		t.Errorf("expected 1 file in database, got %d", activeFiles)
	}

	// The file must be linked to the dataset exactly once.
	var links int
	if err := conn.QueryRow(`
		SELECT COUNT(*) FROM file_dataset fd
		JOIN file f ON fd.file_id = f.id
		WHERE f.xxh64_hash = ? AND fd.dataset_id = 'dstest000002'
	`, hash).Scan(&links); err != nil {
		t.Fatalf("failed to query file_dataset: %v", err)
	}
	if links != 1 {
		t.Errorf("expected 1 file_dataset link, got %d", links)
	}

	// Unstructured files are expected to have NULL location_id and cluster_id.
	var (
		location sql.NullString
		cluster  sql.NullString
	)
	if err := conn.QueryRow("SELECT location_id, cluster_id FROM file WHERE xxh64_hash = ?", hash).Scan(&location, &cluster); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if location.Valid {
		t.Errorf("expected NULL location_id for unstructured file, got %s", location.String)
	}
	if cluster.Valid {
		t.Errorf("expected NULL cluster_id for unstructured file, got %s", cluster.String)
	}
}

// TestImportUnstructured_DuplicateHandling imports the same folder twice and
// checks that the second run reports the file as skipped and that only one
// active file row with the WAV's hash exists afterwards.
func TestImportUnstructured_DuplicateHandling(t *testing.T) {
	ctx := context.Background()
	dbPath := setupFileBasedTestDB(t)

	// One WAV file in a temp folder, imported twice below.
	folder := t.TempDir()
	hash := createTestWAV(t, filepath.Join(folder, "test_recording.wav"))

	// newInput builds a fresh, identical request for each import run.
	newInput := func() ImportUnstructuredInput {
		return ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "dstest000002",
			FolderPath: folder,
			Recursive:  new(true),
		}
	}

	// First run: its output is not inspected, it only has to succeed.
	if _, err := ImportUnstructured(ctx, newInput()); err != nil {
		t.Fatalf("first import failed: %v", err)
	}

	// Second run over the same file should skip it as a duplicate.
	output, err := ImportUnstructured(ctx, newInput())
	if err != nil {
		t.Fatalf("second import failed: %v", err)
	}

	if got := output.TotalFiles; got != 1 {
		t.Errorf("expected 1 total file, got %d", got)
	}
	if got := output.ImportedFiles; got != 0 {
		t.Errorf("expected 0 imported files (duplicate), got %d", got)
	}
	if got := output.SkippedFiles; got != 1 {
		t.Errorf("expected 1 skipped file (duplicate), got %d", got)
	}

	// The database must still contain a single active row for the hash.
	conn, err := sql.Open("duckdb", dbPath)
	if err != nil {
		t.Fatalf("failed to open database for verification: %v", err)
	}
	defer conn.Close()

	var activeFiles int
	if err := conn.QueryRow("SELECT COUNT(*) FROM file WHERE xxh64_hash = ? AND active = true", hash).Scan(&activeFiles); err != nil {
		t.Fatalf("failed to query file: %v", err)
	}
	if activeFiles != 1 {
		t.Errorf("expected 1 file in database (not duplicated), got %d", activeFiles)
	}
}

// TestImportUnstructured_EdgeCases covers the empty-folder case, which must
// succeed with zero files, and three inputs for which ImportUnstructured is
// expected to return an error.
func TestImportUnstructured_EdgeCases(t *testing.T) {
	ctx := context.Background()

	t.Run("empty folder returns empty output", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		folder := t.TempDir()

		input := ImportUnstructuredInput{
			DBPath:     dbPath,
			DatasetID:  "dstest000002",
			FolderPath: folder,
			Recursive:  new(true),
		}
		output, err := ImportUnstructured(ctx, input)
		if err != nil {
			t.Fatalf("ImportUnstructured failed: %v", err)
		}

		if got := output.TotalFiles; got != 0 {
			t.Errorf("expected 0 total files, got %d", got)
		}
	})

	// Each remaining case only asserts that the import returns an error.
	// folder prepares the import folder (after the database is set up) and
	// returns its path.
	rejected := []struct {
		name      string
		datasetID string
		folder    func(t *testing.T) string
		failure   string
	}{
		{
			name:      "structured dataset rejected",
			datasetID: "dstest000001", // structured dataset
			folder: func(t *testing.T) string {
				dir := t.TempDir()
				createTestWAV(t, filepath.Join(dir, "test.wav"))
				return dir
			},
			failure: "expected error for structured dataset",
		},
		{
			name:      "invalid dataset ID",
			datasetID: "invalid_id",
			folder: func(t *testing.T) string {
				return t.TempDir()
			},
			failure: "expected error for invalid dataset ID",
		},
		{
			name:      "folder does not exist",
			datasetID: "dstest000002",
			folder: func(*testing.T) string {
				return "/nonexistent/path"
			},
			failure: "expected error for nonexistent folder",
		},
	}

	for _, tc := range rejected {
		t.Run(tc.name, func(t *testing.T) {
			dbPath := setupFileBasedTestDB(t)
			folder := tc.folder(t)

			_, err := ImportUnstructured(ctx, ImportUnstructuredInput{
				DBPath:     dbPath,
				DatasetID:  tc.datasetID,
				FolderPath: folder,
			})
			if err == nil {
				t.Error(tc.failure)
			}
		})
	}
}

// TestScanWavFiles checks how many files scanWavFiles returns for a few
// folder layouts, covering extension case and the recursive flag.
func TestScanWavFiles(t *testing.T) {
	// singleFile yields a layout with one WAV of the given name in the root.
	singleFile := func(name string) func(*testing.T, string) {
		return func(t *testing.T, root string) {
			createTestWAV(t, filepath.Join(root, name))
		}
	}

	// rootAndSubdir puts one WAV in the root and one in a "subdir" child.
	rootAndSubdir := func(t *testing.T, root string) {
		sub := filepath.Join(root, "subdir")
		if err := os.Mkdir(sub, 0755); err != nil {
			t.Fatalf("failed to create subdir: %v", err)
		}

		createTestWAV(t, filepath.Join(root, "root.wav"))
		createTestWAV(t, filepath.Join(sub, "sub.wav"))
	}

	cases := []struct {
		name      string
		layout    func(t *testing.T, root string)
		recursive bool
		want      int
		mismatch  string // format for the count-mismatch message
	}{
		{
			name:     "finds WAV files",
			layout:   singleFile("test.wav"),
			want:     1,
			mismatch: "expected 1 file, got %d",
		},
		{
			name:     "case insensitive extension",
			layout:   singleFile("test.WAV"),
			want:     1,
			mismatch: "expected 1 file, got %d",
		},
		{
			name:     "non-recursive ignores subdirectories",
			layout:   rootAndSubdir,
			want:     1,
			mismatch: "expected 1 file (non-recursive), got %d",
		},
		{
			name:      "recursive finds all files",
			layout:    rootAndSubdir,
			recursive: true,
			want:      2,
			mismatch:  "expected 2 files (recursive), got %d",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			root := t.TempDir()
			tc.layout(t, root)

			found, scanErrs := scanWavFiles(root, tc.recursive)
			if len(scanErrs) > 0 {
				t.Errorf("unexpected errors: %v", scanErrs)
			}
			if len(found) != tc.want {
				t.Errorf(tc.mismatch, len(found))
			}
		})
	}
}