// bulk_file_import_test.go
package imp
import (
"context"
"database/sql"
"os"
"path/filepath"
"testing"
)
// TestBulkFileImport drives BulkFileImport end to end: one successful import
// from a CSV that points at a directory holding a WAV file, followed by three
// inputs that must be rejected with an error.
//
// Every subtest obtains its own database path from setupFileBasedTestDB and
// its own temporary directory.
// NOTE(review): the dataset "dstest000001" and location "loctest00001" are
// presumably seeded by setupFileBasedTestDB — confirm against that helper.
func TestBulkFileImport(t *testing.T) {
	ctx := context.Background()

	t.Run("happy path - import from CSV", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)

		// Lay out <tmp>/recordings/test_recording.wav for the import to pick up.
		root := t.TempDir()
		recordings := filepath.Join(root, "recordings")
		if err := os.Mkdir(recordings, 0o755); err != nil {
			t.Fatalf("failed to create wav dir: %v", err)
		}
		createTestWAV(t, filepath.Join(recordings, "test_recording.wav"))

		// One CSV row whose directory column is the recordings folder.
		rows := [][]string{
			{"Test Location", "loctest00001", recordings, "2024-01", "16000", "1"},
		}
		csvPath := createTestCSVFile(t, root, rows)
		logPath := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbPath,
			DatasetID:   "dstest000001",
			CSVPath:     csvPath,
			LogFilePath: logPath,
		}
		result, err := BulkFileImport(ctx, input)
		if err != nil {
			t.Fatalf("BulkFileImport failed: %v", err)
		}

		// The summary must report the single location and non-zero file counts.
		if got := result.TotalLocations; got != 1 {
			t.Errorf("expected 1 location, got %d", got)
		}
		if result.TotalFilesScanned == 0 {
			t.Error("expected some files scanned")
		}
		if result.FilesImported == 0 {
			t.Error("expected some files imported")
		}

		// Open the database directly and confirm an active cluster row exists.
		db, err := sql.Open("duckdb", dbPath)
		if err != nil {
			t.Fatalf("failed to open database: %v", err)
		}
		defer db.Close()

		const countActive = "SELECT COUNT(*) FROM cluster WHERE active = true"
		var active int
		if err := db.QueryRow(countActive).Scan(&active); err != nil {
			t.Fatalf("failed to query clusters: %v", err)
		}
		if active < 1 {
			t.Errorf("expected at least 1 cluster, got %d", active)
		}
	})

	t.Run("missing CSV file - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		root := t.TempDir()
		logPath := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbPath,
			DatasetID:   "dstest000001",
			CSVPath:     filepath.Join(root, "nonexistent.csv"), // never written to disk
			LogFilePath: logPath,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for missing CSV file")
		}
	})

	t.Run("invalid dataset ID", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		root := t.TempDir()
		// The CSV is created with no data rows; only the dataset ID is at fault.
		csvPath := createTestCSVFile(t, root, [][]string{})
		logPath := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbPath,
			DatasetID:   "invalid_id",
			CSVPath:     csvPath,
			LogFilePath: logPath,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for invalid dataset ID")
		}
	})

	t.Run("location not in dataset - error", func(t *testing.T) {
		dbPath := setupFileBasedTestDB(t)
		root := t.TempDir()
		recordings := filepath.Join(root, "recordings")
		if err := os.Mkdir(recordings, 0o755); err != nil {
			t.Fatalf("failed to create wav dir: %v", err)
		}
		createTestWAV(t, filepath.Join(recordings, "test.wav"))

		// Use an invalid location ID.
		// NOTE(review): "invalid_loc!" may be rejected for its format rather
		// than for not belonging to the dataset — confirm which path this hits.
		rows := [][]string{
			{"Test Location", "invalid_loc!", recordings, "2024-01", "16000", "1"},
		}
		csvPath := createTestCSVFile(t, root, rows)
		logPath := createTestLogFile(t, root)

		input := BulkFileImportInput{
			DBPath:      dbPath,
			DatasetID:   "dstest000001",
			CSVPath:     csvPath,
			LogFilePath: logPath,
		}
		if _, err := BulkFileImport(ctx, input); err == nil {
			t.Error("expected error for location not in dataset")
		}
	})
}
// TestParseBulkCSVRow checks that parseBulkCSVRow accepts a well-formed
// six-column row and returns an error for each malformed variant.
func TestParseBulkCSVRow(t *testing.T) {
	t.Run("valid row", func(t *testing.T) {
		parsed, err := parseBulkCSVRow([]string{
			"Test Loc", "loctest00001", "/path/to/dir", "2024-01", "16000", "10",
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}

		// Columns 1, 2, 5 and 6 must come back in the matching fields.
		if got := parsed.LocationName; got != "Test Loc" {
			t.Errorf("expected Test Loc, got %s", got)
		}
		if got := parsed.LocationID; got != "loctest00001" {
			t.Errorf("expected loctest00001, got %s", got)
		}
		if got := parsed.SampleRate; got != 16000 {
			t.Errorf("expected 16000, got %d", got)
		}
		if got := parsed.FileCount; got != 10 {
			t.Errorf("expected 10, got %d", got)
		}
	})

	// Rows that must be rejected. The subtest name doubles as the tail of the
	// failure message, so each case reports "expected error for <name>".
	rejected := []struct {
		name string
		row  []string
	}{
		{"insufficient columns", []string{"a", "b", "c"}},
		{"empty location_name", []string{"", "loctest00001", "/path", "2024-01", "16000", "10"}},
		{"empty directory_path", []string{"Test Loc", "loctest00001", "", "2024-01", "16000", "10"}},
		{"invalid sample_rate", []string{"Test Loc", "loctest00001", "/path", "2024-01", "notanumber", "10"}},
		{"invalid location_id format", []string{"Test Loc", "badid", "/path", "2024-01", "16000", "10"}},
	}
	for _, tc := range rejected {
		// Subtests run synchronously, so capturing tc here is safe on any Go version.
		t.Run(tc.name, func(t *testing.T) {
			if _, err := parseBulkCSVRow(tc.row); err == nil {
				t.Error("expected error for " + tc.name)
			}
		})
	}
}
// TestBulkReadCSV checks bulkReadCSV on a CSV built from two data rows and on
// a zero-byte file, which must be reported as an error.
func TestBulkReadCSV(t *testing.T) {
	t.Run("reads valid CSV", func(t *testing.T) {
		rows := [][]string{
			{"Loc1", "loctest00001", "/path1", "2024-01", "16000", "10"},
			{"Loc2", "loctest00002", "/path2", "2024-02", "48000", "20"},
		}
		csvPath := createTestCSVFile(t, t.TempDir(), rows)

		parsed, err := bulkReadCSV(csvPath)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if n := len(parsed); n != 2 {
			t.Errorf("expected 2 locations, got %d", n)
		}
	})

	t.Run("empty CSV - error", func(t *testing.T) {
		// Written directly instead of via the CSV helper so the file is
		// completely empty, with no header line either.
		emptyPath := filepath.Join(t.TempDir(), "empty.csv")
		if err := os.WriteFile(emptyPath, []byte{}, 0o644); err != nil {
			t.Fatalf("failed to write CSV: %v", err)
		}

		if _, err := bulkReadCSV(emptyPath); err == nil {
			t.Error("expected error for empty CSV")
		}
	})
}