// export_test.go
package tools
import (
"os"
"path/filepath"
"strings"
"testing"
)
// --- datasetTables manifest completeness ---

// TestDatasetTablesManifest checks that datasetTables has an entry for every
// table name in the required list below. Extra entries in the manifest are
// not reported.
func TestDatasetTablesManifest(t *testing.T) {
	required := []string{
		"dataset", "location", "cluster", "segment", "file_dataset",
		"file", "moth_metadata", "file_metadata", "label_metadata",
		"label", "label_subtype",
		"ebird_taxonomy", "species", "call_type", "cyclic_recording_pattern", "filter",
	}

	// Collect the table names present in the manifest for O(1) membership tests.
	present := make(map[string]struct{})
	for _, entry := range datasetTables {
		present[entry.Table] = struct{}{}
	}

	for _, want := range required {
		if _, ok := present[want]; ok {
			continue
		}
		t.Errorf("manifest missing table: %s", want)
	}
}
// TestDatasetTablesRelations validates the per-relation invariants of every
// datasetTables entry:
//   - Relation must be one of "owned", "owned-via" or "copy";
//   - "owned" entries need a FilterCol;
//   - "owned-via" entries need a ViaTable;
//   - "copy" entries must leave both FilterCol and ViaTable empty.
func TestDatasetTablesRelations(t *testing.T) {
	for _, entry := range datasetTables {
		switch entry.Relation {
		case "owned":
			if entry.FilterCol == "" {
				t.Errorf("table %s is owned but has no FilterCol", entry.Table)
			}
		case "owned-via":
			if entry.ViaTable == "" {
				t.Errorf("table %s is owned-via but has no ViaTable", entry.Table)
			}
		case "copy":
			if entry.FilterCol != "" || entry.ViaTable != "" {
				t.Errorf("copy table %s should not have FilterCol/ViaTable", entry.Table)
			}
		default:
			t.Errorf("table %s has invalid relation: %s", entry.Table, entry.Relation)
		}
	}
}
// --- orderByFKDependency ---

// TestOrderByFKDependency feeds orderByFKDependency a shuffled list of tables
// together with an FK order list and checks that the result follows the FK
// order exactly, element by element.
func TestOrderByFKDependency(t *testing.T) {
	tables := []TableRelationship{
		{Table: "label", Relation: "owned-via"},
		{Table: "dataset", Relation: "owned"},
		{Table: "file", Relation: "owned-via"},
		{Table: "location", Relation: "owned"},
		{Table: "species", Relation: "copy"},
	}

	// FK order: dataset, location, species, file, label
	fkOrder := []string{"dataset", "location", "species", "file", "label"}

	sorted := orderByFKDependency(tables, fkOrder)

	// Every input table appears in fkOrder, so the sorted result must match it.
	expectedOrder := []string{"dataset", "location", "species", "file", "label"}

	// Check the length first: a longer result would otherwise panic on
	// expectedOrder[i], and a shorter (or empty) one would pass silently.
	if len(sorted) != len(expectedOrder) {
		t.Fatalf("got %d tables, want %d", len(sorted), len(expectedOrder))
	}

	for i, tr := range sorted {
		if tr.Table != expectedOrder[i] {
			t.Errorf("position %d: got %s, want %s", i, tr.Table, expectedOrder[i])
		}
	}
}
// TestOrderByFKDependency_UnknownTable checks where orderByFKDependency places
// a table whose name is absent from the FK order list: the test expects it at
// the front of the result.
func TestOrderByFKDependency_UnknownTable(t *testing.T) {
	tables := []TableRelationship{
		{Table: "unknown_table", Relation: "copy"},
		{Table: "dataset", Relation: "owned"},
	}

	// Unknown table gets order 0 (default int), so it sorts before known tables.
	// NOTE(review): with fkOrder = ["dataset"], "dataset" may also map to 0 if
	// orders are zero-based indices; in that case this relies on the input
	// order being preserved for ties — confirm against orderByFKDependency.
	sorted := orderByFKDependency(tables, []string{"dataset"})

	// Guard the sorted[0] access so a truncated or empty result fails with a
	// clear message instead of an index-out-of-range panic.
	if len(sorted) != len(tables) {
		t.Fatalf("got %d tables, want %d", len(sorted), len(tables))
	}

	if sorted[0].Table != "unknown_table" {
		t.Errorf("unknown table should sort first (order=0), got %s", sorted[0].Table)
	}
}
// --- buildOwnedViaQuery ---

// TestBuildOwnedViaQuery runs buildOwnedViaQuery over several owned-via
// relationships and checks that each generated query text mentions the target
// table, the via table, and a "dataset_id = ?" placeholder. These are
// substring checks only; the query is not executed.
func TestBuildOwnedViaQuery(t *testing.T) {
	cases := []struct {
		name    string
		tr      TableRelationship
		wantVia string // ViaTable name that must appear in the query
	}{
		{
			name:    "via cluster",
			tr:      TableRelationship{Table: "file", FilterCol: "cluster_id", ViaTable: "cluster"},
			wantVia: "cluster",
		},
		{
			name:    "via file",
			tr:      TableRelationship{Table: "moth_metadata", FilterCol: "file_id", ViaTable: "file"},
			wantVia: "file",
		},
		{
			name:    "via segment",
			tr:      TableRelationship{Table: "label", FilterCol: "segment_id", ViaTable: "segment"},
			wantVia: "segment",
		},
		{
			name:    "via label",
			tr:      TableRelationship{Table: "label_metadata", FilterCol: "label_id", ViaTable: "label"},
			wantVia: "label",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildOwnedViaQuery(tc.tr, "ds_test")
			missing := func(fragment string) bool {
				return !strings.Contains(got, fragment)
			}

			if missing(tc.tr.Table) {
				t.Errorf("query doesn't contain table name %s: %s", tc.tr.Table, got)
			}
			if missing(tc.wantVia) {
				t.Errorf("query doesn't contain via table %s: %s", tc.wantVia, got)
			}
			if missing("dataset_id = ?") {
				t.Errorf("query doesn't contain dataset_id parameter: %s", got)
			}
		})
	}
}
// --- buildCountOwnedViaQuery ---

// TestBuildCountOwnedViaQuery runs buildCountOwnedViaQuery over several
// owned-via relationships and checks that each generated query text contains
// COUNT(*), the target table, the via table, and a "dataset_id = ?"
// placeholder. These are substring checks only; the query is not executed.
func TestBuildCountOwnedViaQuery(t *testing.T) {
	tests := []struct {
		name    string
		tr      TableRelationship
		wantVia string // ViaTable name that must appear in the query
	}{
		{"via cluster", TableRelationship{Table: "file", FilterCol: "cluster_id", ViaTable: "cluster"}, "cluster"},
		{"via file", TableRelationship{Table: "moth_metadata", FilterCol: "file_id", ViaTable: "file"}, "file"},
		{"via segment", TableRelationship{Table: "label", FilterCol: "segment_id", ViaTable: "segment"}, "segment"},
		{"via label", TableRelationship{Table: "label_subtype", FilterCol: "label_id", ViaTable: "label"}, "label"},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			query := buildCountOwnedViaQuery(tt.tr)
			if !strings.Contains(query, "COUNT(*)") {
				t.Errorf("count query doesn't contain COUNT(*): %s", query)
			}
			if !strings.Contains(query, tt.tr.Table) {
				t.Errorf("count query doesn't contain table %s: %s", tt.tr.Table, query)
			}
			// wantVia was previously declared in the table but never asserted;
			// check it the same way TestBuildOwnedViaQuery does.
			if !strings.Contains(query, tt.wantVia) {
				t.Errorf("count query doesn't contain via table %s: %s", tt.wantVia, query)
			}
			if !strings.Contains(query, "dataset_id = ?") {
				t.Errorf("count query doesn't contain dataset_id param: %s", query)
			}
		})
	}
}
// --- checkOutputFile ---

// TestCheckOutputFile covers four checkOutputFile scenarios: a dry run, an
// output path that does not exist, an existing output without Force (which
// must yield an error mentioning "exists"), and an existing output with Force.
func TestCheckOutputFile(t *testing.T) {
	// existingOutput writes a small placeholder file inside a fresh temp
	// directory and returns its path.
	existingOutput := func(t *testing.T) string {
		path := filepath.Join(t.TempDir(), "existing.duckdb")
		if err := os.WriteFile(path, []byte("test"), 0644); err != nil {
			t.Fatalf("setup: %v", err)
		}
		return path
	}

	t.Run("dry run always passes", func(t *testing.T) {
		if err := checkOutputFile(ExportDatasetInput{DryRun: true}); err != nil {
			t.Errorf("dry run should not error: %v", err)
		}
	})

	t.Run("nonexistent output passes", func(t *testing.T) {
		target := filepath.Join(t.TempDir(), "nonexistent.duckdb")
		if err := checkOutputFile(ExportDatasetInput{Output: target}); err != nil {
			t.Errorf("nonexistent output should pass: %v", err)
		}
	})

	t.Run("existing output without force fails", func(t *testing.T) {
		target := existingOutput(t)
		err := checkOutputFile(ExportDatasetInput{Output: target})
		if err != nil && strings.Contains(err.Error(), "exists") {
			return
		}
		t.Errorf("expected exists error, got: %v", err)
	})

	t.Run("existing output with force passes", func(t *testing.T) {
		target := existingOutput(t)
		if err := checkOutputFile(ExportDatasetInput{Output: target, Force: true}); err != nil {
			t.Errorf("force should overwrite: %v", err)
		}
	})
}
// --- createOutputDir ---

// TestCreateOutputDir checks that createOutputDir creates the missing parent
// directory of an output path, and that it succeeds for a bare file name with
// no directory component.
func TestCreateOutputDir(t *testing.T) {
	t.Run("creates parent directory", func(t *testing.T) {
		base := t.TempDir()
		outputPath := filepath.Join(base, "subdir", "output.duckdb")
		if err := createOutputDir(outputPath); err != nil {
			t.Fatalf("unexpected error: %v", err)
		}

		// Fail on any Stat error, not only "not exist", and make sure the
		// path is really a directory rather than some other file type.
		info, err := os.Stat(filepath.Join(base, "subdir"))
		if err != nil {
			if os.IsNotExist(err) {
				t.Fatal("subdirectory was not created")
			}
			t.Fatalf("stat subdirectory: %v", err)
		}
		if !info.IsDir() {
			t.Errorf("expected %s to be a directory", info.Name())
		}
	})
	t.Run("current dir passes", func(t *testing.T) {
		if err := createOutputDir("output.duckdb"); err != nil {
			t.Errorf("current dir should pass: %v", err)
		}
	})
}
// --- createEventLogFile ---

// TestCreateEventLogFile checks that calling createEventLogFile with a
// database path leaves a readable, zero-length file at that path plus the
// ".events.jsonl" suffix.
func TestCreateEventLogFile(t *testing.T) {
	t.Run("creates empty file", func(t *testing.T) {
		dbPath := filepath.Join(t.TempDir(), "test.duckdb")

		if err := createEventLogFile(dbPath); err != nil {
			t.Fatalf("unexpected error: %v", err)
		}

		contents, readErr := os.ReadFile(dbPath + ".events.jsonl")
		if readErr != nil {
			t.Fatalf("failed to read event file: %v", readErr)
		}
		if size := len(contents); size != 0 {
			t.Errorf("expected empty file, got %d bytes", size)
		}
	})
}