package cmd
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
imp "skraak/tools/import"
)
// RunImport dispatches the "import" command to the handler for the subcommand
// named by args[0], handing it the remaining arguments.
//
// When args is empty or names an unknown subcommand, the general import usage
// text is printed via printImportUsage and a non-nil error is returned.
// Otherwise the selected handler's result is returned unchanged.
func RunImport(args []string) error {
	if len(args) == 0 {
		printImportUsage()
		return fmt.Errorf("import subcommand required")
	}

	handlers := map[string]func([]string) error{
		"bulk":         runImportBulk,
		"file":         runImportFile,
		"folder":       runImportFolder,
		"segments":     runImportSegments,
		"unstructured": runImportUnstructured,
	}

	name, rest := args[0], args[1:]
	handler, known := handlers[name]
	if !known {
		printImportUsage()
		return fmt.Errorf("unknown import subcommand: %s", name)
	}
	return handler(rest)
}
// printImportUsage writes the top-level help for "skraak import" to stderr:
// the list of subcommands followed by one example invocation of each.
func printImportUsage() {
	lines := []string{
		"Usage: skraak import <subcommand> [options]\n\n",
		"Subcommands:\n",
		" file Import a single WAV file (structured datasets)\n",
		" folder Import all WAV files from a folder (structured datasets)\n",
		" bulk Bulk import WAV files from CSV (structured datasets)\n",
		" unstructured Import WAV files into unstructured dataset (no location/cluster)\n",
		" segments Import segments from AviaNZ .data files (structured datasets)\n",
		"\nExamples:\n",
		" skraak import bulk --db ./db/skraak.duckdb --dataset abc123 --csv import.csv --log progress.log\n",
		" skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --file /path/to/file.wav\n",
		" skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder\n",
		" skraak import segments --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder --mapping mapping.json\n",
		" skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder\n",
	}
	// None of the lines contain formatting verbs, so they are written verbatim.
	for _, line := range lines {
		fmt.Fprint(os.Stderr, line)
	}
}
// runImportBulk implements "skraak import bulk".
//
// It parses the --db, --dataset, --csv and --log flags, passes them to
// checkFlags, announces the run on stderr and then calls imp.BulkFileImport.
// On success the result is written to stdout as JSON. If the import fails but
// the returned output reports at least one location or imported file, that
// output is still printed before the wrapped error is returned.
func runImportBulk(args []string) error {
	var dbPath, datasetID, csvPath, logPath string

	fs := flag.NewFlagSet("import bulk", flag.ExitOnError)
	fs.StringVar(&dbPath, "db", "", "Path to DuckDB database (required)")
	fs.StringVar(&datasetID, "dataset", "", "Dataset ID (required)")
	fs.StringVar(&csvPath, "csv", "", "Path to CSV file (required)")
	fs.StringVar(&logPath, "log", "", "Path to progress log file (required)")
	fs.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import bulk [options]\n\n"+
			"Bulk import WAV files across multiple locations/clusters using a CSV file.\n\n"+
			"Options:\n")
		fs.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nCSV format: location_name,location_id,directory_path,date_range,sample_rate,file_count\n"+
			"\nMonitor progress: tail -f <log-file>\n")
	}

	// With flag.ExitOnError, Parse exits the process on a bad flag; the error
	// branch only matters if the error handling mode is ever changed.
	if err := fs.Parse(args); err != nil {
		return fmt.Errorf("parsing arguments: %w", err)
	}
	// checkFlags is defined elsewhere; presumably it rejects empty required
	// values — verify against its definition.
	if err := checkFlags(fs, "--db", dbPath, "--dataset", datasetID, "--csv", csvPath, "--log", logPath); err != nil {
		return err
	}

	// initEventLog runs now; the function it returns runs when this command returns.
	defer initEventLog(dbPath)()

	request := imp.BulkFileImportInput{
		DBPath:      dbPath,
		DatasetID:   datasetID,
		CSVPath:     csvPath,
		LogFilePath: logPath,
	}

	fmt.Fprintf(os.Stderr, "Starting bulk import...\n"+
		" Database: %s\n"+
		" Dataset: %s\n"+
		" CSV: %s\n"+
		" Log: %s\n"+
		"\nMonitor progress: tail -f %s\n\n",
		dbPath, datasetID, csvPath, logPath, logPath)

	result, err := imp.BulkFileImport(context.Background(), request)
	if err != nil {
		madeProgress := result.TotalLocations > 0 || result.FilesImported > 0
		if madeProgress {
			// Best effort: an encoding error here is discarded so that the
			// import error below is the one returned.
			_ = printJSON(result)
		}
		return fmt.Errorf("bulk import: %w", err)
	}
	return printJSON(result)
}
// runImportFile implements "skraak import file".
//
// It parses the --db, --dataset, --location, --cluster and --file flags,
// passes them to checkFlags, and calls imp.ImportFile. On success the result
// is written to stdout as JSON; on failure the wrapped error is returned and
// nothing is printed to stdout.
func runImportFile(args []string) error {
	var dbPath, datasetID, locationID, clusterID, filePath string

	fs := flag.NewFlagSet("import file", flag.ExitOnError)
	fs.StringVar(&dbPath, "db", "", "Path to DuckDB database (required)")
	fs.StringVar(&datasetID, "dataset", "", "Dataset ID (required)")
	fs.StringVar(&locationID, "location", "", "Location ID (required)")
	fs.StringVar(&clusterID, "cluster", "", "Cluster ID (required)")
	fs.StringVar(&filePath, "file", "", "Path to WAV file (required)")
	fs.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import file [options]\n\n"+
			"Import a single WAV file into the database.\n\n"+
			"Options:\n")
		fs.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nExamples:\n"+
			" skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --file /path/to/file.wav\n")
	}

	// With flag.ExitOnError, Parse exits the process on a bad flag; the error
	// branch only matters if the error handling mode is ever changed.
	if err := fs.Parse(args); err != nil {
		return fmt.Errorf("parsing arguments: %w", err)
	}
	// checkFlags is defined elsewhere; presumably it rejects empty required
	// values — verify against its definition.
	if err := checkFlags(fs, "--db", dbPath, "--dataset", datasetID, "--location", locationID, "--cluster", clusterID, "--file", filePath); err != nil {
		return err
	}

	// initEventLog runs now; the function it returns runs when this command returns.
	defer initEventLog(dbPath)()

	request := imp.ImportFileInput{
		DBPath:     dbPath,
		FilePath:   filePath,
		DatasetID:  datasetID,
		LocationID: locationID,
		ClusterID:  clusterID,
	}

	fmt.Fprintf(os.Stderr, "Importing file: %s\n", filePath)

	result, err := imp.ImportFile(context.Background(), request)
	if err != nil {
		return fmt.Errorf("import file: %w", err)
	}
	return printJSON(result)
}
// runImportFolder implements "skraak import folder".
//
// It parses the --db, --dataset, --location, --cluster, --folder and
// --recursive flags, passes the required ones to checkFlags, and calls
// imp.ImportAudioFiles. On success the result is written to stdout as JSON.
// If the import fails but the returned output already lists file IDs, that
// output is still printed before the wrapped error is returned.
func runImportFolder(args []string) error {
	fs := flag.NewFlagSet("import folder", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required)")
	locationID := fs.String("location", "", "Location ID (required)")
	clusterID := fs.String("cluster", "", "Cluster ID (required)")
	folderPath := fs.String("folder", "", "Path to folder containing WAV files (required)")
	// PrintDefaults appends "(default true)" for a non-zero bool default, so
	// the usage string must not spell the default out a second time.
	recursive := fs.Bool("recursive", true, "Scan subfolders recursively")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import folder [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import all WAV files from a folder into the database.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder\n")
	}

	// With flag.ExitOnError, Parse exits the process on a bad flag; the error
	// branch only matters if the error handling mode is ever changed.
	if err := fs.Parse(args); err != nil {
		return fmt.Errorf("parsing arguments: %w", err)
	}
	// checkFlags is defined elsewhere; presumably it rejects empty required
	// values — verify against its definition.
	if err := checkFlags(fs, "--db", *dbPath, "--dataset", *datasetID, "--location", *locationID, "--cluster", *clusterID, "--folder", *folderPath); err != nil {
		return err
	}

	// initEventLog runs now; the function it returns runs when this command returns.
	defer initEventLog(*dbPath)()

	input := imp.ImportAudioFilesInput{
		DBPath:     *dbPath,
		FolderPath: *folderPath,
		DatasetID:  *datasetID,
		LocationID: *locationID,
		ClusterID:  *clusterID,
		// The *bool returned by fs.Bool is passed through as-is.
		Recursive: recursive,
	}

	fmt.Fprintf(os.Stderr, "Importing from folder: %s\n", *folderPath)
	if *recursive {
		fmt.Fprintf(os.Stderr, "Scanning recursively...\n")
	}

	output, err := imp.ImportAudioFiles(context.Background(), input)
	if err != nil {
		if len(output.FileIDs) > 0 {
			// Best effort: an encoding error here is discarded so that the
			// import error below is the one returned.
			_ = printJSON(output)
		}
		return fmt.Errorf("import folder: %w", err)
	}
	return printJSON(output)
}
// runImportSegments implements "skraak import segments".
//
// It parses the --db, --dataset, --location, --cluster, --folder and
// --mapping flags, passes them to checkFlags, and calls imp.ImportSegments
// with a progress callback that redraws a single status line on stderr.
// On success a short summary is written to stderr and the full result to
// stdout as JSON. If the import fails but the returned output contains
// segments or errors, that output is still printed before the wrapped error
// is returned.
func runImportSegments(args []string) error {
	var dbPath, datasetID, locationID, clusterID, folderPath, mappingPath string

	fs := flag.NewFlagSet("import segments", flag.ExitOnError)
	fs.StringVar(&dbPath, "db", "", "Path to DuckDB database (required)")
	fs.StringVar(&datasetID, "dataset", "", "Dataset ID (required)")
	fs.StringVar(&locationID, "location", "", "Location ID (required)")
	fs.StringVar(&clusterID, "cluster", "", "Cluster ID (required)")
	fs.StringVar(&folderPath, "folder", "", "Path to folder containing .data files (required)")
	fs.StringVar(&mappingPath, "mapping", "", "Path to mapping JSON file (required)")
	fs.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import segments [options]\n\n"+
			"Import segments from AviaNZ .data files into the database.\n"+
			"Applies species/calltype mapping from JSON file.\n\n"+
			"Options:\n")
		fs.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nMapping file format:\n"+
			" {\n"+
			" \"GSK\": {\"species\": \"Roroa\", \"calltypes\": {\"Male\": \"Male - Solo\"}},\n"+
			" \"Don't Know\": {\"species\": \"Don't Know\"}\n"+
			" }\n"+
			"\nInvariants:\n"+
			" - All file hashes must already exist in database for the cluster\n"+
			" - All files must have no existing labels (fresh imports only)\n"+
			" - All filters, species, and calltypes must exist in database\n"+
			" - Bookmark flags are ignored (not stored in database)\n"+
			"\nExamples:\n"+
			" skraak import segments --db ./db/skraak.duckdb --dataset dset_id123 --location loc_id456 --cluster clust_id789 --folder /path/to/data --mapping mapping.json\n")
	}

	// With flag.ExitOnError, Parse exits the process on a bad flag; the error
	// branch only matters if the error handling mode is ever changed.
	if err := fs.Parse(args); err != nil {
		return fmt.Errorf("parsing arguments: %w", err)
	}
	// checkFlags is defined elsewhere; presumably it rejects empty required
	// values — verify against its definition.
	if err := checkFlags(fs, "--db", dbPath, "--dataset", datasetID, "--location", locationID, "--cluster", clusterID, "--folder", folderPath, "--mapping", mappingPath); err != nil {
		return err
	}

	// initEventLog runs now; the function it returns runs when this command returns.
	defer initEventLog(dbPath)()

	// reportProgress redraws one status line using a leading carriage return
	// and ends the line once the last item has been processed.
	reportProgress := func(processed, total int, message string) {
		if total <= 0 {
			return
		}
		percent := float64(processed) / float64(total) * 100
		fmt.Fprintf(os.Stderr, "\rProcessing .data files: %d/%d (%.0f%%) - %s", processed, total, percent, message)
		if processed == total {
			fmt.Fprintln(os.Stderr)
		}
	}

	request := imp.ImportSegmentsInput{
		DBPath:          dbPath,
		Folder:          folderPath,
		Mapping:         mappingPath,
		DatasetID:       datasetID,
		LocationID:      locationID,
		ClusterID:       clusterID,
		ProgressHandler: reportProgress,
	}

	fmt.Fprintf(os.Stderr, "Importing segments from: %s\n"+
		"Using mapping: %s\n",
		folderPath, mappingPath)

	result, err := imp.ImportSegments(context.Background(), request)
	if err != nil {
		// Move off the progress line, which may have been left without a newline.
		fmt.Fprintln(os.Stderr)
		if len(result.Segments) > 0 || len(result.Errors) > 0 {
			// Best effort: an encoding error here is discarded so that the
			// import error below is the one returned.
			_ = printJSON(result)
		}
		return fmt.Errorf("import segments: %w", err)
	}

	summary := result.Summary
	fmt.Fprintf(os.Stderr, "\nImport complete:\n"+
		" Data files processed: %d\n"+
		" Segments imported: %d\n"+
		" Labels imported: %d\n"+
		" Subtypes imported: %d\n",
		summary.DataFilesProcessed, summary.ImportedSegments, summary.ImportedLabels, summary.ImportedSubtypes)
	return printJSON(result)
}
// runImportUnstructured implements "skraak import unstructured".
//
// It parses the --db, --dataset, --folder and --recursive flags, passes the
// required ones to checkFlags, and calls imp.ImportUnstructured. On success
// the result is written to stdout as JSON; on failure the wrapped error is
// returned and nothing is printed to stdout.
func runImportUnstructured(args []string) error {
	fs := flag.NewFlagSet("import unstructured", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required - must be 'unstructured' type)")
	folderPath := fs.String("folder", "", "Path to folder containing WAV files (required)")
	// PrintDefaults appends "(default true)" for a non-zero bool default, so
	// the usage string must not spell the default out a second time.
	recursive := fs.Bool("recursive", true, "Scan subfolders recursively")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import unstructured [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import WAV files into an unstructured dataset.\n")
		fmt.Fprintf(os.Stderr, "Files are stored with minimal metadata (hash, duration, sample_rate, file modification time).\n")
		fmt.Fprintf(os.Stderr, "No location/cluster hierarchy required.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder\n")
		fmt.Fprintf(os.Stderr, " skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder --recursive=false\n")
	}

	// With flag.ExitOnError, Parse exits the process on a bad flag; the error
	// branch only matters if the error handling mode is ever changed.
	if err := fs.Parse(args); err != nil {
		return fmt.Errorf("parsing arguments: %w", err)
	}
	// checkFlags is defined elsewhere; presumably it rejects empty required
	// values — verify against its definition.
	if err := checkFlags(fs, "--db", *dbPath, "--dataset", *datasetID, "--folder", *folderPath); err != nil {
		return err
	}

	// initEventLog runs now; the function it returns runs when this command returns.
	defer initEventLog(*dbPath)()

	input := imp.ImportUnstructuredInput{
		DBPath:     *dbPath,
		DatasetID:  *datasetID,
		FolderPath: *folderPath,
		// The *bool returned by fs.Bool is passed through as-is.
		Recursive: recursive,
	}

	fmt.Fprintf(os.Stderr, "Importing into unstructured dataset: %s\n", *datasetID)
	fmt.Fprintf(os.Stderr, "Scanning folder: %s\n", *folderPath)
	if *recursive {
		fmt.Fprintf(os.Stderr, "Scanning recursively...\n")
	}

	output, err := imp.ImportUnstructured(context.Background(), input)
	if err != nil {
		return fmt.Errorf("import unstructured: %w", err)
	}
	return printJSON(output)
}
// printJSON writes v to stdout as indented JSON using a json.Encoder.
// It returns nil on success, or the encoder's error wrapped with the
// "encoding output" context.
func printJSON(v any) error {
	encoder := json.NewEncoder(os.Stdout)
	encoder.SetIndent("", " ")

	err := encoder.Encode(v)
	if err == nil {
		return nil
	}
	return fmt.Errorf("encoding output: %w", err)
}