package cmd
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"skraak/db"
"skraak/tools"
)
// RunImport dispatches the "import" command. args[0] names the subcommand and
// the remaining arguments are handed to that subcommand's handler unchanged.
// With no arguments, or an unrecognised subcommand, it prints the import usage
// text to standard error and exits with status 1.
func RunImport(args []string) {
	if len(args) == 0 {
		printImportUsage()
		os.Exit(1)
	}

	// Subcommand name -> handler.
	handlers := map[string]func([]string){
		"bulk":         runImportBulk,
		"file":         runImportFile,
		"folder":       runImportFolder,
		"selections":   runImportSelections,
		"unstructured": runImportUnstructured,
	}

	name, rest := args[0], args[1:]
	handler, known := handlers[name]
	if !known {
		fmt.Fprintf(os.Stderr, "Unknown import subcommand: %s\n\n", name)
		printImportUsage()
		os.Exit(1)
	}
	handler(rest)
}
// printImportUsage writes the top-level help for "skraak import" to standard
// error: the usage line, the list of subcommands, and one example invocation
// per subcommand.
func printImportUsage() {
	// Each entry is emitted verbatim, in order; entries carry their own newlines.
	usageLines := []string{
		"Usage: skraak import <subcommand> [options]\n\n",
		"Subcommands:\n",
		" bulk Bulk import WAV files from CSV (structured datasets)\n",
		" file Import a single WAV file (structured datasets)\n",
		" folder Import all WAV files from a folder (structured datasets)\n",
		" selections Import ML selections from folder structure (structured datasets)\n",
		" unstructured Import WAV files into unstructured dataset (no location/cluster)\n",
		"\nExamples:\n",
		" skraak import bulk --db ./db/skraak.duckdb --dataset abc123 --csv import.csv --log progress.log\n",
		" skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --path /path/to/file.wav\n",
		" skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --path /path/to/folder\n",
		" skraak import selections --db ./db/skraak.duckdb --dataset abc123 --cluster clust789 --path /path/to/Clips_filter_date\n",
		" skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --path /path/to/folder\n",
	}
	for _, line := range usageLines {
		fmt.Fprint(os.Stderr, line)
	}
}
// runImportBulk implements "skraak import bulk".
//
// It parses --db, --dataset, --csv and --log (all required), points the tools
// package at the database, enables the event log at "<db>.events.jsonl", and
// runs tools.BulkFileImport. The result is printed to standard output as JSON.
//
// On a missing flag it prints the usage text and exits with status 1. On an
// import error it prints the error to standard error, prints the partial
// result if any locations or files were recorded, closes the event log, and
// exits with status 1.
func runImportBulk(args []string) {
	fs := flag.NewFlagSet("import bulk", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required)")
	csvPath := fs.String("csv", "", "Path to CSV file (required)")
	logPath := fs.String("log", "", "Path to progress log file (required)")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import bulk [options]\n\n")
		fmt.Fprintf(os.Stderr, "Bulk import WAV files across multiple locations/clusters using a CSV file.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nCSV format: location_name,location_id,directory_path,date_range,sample_rate,file_count\n")
		fmt.Fprintf(os.Stderr, "\nMonitor progress: tail -f <log-file>\n")
	}
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	// Collect every missing required flag so they are all reported at once.
	var missing []string
	if *dbPath == "" {
		missing = append(missing, "--db")
	}
	if *datasetID == "" {
		missing = append(missing, "--dataset")
	}
	if *csvPath == "" {
		missing = append(missing, "--csv")
	}
	if *logPath == "" {
		missing = append(missing, "--log")
	}
	if len(missing) > 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		fs.Usage()
		os.Exit(1)
	}

	tools.SetDBPath(*dbPath)
	eventLogPath := *dbPath + ".events.jsonl"
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    eventLogPath,
	})
	defer db.CloseEventLog()

	input := tools.BulkFileImportInput{
		DatasetID:   *datasetID,
		CSVPath:     *csvPath,
		LogFilePath: *logPath,
	}
	fmt.Fprintf(os.Stderr, "Starting bulk import...\n")
	fmt.Fprintf(os.Stderr, " Database: %s\n", *dbPath)
	fmt.Fprintf(os.Stderr, " Dataset: %s\n", *datasetID)
	fmt.Fprintf(os.Stderr, " CSV: %s\n", *csvPath)
	fmt.Fprintf(os.Stderr, " Log: %s\n", *logPath)
	fmt.Fprintf(os.Stderr, "\nMonitor progress: tail -f %s\n\n", *logPath)

	output, err := tools.BulkFileImport(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		if output.TotalLocations > 0 || output.FilesImported > 0 {
			printJSON(output)
		}
		// os.Exit does not run deferred calls, so the event log must be
		// closed explicitly on this path.
		db.CloseEventLog()
		os.Exit(1)
	}
	printJSON(output)
}
// runImportFile implements "skraak import file".
//
// It parses --db, --dataset, --location, --cluster and --path (all required),
// points the tools package at the database, enables the event log at
// "<db>.events.jsonl", and runs tools.ImportFile for the single file. The
// result is printed to standard output as JSON.
//
// On a missing flag it prints the usage text and exits with status 1. On an
// import error it prints the error to standard error, closes the event log,
// and exits with status 1.
func runImportFile(args []string) {
	fs := flag.NewFlagSet("import file", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required)")
	locationID := fs.String("location", "", "Location ID (required)")
	clusterID := fs.String("cluster", "", "Cluster ID (required)")
	filePath := fs.String("path", "", "Path to WAV file (required)")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import file [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import a single WAV file into the database.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --path /path/to/file.wav\n")
	}
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	// Collect every missing required flag so they are all reported at once.
	var missing []string
	if *dbPath == "" {
		missing = append(missing, "--db")
	}
	if *datasetID == "" {
		missing = append(missing, "--dataset")
	}
	if *locationID == "" {
		missing = append(missing, "--location")
	}
	if *clusterID == "" {
		missing = append(missing, "--cluster")
	}
	if *filePath == "" {
		missing = append(missing, "--path")
	}
	if len(missing) > 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		fs.Usage()
		os.Exit(1)
	}

	tools.SetDBPath(*dbPath)
	eventLogPath := *dbPath + ".events.jsonl"
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    eventLogPath,
	})
	defer db.CloseEventLog()

	input := tools.ImportFileInput{
		FilePath:   *filePath,
		DatasetID:  *datasetID,
		LocationID: *locationID,
		ClusterID:  *clusterID,
	}
	fmt.Fprintf(os.Stderr, "Importing file: %s\n", *filePath)

	output, err := tools.ImportFile(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// os.Exit does not run deferred calls, so the event log must be
		// closed explicitly on this path.
		db.CloseEventLog()
		os.Exit(1)
	}
	printJSON(output)
}
// runImportFolder implements "skraak import folder".
//
// It parses --db, --dataset, --location, --cluster and --path (all required)
// plus the optional --recursive flag (default true), points the tools package
// at the database, enables the event log at "<db>.events.jsonl", and runs
// tools.ImportAudioFiles. The result is printed to standard output as JSON.
//
// On a missing flag it prints the usage text and exits with status 1. On an
// import error it prints the error to standard error, prints the partial
// result if any file IDs were returned, closes the event log, and exits with
// status 1.
func runImportFolder(args []string) {
	fs := flag.NewFlagSet("import folder", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required)")
	locationID := fs.String("location", "", "Location ID (required)")
	clusterID := fs.String("cluster", "", "Cluster ID (required)")
	folderPath := fs.String("path", "", "Path to folder containing WAV files (required)")
	recursive := fs.Bool("recursive", true, "Scan subfolders recursively (default: true)")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import folder [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import all WAV files from a folder into the database.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --path /path/to/folder\n")
	}
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	// Collect every missing required flag so they are all reported at once.
	var missing []string
	if *dbPath == "" {
		missing = append(missing, "--db")
	}
	if *datasetID == "" {
		missing = append(missing, "--dataset")
	}
	if *locationID == "" {
		missing = append(missing, "--location")
	}
	if *clusterID == "" {
		missing = append(missing, "--cluster")
	}
	if *folderPath == "" {
		missing = append(missing, "--path")
	}
	if len(missing) > 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		fs.Usage()
		os.Exit(1)
	}

	tools.SetDBPath(*dbPath)
	eventLogPath := *dbPath + ".events.jsonl"
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    eventLogPath,
	})
	defer db.CloseEventLog()

	input := tools.ImportAudioFilesInput{
		FolderPath: *folderPath,
		DatasetID:  *datasetID,
		LocationID: *locationID,
		ClusterID:  *clusterID,
		Recursive:  recursive, // passed as the *bool returned by the flag set
	}
	fmt.Fprintf(os.Stderr, "Importing from folder: %s\n", *folderPath)
	if *recursive {
		fmt.Fprintf(os.Stderr, "Scanning recursively...\n")
	}

	output, err := tools.ImportAudioFiles(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		if len(output.FileIDs) > 0 {
			printJSON(output)
		}
		// os.Exit does not run deferred calls, so the event log must be
		// closed explicitly on this path.
		db.CloseEventLog()
		os.Exit(1)
	}
	printJSON(output)
}
// runImportSelections implements "skraak import selections".
//
// It parses --db, --dataset, --cluster and --path (all required), points the
// tools package at the database, enables the event log at
// "<db>.events.jsonl", and runs tools.ImportMLSelections on the given folder.
// The result is printed to standard output as JSON.
//
// On a missing flag it prints the usage text and exits with status 1. On an
// import error it prints the error to standard error, prints the partial
// result if any selection IDs were returned, closes the event log, and exits
// with status 1.
func runImportSelections(args []string) {
	fs := flag.NewFlagSet("import selections", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required)")
	clusterID := fs.String("cluster", "", "Cluster ID (required)")
	folderPath := fs.String("path", "", "Path to Clips_* folder (required)")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import selections [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import ML-detected kiwi call selections from folder structure.\n")
		fmt.Fprintf(os.Stderr, "Expects: Clips_{filter}_{date}/Species/CallType/*.wav+.png\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import selections --db ./db/skraak.duckdb --dataset abc123 --cluster clust789 --path /path/to/Clips_filter_2024-01-15\n")
	}
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	// Collect every missing required flag so they are all reported at once.
	var missing []string
	if *dbPath == "" {
		missing = append(missing, "--db")
	}
	if *datasetID == "" {
		missing = append(missing, "--dataset")
	}
	if *clusterID == "" {
		missing = append(missing, "--cluster")
	}
	if *folderPath == "" {
		missing = append(missing, "--path")
	}
	if len(missing) > 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		fs.Usage()
		os.Exit(1)
	}

	tools.SetDBPath(*dbPath)
	eventLogPath := *dbPath + ".events.jsonl"
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    eventLogPath,
	})
	defer db.CloseEventLog()

	input := tools.ImportMLSelectionsInput{
		FolderPath: *folderPath,
		DatasetID:  *datasetID,
		ClusterID:  *clusterID,
	}
	fmt.Fprintf(os.Stderr, "Importing selections from: %s\n", *folderPath)

	output, err := tools.ImportMLSelections(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		if len(output.SelectionIDs) > 0 {
			printJSON(output)
		}
		// os.Exit does not run deferred calls, so the event log must be
		// closed explicitly on this path.
		db.CloseEventLog()
		os.Exit(1)
	}
	printJSON(output)
}
// runImportUnstructured implements "skraak import unstructured".
//
// It parses --db, --dataset and --path (all required) plus the optional
// --recursive flag (default true), points the tools package at the database,
// enables the event log at "<db>.events.jsonl", and runs
// tools.ImportUnstructured. The result is printed to standard output as JSON.
//
// On a missing flag it prints the usage text and exits with status 1. On an
// import error it prints the error to standard error, closes the event log,
// and exits with status 1.
func runImportUnstructured(args []string) {
	fs := flag.NewFlagSet("import unstructured", flag.ExitOnError)
	dbPath := fs.String("db", "", "Path to DuckDB database (required)")
	datasetID := fs.String("dataset", "", "Dataset ID (required - must be 'unstructured' type)")
	folderPath := fs.String("path", "", "Path to folder containing WAV files (required)")
	recursive := fs.Bool("recursive", true, "Scan subfolders recursively (default: true)")
	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: skraak import unstructured [options]\n\n")
		fmt.Fprintf(os.Stderr, "Import WAV files into an unstructured dataset.\n")
		fmt.Fprintf(os.Stderr, "Files are stored with minimal metadata (hash, duration, sample_rate, file modification time).\n")
		fmt.Fprintf(os.Stderr, "No location/cluster hierarchy required.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		fs.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --path /path/to/folder\n")
		fmt.Fprintf(os.Stderr, " skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --path /path/to/folder --recursive=false\n")
	}
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	// Collect every missing required flag so they are all reported at once.
	var missing []string
	if *dbPath == "" {
		missing = append(missing, "--db")
	}
	if *datasetID == "" {
		missing = append(missing, "--dataset")
	}
	if *folderPath == "" {
		missing = append(missing, "--path")
	}
	if len(missing) > 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		fs.Usage()
		os.Exit(1)
	}

	tools.SetDBPath(*dbPath)
	eventLogPath := *dbPath + ".events.jsonl"
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    eventLogPath,
	})
	defer db.CloseEventLog()

	input := tools.ImportUnstructuredInput{
		DatasetID:  *datasetID,
		FolderPath: *folderPath,
		Recursive:  recursive, // passed as the *bool returned by the flag set
	}
	fmt.Fprintf(os.Stderr, "Importing into unstructured dataset: %s\n", *datasetID)
	fmt.Fprintf(os.Stderr, "Scanning folder: %s\n", *folderPath)
	if *recursive {
		fmt.Fprintf(os.Stderr, "Scanning recursively...\n")
	}

	output, err := tools.ImportUnstructured(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// os.Exit does not run deferred calls, so the event log must be
		// closed explicitly on this path.
		db.CloseEventLog()
		os.Exit(1)
	}
	printJSON(output)
}
// printJSON writes v to standard output as indented JSON followed by a
// newline. If v cannot be encoded, or the write fails, the error is reported
// on standard error instead of being silently dropped.
func printJSON(v any) {
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(v); err != nil {
		fmt.Fprintf(os.Stderr, "Error: encoding JSON output: %v\n", err)
	}
}