Fork channel

Create a new channel as a copy of main.

Rename channel

Rename main to:

Delete channel

Delete main? This cannot be undone.

import_segments.go
package imp

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"skraak/datafile"
	"skraak/db"
)

// ImportSegmentsInput defines the input parameters for the import_segments tool.
//
// DBPath is resolved through db.ResolveDBPath before the database is opened,
// Folder is searched for .data files, and Mapping is the path handed to
// LoadMappingFile. DatasetID is forwarded to the import step; LocationID and
// ClusterID are consumed by the validation helpers, which receive the whole
// input value.
type ImportSegmentsInput struct {
	DBPath     string `json:"db_path"`
	Folder     string `json:"folder"`
	Mapping    string `json:"mapping"`
	DatasetID  string `json:"dataset_id"`
	LocationID string `json:"location_id"`
	ClusterID  string `json:"cluster_id"`

	// ProgressHandler is an optional callback forwarded to the import step.
	// It is excluded from JSON: encoding/json cannot encode func values, so
	// without the "-" tag marshalling this struct always fails.
	ProgressHandler func(processed, total int, message string) `json:"-"`
}

// ImportSegmentsOutput defines the output structure for the import_segments tool.
//
// Summary carries the aggregate counters for the run, Segments lists the
// segments reported as imported, and Errors collects the problems recorded
// during validation, import, and .data file write-back. Errors is omitted
// from the JSON encoding when it is empty.
type ImportSegmentsOutput struct {
	Summary  ImportSegmentsSummary `json:"summary"`
	Segments []SegmentImport       `json:"segments"`
	Errors   []ImportSegmentError  `json:"errors,omitempty"`
}

// ImportSegmentsSummary provides summary statistics for the import operation.
//
// As filled in by ImportSegments: DataFilesFound is the number of .data files
// located in the input folder, DataFilesProcessed is the number of files that
// passed validation, TotalSegments is the segment count across those files,
// ImportedSegments/ImportedLabels/ImportedSubtypes are the counts reported by
// the import step, and ProcessingTimeMs is the elapsed wall-clock time in
// milliseconds.
type ImportSegmentsSummary struct {
	DataFilesFound     int   `json:"data_files_found"`
	DataFilesProcessed int   `json:"data_files_processed"`
	TotalSegments      int   `json:"total_segments"`
	ImportedSegments   int   `json:"imported_segments"`
	ImportedLabels     int   `json:"imported_labels"`
	ImportedSubtypes   int   `json:"imported_subtypes"`
	ProcessingTimeMs   int64 `json:"processing_time_ms"`
}

// SegmentImport represents an imported segment in the output.
//
// It identifies the segment and its file, gives the segment's time span
// (StartTime..EndTime) and frequency band (FreqLow..FreqHigh), and lists the
// labels imported for it.
// NOTE(review): units are not stated here — presumably seconds and Hz as in
// AviaNZ .data files; confirm against the datafile package.
type SegmentImport struct {
	SegmentID string        `json:"segment_id"`
	FileName  string        `json:"file_name"`
	StartTime float64       `json:"start_time"`
	EndTime   float64       `json:"end_time"`
	FreqLow   float64       `json:"freq_low"`
	FreqHigh  float64       `json:"freq_high"`
	Labels    []LabelImport `json:"labels"`
}

// LabelImport represents an imported label in the output.
//
// CallType and Comment are optional and are omitted from the JSON encoding
// when empty; the remaining fields are always emitted.
// NOTE(review): the valid range of Certainty is not defined in this file —
// confirm against the code that populates it.
type LabelImport struct {
	LabelID   string `json:"label_id"`
	Species   string `json:"species"`
	CallType  string `json:"calltype,omitempty"`
	Filter    string `json:"filter"`
	Certainty int    `json:"certainty"`
	Comment   string `json:"comment,omitempty"`
}

// ImportSegmentError records errors encountered during segment import.
//
// File names the file the error relates to and is omitted from JSON when
// empty (an error need not be tied to one file). Stage identifies the import
// stage that produced the error, and Message is the human-readable
// description.
type ImportSegmentError struct {
	File    string      `json:"file,omitempty"`
	Stage   ImportStage `json:"stage"`
	Message string      `json:"message"`
}

// scannedDataFile holds parsed data for a .data file.
//
// It pairs the .data file path with its associated WAV file (path and hash),
// the file's ID, its duration, and the segments parsed from the .data file.
// NOTE(review): presumably FileID is the database ID matched via WavHash and
// Duration is in seconds — confirm in the validation code that fills this in.
type scannedDataFile struct {
	DataPath string
	WavPath  string
	WavHash  string
	FileID   string
	Duration float64
	Segments []*datafile.Segment
}

// segmentValidation holds the results of pre-import validation (phases B+C).
//
// ImportSegments treats an empty fileIDMap as "nothing to import", uses its
// length as the processed-file count, and hands every field to the
// transactional import step.
// NOTE(review): the lookup maps presumably translate names from the .data
// files / mapping into database IDs (calltypeIDMap nested per species), and
// fileIDMap is presumably keyed by file ID — confirm against
// validateAndPrepareSegments.
type segmentValidation struct {
	scannedFiles  []scannedDataFile
	filterIDMap   map[string]string
	speciesIDMap  map[string]string
	calltypeIDMap map[string]map[string]string
	fileIDMap     map[string]scannedDataFile
}

// ImportSegments imports segments from AviaNZ .data files into the database.
//
// The run proceeds in phases:
//   - A: validate the input, load the mapping file, and locate the .data
//     files under input.Folder; finding none is an error.
//   - B+C: parse the files and validate them against the database inside
//     db.WithReadDB.
//   - D: import the validated files inside db.WithWriteTx under the
//     "import_segments" label.
//   - E: write the resulting IDs back to the .data files.
//
// Problems recorded by the validation, import, and write-back steps are
// accumulated in the returned output's Errors slice. A non-nil error is
// returned when input validation, mapping load, file discovery, the read
// phase, or the write transaction fails; the output returned alongside it
// holds whatever was gathered up to that point. When validation leaves no
// files to import, the function returns early with only DataFilesFound and
// ProcessingTimeMs populated in the summary.
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
	began := time.Now()
	output := ImportSegmentsOutput{
		Segments: make([]SegmentImport, 0),
		Errors:   make([]ImportSegmentError, 0),
	}

	// Phase A: reject bad input before touching the filesystem or database.
	if err := validateSegmentImportInput(input); err != nil {
		return output, err
	}

	mapping, err := LoadMappingFile(input.Mapping)
	if err != nil {
		return output, fmt.Errorf("failed to load mapping file: %w", err)
	}

	dataFiles, err := datafile.FindDataFiles(input.Folder)
	if err != nil {
		return output, fmt.Errorf("failed to find .data files: %w", err)
	}
	output.Summary.DataFilesFound = len(dataFiles)
	if len(dataFiles) == 0 {
		return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
	}

	// Phase B+C: parse the data files and validate them against the DB.
	// Validation problems are kept even when the phase itself fails.
	var prepared *segmentValidation
	validate := func(conn *sql.DB) error {
		var (
			problems []ImportSegmentError
			verr     error
		)
		prepared, problems, verr = validateAndPrepareSegments(conn, input, mapping, dataFiles)
		output.Errors = append(output.Errors, problems...)
		return verr
	}
	if err := db.WithReadDB(db.ResolveDBPath(input.DBPath, ""), validate); err != nil {
		return output, err
	}
	if prepared == nil || len(prepared.fileIDMap) == 0 {
		// Nothing survived validation, so there is nothing to import.
		output.Summary.ProcessingTimeMs = time.Since(began).Milliseconds()
		return output, nil
	}

	// Phase D: transactional import. The callback always returns nil, so
	// problems reported by importSegmentsIntoDB surface through output.Errors
	// rather than as a callback failure.
	var (
		segments       []SegmentImport
		labelCount     int
		subtypeCount   int
		pendingWrites  []dataFileUpdate
		importProblems []ImportSegmentError
	)
	txErr := db.WithWriteTx(ctx, db.ResolveDBPath(input.DBPath, ""), "import_segments",
		func(_ *sql.DB, tx *db.LoggedTx) error {
			segments, labelCount, subtypeCount, pendingWrites, importProblems = importSegmentsIntoDB(
				ctx, tx, prepared.fileIDMap, prepared.scannedFiles, mapping,
				prepared.filterIDMap, prepared.speciesIDMap, prepared.calltypeIDMap,
				input.DatasetID, input.ProgressHandler,
			)
			return nil
		})
	// Record what the import step reported before checking the transaction
	// outcome, so the caller sees it even on failure.
	output.Errors = append(output.Errors, importProblems...)
	output.Segments = append(output.Segments, segments...)
	if txErr != nil {
		return output, txErr
	}

	// Phase E: write the new IDs back to the .data files.
	if len(pendingWrites) > 0 {
		output.Errors = append(output.Errors, writeIDsToDataFiles(pendingWrites)...)
	}

	summary := &output.Summary
	summary.DataFilesProcessed = len(prepared.fileIDMap)
	summary.TotalSegments = countTotalSegments(prepared.fileIDMap)
	summary.ImportedSegments = len(segments)
	summary.ImportedLabels = labelCount
	summary.ImportedSubtypes = subtypeCount
	summary.ProcessingTimeMs = time.Since(began).Milliseconds()

	return output, nil
}