// import_segments.go
package imp
import (
"context"
"database/sql"
"fmt"
"time"
"skraak/datafile"
"skraak/db"
)
// ImportSegmentsInput defines the input parameters for the import_segments tool.
type ImportSegmentsInput struct {
	// DBPath is the database path; ImportSegments resolves it through
	// db.ResolveDBPath before opening the database.
	DBPath string `json:"db_path"`
	// Folder is the directory searched for .data files.
	Folder string `json:"folder"`
	// Mapping is the path handed to LoadMappingFile.
	Mapping string `json:"mapping"`
	// DatasetID is forwarded to the database import step.
	DatasetID string `json:"dataset_id"`
	// LocationID is not read directly by ImportSegments.
	// NOTE(review): presumably consumed by the validation helpers — confirm.
	LocationID string `json:"location_id"`
	// ClusterID is not read directly by ImportSegments.
	// NOTE(review): presumably consumed by the validation helpers — confirm.
	ClusterID string `json:"cluster_id"`
	// ProgressHandler is an optional callback forwarded to the database import
	// step. It is excluded from JSON because encoding/json cannot encode func
	// values; without the "-" tag, marshalling this struct fails.
	ProgressHandler func(processed, total int, message string) `json:"-"`
}
// ImportSegmentsOutput defines the output structure for the import_segments tool.
type ImportSegmentsOutput struct {
// Summary holds the aggregate counters and timing for the run.
Summary ImportSegmentsSummary `json:"summary"`
// Segments lists the segments reported as imported.
Segments []SegmentImport `json:"segments"`
// Errors collects the problems recorded during the run; the key is omitted
// from JSON when the slice is empty.
Errors []ImportSegmentError `json:"errors,omitempty"`
}
// ImportSegmentsSummary provides summary statistics for the import operation.
type ImportSegmentsSummary struct {
// DataFilesFound is the number of .data files discovered in the input folder.
DataFilesFound int `json:"data_files_found"`
// DataFilesProcessed is the number of files that passed validation and were
// handed to the import step.
DataFilesProcessed int `json:"data_files_processed"`
// TotalSegments is the segment count across the processed files.
TotalSegments int `json:"total_segments"`
// ImportedSegments is the number of segments returned by the import step.
ImportedSegments int `json:"imported_segments"`
// ImportedLabels is the label count reported by the import step.
ImportedLabels int `json:"imported_labels"`
// ImportedSubtypes is the subtype count reported by the import step.
ImportedSubtypes int `json:"imported_subtypes"`
// ProcessingTimeMs is the elapsed wall-clock time of the run in milliseconds.
ProcessingTimeMs int64 `json:"processing_time_ms"`
}
// SegmentImport represents an imported segment in the output.
type SegmentImport struct {
// SegmentID identifies the imported segment.
SegmentID string `json:"segment_id"`
// FileName names the file the segment belongs to.
FileName string `json:"file_name"`
// StartTime and EndTime bound the segment in time.
// NOTE(review): presumably seconds from the start of the recording — confirm.
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
// FreqLow and FreqHigh bound the segment in frequency.
// NOTE(review): presumably Hz — confirm.
FreqLow float64 `json:"freq_low"`
FreqHigh float64 `json:"freq_high"`
// Labels lists the labels imported for this segment.
Labels []LabelImport `json:"labels"`
}
// LabelImport represents an imported label in the output.
type LabelImport struct {
// LabelID identifies the imported label.
LabelID string `json:"label_id"`
// Species is the species value recorded for the label.
Species string `json:"species"`
// CallType is optional; the key is omitted from JSON when empty.
CallType string `json:"calltype,omitempty"`
// Filter is the filter value recorded for the label.
Filter string `json:"filter"`
// Certainty is the label's certainty value.
// NOTE(review): valid range is not visible here — confirm.
Certainty int `json:"certainty"`
// Comment is optional; the key is omitted from JSON when empty.
Comment string `json:"comment,omitempty"`
}
// ImportSegmentError records errors encountered during segment import.
type ImportSegmentError struct {
// File is the file the error relates to; the key is omitted from JSON when empty.
File string `json:"file,omitempty"`
// Stage is the import stage in which the error occurred (ImportStage is
// declared elsewhere in the package).
Stage ImportStage `json:"stage"`
// Message is the human-readable error description.
Message string `json:"message"`
}
// scannedDataFile holds parsed data for a .data file.
type scannedDataFile struct {
// DataPath is the path of the .data file.
DataPath string
// WavPath is the path of the associated audio file.
// NOTE(review): presumably the .wav next to the .data file — confirm.
WavPath string
// WavHash is a hash of the audio file.
// NOTE(review): algorithm and its use for DB matching are not visible here — confirm.
WavHash string
// FileID is the identifier of the file.
// NOTE(review): presumably the database file ID — confirm.
FileID string
// Duration is the duration of the recording.
// NOTE(review): presumably seconds — confirm.
Duration float64
// Segments holds the segments parsed from the .data file.
Segments []*datafile.Segment
}
// segmentValidation holds the results of pre-import validation (phases B+C).
// ImportSegments forwards every field to the database import step.
type segmentValidation struct {
// scannedFiles lists the parsed .data files.
scannedFiles []scannedDataFile
// filterIDMap is a string-to-string lookup for filters.
// NOTE(review): presumably filter name -> database ID — confirm.
filterIDMap map[string]string
// speciesIDMap is a string-to-string lookup for species.
// NOTE(review): presumably species name -> database ID — confirm.
speciesIDMap map[string]string
// calltypeIDMap is a two-level lookup for call types.
// NOTE(review): presumably species -> call type -> database ID — confirm.
calltypeIDMap map[string]map[string]string
// fileIDMap holds the scanned files selected for import; an empty map makes
// ImportSegments return early without writing anything.
// NOTE(review): presumably keyed by file ID — confirm.
fileIDMap map[string]scannedDataFile
}
// ImportSegments imports segments from AviaNZ .data files found under
// input.Folder into the database identified by input.DBPath.
//
// The run is split into phases:
//   - A: validate the input, load the mapping file and locate the .data files.
//   - B+C: parse the files and validate them against the database (read-only);
//     validation problems are collected in the output's Errors.
//   - D: import everything inside one write transaction.
//   - E: write the generated IDs back into the .data files.
//
// The returned output carries whatever was collected up to the point of
// return, including when a non-nil error is returned. Finding no .data files
// is an error; having nothing left to import after validation is not.
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
	began := time.Now()
	output := ImportSegmentsOutput{
		Segments: []SegmentImport{},
		Errors:   []ImportSegmentError{},
	}

	// Phase A: input validation, mapping file, .data file discovery.
	if err := validateSegmentImportInput(input); err != nil {
		return output, err
	}

	mapping, err := LoadMappingFile(input.Mapping)
	if err != nil {
		return output, fmt.Errorf("failed to load mapping file: %w", err)
	}

	dataFiles, err := datafile.FindDataFiles(input.Folder)
	if err != nil {
		return output, fmt.Errorf("failed to find .data files: %w", err)
	}
	found := len(dataFiles)
	output.Summary.DataFilesFound = found
	if found == 0 {
		return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
	}

	// Phase B+C: parse and validate against the database (read-only).
	var prepared *segmentValidation
	validate := func(conn *sql.DB) error {
		var (
			problems []ImportSegmentError
			verr     error
		)
		prepared, problems, verr = validateAndPrepareSegments(conn, input, mapping, dataFiles)
		// Validation problems are reported even when validation itself fails.
		output.Errors = append(output.Errors, problems...)
		return verr
	}
	if err = db.WithReadDB(db.ResolveDBPath(input.DBPath, ""), validate); err != nil {
		return output, err
	}

	// Nothing survived validation: report timing and stop without writing.
	if prepared == nil || len(prepared.fileIDMap) == 0 {
		output.Summary.ProcessingTimeMs = time.Since(began).Milliseconds()
		return output, nil
	}

	// Phase D: transactional import.
	var (
		segs                     []SegmentImport
		labelCount, subtypeCount int
		pendingWrites            []dataFileUpdate
		txProblems               []ImportSegmentError
	)
	runImport := func(_ *sql.DB, tx *db.LoggedTx) error {
		segs, labelCount, subtypeCount, pendingWrites, txProblems = importSegmentsIntoDB(
			ctx, tx,
			prepared.fileIDMap, prepared.scannedFiles, mapping,
			prepared.filterIDMap, prepared.speciesIDMap, prepared.calltypeIDMap,
			input.DatasetID, input.ProgressHandler,
		)
		// Import problems travel through txProblems; the callback itself
		// never reports a failure to the transaction wrapper.
		return nil
	}
	err = db.WithWriteTx(ctx, db.ResolveDBPath(input.DBPath, ""), "import_segments", runImport)

	// Results are attached to the output before the transaction error is
	// inspected, so they are present on the error path as well.
	output.Errors = append(output.Errors, txProblems...)
	output.Segments = append(output.Segments, segs...)
	if err != nil {
		return output, err
	}

	// Phase E: write IDs back to the .data files.
	if len(pendingWrites) > 0 {
		output.Errors = append(output.Errors, writeIDsToDataFiles(pendingWrites)...)
	}

	stats := &output.Summary
	stats.DataFilesProcessed = len(prepared.fileIDMap)
	stats.TotalSegments = countTotalSegments(prepared.fileIDMap)
	stats.ImportedSegments = len(segs)
	stats.ImportedLabels = labelCount
	stats.ImportedSubtypes = subtypeCount
	stats.ProcessingTimeMs = time.Since(began).Milliseconds()
	return output, nil
}