big tidy up of tools/

quietlight
May 11, 2026, 8:36 PM
3DVPQOKB6BX63XSBIYYCPWBL2RBG3LXZS3XPQBANJP2FWVRAOVZQC

Dependencies

Change contents

  • replacement in tui/classify.go at line 15
    [6.227304][6.227304:227320]()
    "skraak/tools"
    [6.227304]
    [6.227320]
    "skraak/tools/calls"
  • replacement in tui/classify.go at line 96
    [6.229112][6.229112:229147]()
    state *tools.ClassifyState
    [6.229112]
    [6.229147]
    state *calls.ClassifyState
  • replacement in tui/classify.go at line 121
    [6.229957][6.229957:230002]()
    func New(state *tools.ClassifyState) Model {
    [6.229957]
    [6.230002]
    func New(state *calls.ClassifyState) Model {
  • replacement in tui/classify.go at line 124
    [6.230116][6.230116:230180]()
    sorted := make([]tools.KeyBinding, len(state.Config.Bindings))
    [6.230116]
    [6.230180]
    sorted := make([]calls.KeyBinding, len(state.Config.Bindings))
  • replacement in tui/classify.go at line 369
    [6.13046][6.13046:13119]()
    m.state.ApplyBinding(&tools.BindingResult{Species: result.Species})
    [6.13046]
    [6.13119]
    m.state.ApplyBinding(&calls.BindingResult{Species: result.Species})
  • replacement in tui/classify.go at line 550
    [6.239778][6.239778:239843]()
    func saveClip(state *tools.ClassifyState, prefix string) error {
    [6.239778]
    [6.239843]
    func saveClip(state *calls.ClassifyState, prefix string) error {
  • replacement in tui/classify.go at line 666
    [6.242744][6.242744:242827]()
    func playCurrentSegmentAtSpeed(state *tools.ClassifyState, speed float64) string {
    [6.242744]
    [6.242827]
    func playCurrentSegmentAtSpeed(state *calls.ClassifyState, speed float64) string {
  • replacement in tui/classify.go at line 800
    [6.245750][6.1110:1204]()
    fmt.Fprintf(b, " • %s\n", tools.FormatLabels([]*utils.Label{l}, m.state.Config.Filter))
    [6.245750]
    [6.245845]
    fmt.Fprintf(b, " • %s\n", calls.FormatLabels([]*utils.Label{l}, m.state.Config.Filter))
  • replacement in tui/classify.go at line 832
    [6.247228][6.247228:247337]()
    func generateSpectrogramImage(state *tools.ClassifyState, dataPath string, seg *utils.Segment) image.Image {
    [6.247228]
    [6.247337]
    func generateSpectrogramImage(state *calls.ClassifyState, dataPath string, seg *utils.Segment) image.Image {
  • replacement in tui/classify.go at line 849
    [6.247958][6.247958:248078]()
    func inlineImageCmd(state *tools.ClassifyState, protocol utils.ImageProtocol, gen uint64, currentGen *uint64) tea.Cmd {
    [6.247958]
    [6.248078]
    func inlineImageCmd(state *calls.ClassifyState, protocol utils.ImageProtocol, gen uint64, currentGen *uint64) tea.Cmd {
  • file deletion: avianz_types.go (----------)
    [6.248737][6.524:563](),[6.563][6.1:1]()
    package tools
    // Element types of an AviaNZ .data file.
    type (
    	// AviaNZMeta is the metadata element in a .data file.
    	// Reviewer is a pointer so that it is left out of the JSON when unset.
    	AviaNZMeta struct {
    		Operator string  `json:"Operator"`
    		Reviewer *string `json:"Reviewer,omitempty"`
    		Duration float64 `json:"Duration"`
    	}

    	// AviaNZLabel represents a species label in a segment.
    	AviaNZLabel struct {
    		Species   string `json:"species"`
    		Certainty int    `json:"certainty"`
    		Filter    string `json:"filter"`
    	}

    	// AviaNZSegment represents a detection segment as a fixed five-slot
    	// array: [start, end, freq_low, freq_high, labels].
    	AviaNZSegment [5]any
    )
  • file deletion: parallel_aggregate.go (----------)
    [6.248737][6.2367:2412](),[6.2412][6.1:1]()
    package tools
    import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync/atomic"
    )
    // parallelResult is the common interface for birda/raven worker results.
    // aggregateResults consumes values of this type from a channel.
    type parallelResult interface {
    	// getError returns the processing error for this file, or nil.
    	getError() error
    	// getCalls returns the calls produced for this file.
    	getCalls() []ClusteredCall
    	// wasWritten reports whether the result counts as a written data file.
    	wasWritten() bool
    	// wasSkipped reports whether the result counts as a skipped data file.
    	wasSkipped() bool
    	// filePath returns the path of the source file this result belongs to.
    	filePath() string
    }
    // aggregateStats holds the collected results from a parallel fan-out/fan-in.
    // aggregateResults fills it in, one parallelResult at a time.
    type aggregateStats struct {
    // calls accumulates every call returned by the results, in arrival order.
    calls []ClusteredCall
    // speciesCount tallies the accumulated calls per EbirdCode.
    speciesCount map[string]int
    // dataFilesWritten counts results that reported wasWritten.
    dataFilesWritten int
    // dataFilesSkipped counts results that reported wasSkipped.
    dataFilesSkipped int
    // filesProcessed counts every result received, including failed ones.
    filesProcessed int
    // filesDeleted counts source files successfully removed by maybeDeleteFile.
    filesDeleted int
    // firstErr is the first error seen, either from a result or from a failed delete.
    firstErr error
    }
    // aggregateResults drains the results channel until it is closed and folds
    // every parallelResult into one aggregateStats value.
    //
    // For each result it remembers the first error, counts written and skipped
    // data files, appends the calls and tallies them per EbirdCode, optionally
    // deletes the source file (see maybeDeleteFile), and finally reports
    // progress. The processed counter is only advanced when progressHandler is
    // non-nil; total is passed through to the handler unchanged.
    func aggregateResults(
    	results <-chan parallelResult,
    	total int,
    	processed *atomic.Int32,
    	deleteFiles bool,
    	progressHandler func(int, int, string),
    ) aggregateStats {
    	stats := aggregateStats{speciesCount: make(map[string]int)}

    	for res := range results {
    		err := res.getError()
    		if stats.firstErr == nil && err != nil {
    			stats.firstErr = err
    		}

    		if res.wasWritten() {
    			stats.dataFilesWritten++
    		}
    		if res.wasSkipped() {
    			stats.dataFilesSkipped++
    		}

    		found := res.getCalls()
    		stats.calls = append(stats.calls, found...)
    		for i := range found {
    			stats.speciesCount[found[i].EbirdCode]++
    		}

    		stats.filesProcessed++
    		stats.maybeDeleteFile(deleteFiles, res)

    		if progressHandler == nil {
    			continue
    		}
    		done := int(processed.Add(1))
    		progressHandler(done, total, filepath.Base(res.filePath()))
    	}

    	return stats
    }
    // maybeDeleteFile removes the result's source file when deletion was
    // requested and the result reports that it was written. A successful removal
    // increments filesDeleted; a failed one is recorded as firstErr unless an
    // earlier error is already stored there.
    func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
    	if !(deleteFiles && result.wasWritten()) {
    		return
    	}

    	err := os.Remove(result.filePath())
    	if err == nil {
    		s.filesDeleted++
    		return
    	}

    	if s.firstErr != nil {
    		return
    	}
    	s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), err)
    }
    // sortCallsByFileAndTime orders calls in place: primarily by File, and by
    // StartTime among calls that share a file.
    func sortCallsByFileAndTime(calls []ClusteredCall) {
    	less := func(i, j int) bool {
    		a, b := &calls[i], &calls[j]
    		if a.File == b.File {
    			return a.StartTime < b.StartTime
    		}
    		return a.File < b.File
    	}
    	sort.Slice(calls, less)
    }
  • file deletion: calls_from_common.go (----------)
    [6.248737][6.7706:7750](),[6.7750][6.1:1]()
    package tools
    import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync"
    "sync/atomic"
    )
    // CallsFromSourceInput defines the common input for calls-from-source tools.
    // callsFromSource uses File when it is non-empty and otherwise falls back to
    // Folder; it returns an error when both are empty.
    type CallsFromSourceInput struct {
    // Folder is handed to CallSource.FindFiles to discover source files.
    Folder string `json:"folder"`
    // File is a single source file to process; it takes precedence over Folder.
    File string `json:"file"`
    // Delete removes each source file once it was processed and reported as written.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
    }
    // CallsFromSourceOutput defines the common output for calls-from-source tools.
    // On failure only Filter and Error are populated by the functions in this file.
    type CallsFromSourceOutput struct {
    // Calls holds all calls, sorted by file and then by start time.
    Calls []ClusteredCall `json:"calls"`
    // TotalCalls is len(Calls).
    TotalCalls int `json:"total_calls"`
    // SpeciesCount maps an EbirdCode to the number of calls carrying it.
    SpeciesCount map[string]int `json:"species_count"`
    // DataFilesWritten counts files whose processing reported "written".
    DataFilesWritten int `json:"data_files_written"`
    // DataFilesSkipped counts files whose processing reported "skipped".
    DataFilesSkipped int `json:"data_files_skipped"`
    // FilesProcessed counts the source files that were processed.
    FilesProcessed int `json:"files_processed"`
    // FilesDeleted counts the source files removed because Delete was set.
    FilesDeleted int `json:"files_deleted"`
    // Filter is set to the CallSource's Name().
    Filter string `json:"filter"`
    // Error carries the error message when the run failed; nil on success.
    Error *string `json:"error,omitempty"`
    }
    // CallSource abstracts a source of bird call data (Raven, BirdNET, etc.)
    // so that callsFromSource can drive any of them through the same pipeline.
    type CallSource interface {
    // Name returns the display name (e.g. "Raven", "BirdNET").
    // It is also used as the Filter value of the output and in error messages.
    Name() string
    // FindFiles discovers source files in the given folder
    FindFiles(folder string) ([]string, error)
    // ProcessFile processes a single source file and returns calls, write/skip status.
    // cache is the DirCache for the directory that contains path.
    ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
    }
    // callsFromSource is the shared entry point for all call source tools.
    //
    // It resolves the list of files (input.File wins over input.Folder), rejects
    // an empty selection, and then dispatches: fewer than 10 files are processed
    // sequentially to avoid goroutine overhead, larger batches go through the
    // parallel worker pool. Every failure is reported both as the returned error
    // and as output.Error.
    func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
    	var output CallsFromSourceOutput
    	output.Filter = src.Name()

    	// fail stores msg on the output and returns it as the error value.
    	fail := func(msg string) (CallsFromSourceOutput, error) {
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}

    	var files []string
    	switch {
    	case input.File != "":
    		files = []string{input.File}
    	case input.Folder != "":
    		found, err := src.FindFiles(input.Folder)
    		if err != nil {
    			return fail(fmt.Sprintf("Failed to find %s files: %v", src.Name(), err))
    		}
    		files = found
    	default:
    		return fail("Either --folder or --file must be specified")
    	}

    	if len(files) == 0 {
    		return fail(fmt.Sprintf("No %s files found", src.Name()))
    	}

    	if len(files) >= 10 {
    		// Large batch: parallel processing with DirCache
    		return callsFromSourceParallel(src, input, files)
    	}
    	// Single file or small batch: process sequentially
    	return callsFromSourceSequential(src, input, files)
    }
    // callsFromSourceSequential processes source files one at a time (for small
    // batches).
    //
    // One DirCache is kept per directory and handed to src.ProcessFile. The
    // cache for input.Folder is built up front and stored under the cleaned
    // folder path: lookups use filepath.Dir(file), which always returns a
    // cleaned path, so an uncleaned key (e.g. "data/" or "./data") would never
    // match and the cache would be built a second time.
    //
    // The first processing or deletion error aborts the run; the returned
    // output then carries only Filter and Error. On success the calls are
    // sorted by file and start time and all counters are filled in.
    func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    	var output CallsFromSourceOutput
    	output.Filter = src.Name()

    	// Build DirCache once for the folder, keyed the same way as the lookups below.
    	dirCaches := make(map[string]*DirCache)
    	if input.Folder != "" {
    		folder := filepath.Clean(input.Folder)
    		dirCaches[folder] = NewDirCache(folder)
    	}

    	speciesCount := make(map[string]int)
    	var allCalls []ClusteredCall
    	dataFilesWritten := 0
    	dataFilesSkipped := 0
    	filesProcessed := 0
    	filesDeleted := 0

    	for _, file := range files {
    		dir := filepath.Dir(file)
    		cache := dirCaches[dir]
    		if cache == nil {
    			cache = NewDirCache(dir)
    			dirCaches[dir] = cache
    		}

    		calls, written, skipped, err := src.ProcessFile(file, cache)
    		if err != nil {
    			errMsg := fmt.Sprintf("Error processing %s: %v", file, err)
    			output.Error = &errMsg
    			return output, fmt.Errorf("%s", errMsg)
    		}

    		if written {
    			dataFilesWritten++
    		}
    		if skipped {
    			dataFilesSkipped++
    		}
    		for _, call := range calls {
    			allCalls = append(allCalls, call)
    			speciesCount[call.EbirdCode]++
    		}
    		filesProcessed++

    		// Delete if requested and successfully processed
    		if input.Delete && written {
    			if err := os.Remove(file); err != nil {
    				errMsg := fmt.Sprintf("Failed to delete %s: %v", file, err)
    				output.Error = &errMsg
    				return output, fmt.Errorf("%s", errMsg)
    			}
    			filesDeleted++
    		}

    		if input.ProgressHandler != nil {
    			input.ProgressHandler(filesProcessed, len(files), filepath.Base(file))
    		}
    	}

    	// Sort all calls by file, then start time
    	sort.Slice(allCalls, func(i, j int) bool {
    		if allCalls[i].File != allCalls[j].File {
    			return allCalls[i].File < allCalls[j].File
    		}
    		return allCalls[i].StartTime < allCalls[j].StartTime
    	})

    	output.Calls = allCalls
    	output.TotalCalls = len(allCalls)
    	output.SpeciesCount = speciesCount
    	output.DataFilesWritten = dataFilesWritten
    	output.DataFilesSkipped = dataFilesSkipped
    	output.FilesProcessed = filesProcessed
    	output.FilesDeleted = filesDeleted
    	return output, nil
    }
    // sourceJob represents a single file to process (generic over CallSource).
    // Jobs are sent to sourceWorker goroutines over a channel.
    type sourceJob struct {
    // filePath is the source file the worker passes to CallSource.ProcessFile.
    filePath string
    }
    // sourceResult represents the result of processing a single source file.
    // Its accessor methods below expose the fields read-only.
    type sourceResult struct {
    	path    string
    	calls   []ClusteredCall
    	written bool
    	skipped bool
    	err     error
    }

    // filePath returns the path of the processed source file.
    func (r sourceResult) filePath() string {
    	return r.path
    }

    // getCalls returns the calls produced for the file.
    func (r sourceResult) getCalls() []ClusteredCall {
    	return r.calls
    }

    // wasWritten reports the "written" status returned by ProcessFile.
    func (r sourceResult) wasWritten() bool {
    	return r.written
    }

    // wasSkipped reports the "skipped" status returned by ProcessFile.
    func (r sourceResult) wasSkipped() bool {
    	return r.skipped
    }

    // getError returns the error returned by ProcessFile, or nil.
    func (r sourceResult) getError() error {
    	return r.err
    }
    // callsFromSourceParallel processes source files concurrently using a worker
    // pool and DirCache.
    //
    // DOT_DATA_WORKERS goroutines run sourceWorker. Both channels are buffered
    // to len(files), so queueing all jobs and emitting all results never blocks.
    // A helper goroutine closes the results channel once every worker has
    // returned, which ends the aggregation loop.
    //
    // The DirCache for input.Folder is seeded under the cleaned folder path:
    // workers look caches up by filepath.Dir(file), which always returns a
    // cleaned path, so an uncleaned key (e.g. "data/") would never be hit.
    //
    // If any file failed, the first error is returned and the output carries
    // only Filter and Error.
    func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    	var output CallsFromSourceOutput
    	output.Filter = src.Name()

    	total := len(files)
    	var processed atomic.Int32

    	// Build DirCache for the folder, keyed the same way the workers look it up.
    	dirCaches := &sync.Map{}
    	if input.Folder != "" {
    		folder := filepath.Clean(input.Folder)
    		dirCaches.Store(folder, NewDirCache(folder))
    	}

    	// Create job and result channels
    	jobs := make(chan sourceJob, total)
    	results := make(chan parallelResult, total)

    	// Start workers
    	var wg sync.WaitGroup
    	for range DOT_DATA_WORKERS {
    		wg.Add(1)
    		go sourceWorker(src, dirCaches, jobs, results, &wg)
    	}

    	// Send jobs
    	for _, file := range files {
    		jobs <- sourceJob{filePath: file}
    	}
    	close(jobs)

    	// Wait for workers to finish, then close results
    	go func() {
    		wg.Wait()
    		close(results)
    	}()

    	// Collect results with progress reporting
    	stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)

    	if stats.firstErr != nil {
    		errMsg := stats.firstErr.Error()
    		output.Error = &errMsg
    		return output, stats.firstErr
    	}

    	sortCallsByFileAndTime(stats.calls)

    	output.Calls = stats.calls
    	output.TotalCalls = len(stats.calls)
    	output.SpeciesCount = stats.speciesCount
    	output.DataFilesWritten = stats.dataFilesWritten
    	output.DataFilesSkipped = stats.dataFilesSkipped
    	output.FilesProcessed = stats.filesProcessed
    	output.FilesDeleted = stats.filesDeleted
    	return output, nil
    }
    // sourceWorker processes source files from the jobs channel until it is
    // closed, sending one sourceResult per job to results and signalling wg on
    // exit.
    //
    // dirCaches maps a directory path to its *DirCache and is shared by all
    // workers. On a miss the cache is built and installed with LoadOrStore, so
    // that when several workers miss on the same directory at once they all end
    // up using the single instance that won the race, instead of each
    // overwriting the map entry with its own copy.
    func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
    	defer wg.Done()

    	for job := range jobs {
    		dir := filepath.Dir(job.filePath)

    		// Get or create DirCache for this directory
    		cached, ok := dirCaches.Load(dir)
    		if !ok {
    			cached, _ = dirCaches.LoadOrStore(dir, NewDirCache(dir))
    		}
    		cache := cached.(*DirCache)

    		calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
    		results <- sourceResult{
    			path:    job.filePath,
    			calls:   calls,
    			written: written,
    			skipped: skipped,
    			err:     err,
    		}
    	}
    }
  • file deletion: isnight.go (----------)
    [6.248737][6.303895:303929](),[6.303929][6.299379:299379]()
    package tools
    import (
    "fmt"
    "strings"
    "time"
    "github.com/sixdouglas/suncalc"
    "skraak/utils"
    )
    // IsNightInput defines the input parameters for the isnight tool
    type IsNightInput struct {
    // FilePath is the WAV file whose header and timestamp are examined.
    FilePath string `json:"file_path"`
    // Lat and Lng are the recording coordinates passed to the astronomical
    // calculations (presumably decimal degrees — confirm against callers).
    Lat float64 `json:"lat"`
    Lng float64 `json:"lng"`
    // Timezone is forwarded to utils.ResolveTimestamp; per the IsNight doc
    // comment it is required when the timestamp comes from the filename.
    Timezone string `json:"timezone,omitempty"`
    }
    // IsNightOutput defines the output structure for the isnight tool.
    // All *UTC fields are RFC3339 strings.
    type IsNightOutput struct {
    // FilePath echoes the input file path.
    FilePath string `json:"file_path"`
    // TimestampUTC is the resolved recording timestamp, converted to UTC.
    TimestampUTC string `json:"timestamp_utc"`
    // SolarNight and CivilNight are copied from utils.CalculateAstronomicalData.
    SolarNight bool `json:"solar_night"`
    CivilNight bool `json:"civil_night"`
    // DiurnalActive is true when the recording midpoint is neither before
    // dawn nor after sunset.
    DiurnalActive bool `json:"diurnal_active"`
    // MoonPhase is copied from utils.CalculateAstronomicalData.
    MoonPhase float64 `json:"moon_phase"`
    // DurationSec is the duration read from the WAV header.
    DurationSec float64 `json:"duration_seconds"`
    // TimestampSrc is "audiomoth_comment", "filename" or "file_mod_time".
    TimestampSrc string `json:"timestamp_source"`
    // MidpointUTC is the recording midpoint used for the sun calculations.
    MidpointUTC string `json:"midpoint_utc"`
    // The sun event times below are empty when suncalc reports no such
    // event or a zero time for it.
    SunriseUTC string `json:"sunrise_utc,omitempty"`
    SunsetUTC string `json:"sunset_utc,omitempty"`
    DawnUTC string `json:"dawn_utc,omitempty"`
    DuskUTC string `json:"dusk_utc,omitempty"`
    }
    // IsNight determines if a WAV file was recorded at night based on its
    // metadata timestamp and the given GPS coordinates.
    //
    // Timestamp resolution order:
    // 1. AudioMoth comment (timezone embedded)
    // 2. Filename timestamp + timezone offset (requires --timezone)
    // 3. File modification time (system local time)
    func IsNight(input IsNightInput) (IsNightOutput, error) {
    var output IsNightOutput
    // Step 1: Parse WAV header
    metadata, err := utils.ParseWAVHeader(input.FilePath)
    if err != nil {
    return output, fmt.Errorf("WAV header parsing failed: %w", err)
    }
    output.DurationSec = metadata.Duration
    // Step 2: Resolve timestamp (use file mod time as fallback)
    tsResult, err := utils.ResolveTimestamp(metadata, input.FilePath, input.Timezone, true, nil)
    if err != nil {
    return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
    }
    // Determine timestamp source label
    tsSource := "file_mod_time"
    if tsResult.IsAudioMoth {
    tsSource = "audiomoth_comment"
    } else if utils.HasTimestampFilename(input.FilePath) {
    tsSource = "filename"
    }
    // Step 3: Calculate astronomical data using recording midpoint
    astroData := utils.CalculateAstronomicalData(
    tsResult.Timestamp.UTC(),
    metadata.Duration,
    input.Lat,
    input.Lng,
    )
    // Step 4: Get sun event times for informational output
    midpoint := utils.CalculateMidpointTime(tsResult.Timestamp.UTC(), metadata.Duration)
    sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)
    output.FilePath = input.FilePath
    output.TimestampUTC = tsResult.Timestamp.UTC().Format(time.RFC3339)
    output.SolarNight = astroData.SolarNight
    output.CivilNight = astroData.CivilNight
    output.MoonPhase = astroData.MoonPhase
    output.TimestampSrc = tsSource
    output.MidpointUTC = midpoint.Format(time.RFC3339)
    if dawn, ok := sunTimes[suncalc.Dawn]; ok && !dawn.Value.IsZero() {
    if sunset, ok := sunTimes[suncalc.Sunset]; ok && !sunset.Value.IsZero() {
    output.DiurnalActive = !midpoint.Before(dawn.Value) && !midpoint.After(sunset.Value)
    }
    }
    output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
    output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
    output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
    output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)
    }
    // String returns a human-readable, line-per-field summary of the isnight
    // result. The eight core fields are always printed; each sun event line is
    // printed only when its value is non-empty.
    func (o IsNightOutput) String() string {
    	var sb strings.Builder

    	fmt.Fprintf(&sb,
    		"File: %s\n"+
    			"Timestamp (UTC): %s\n"+
    			"Midpoint (UTC): %s\n"+
    			"Duration: %.1f seconds\n"+
    			"Source: %s\n"+
    			"Solar night: %v\n"+
    			"Civil night: %v\n"+
    			"Moon phase: %.2f\n",
    		o.FilePath,
    		o.TimestampUTC,
    		o.MidpointUTC,
    		o.DurationSec,
    		o.TimestampSrc,
    		o.SolarNight,
    		o.CivilNight,
    		o.MoonPhase,
    	)

    	sunEvents := [...]struct {
    		label string
    		value string
    	}{
    		{"Sunrise (UTC):", o.SunriseUTC},
    		{"Sunset (UTC):", o.SunsetUTC},
    		{"Dawn (UTC):", o.DawnUTC},
    		{"Dusk (UTC):", o.DuskUTC},
    	}
    	for _, ev := range sunEvents {
    		if ev.value == "" {
    			continue
    		}
    		fmt.Fprintf(&sb, "%s %s\n", ev.label, ev.value)
    	}

    	return sb.String()
    }
    // populateSunTimes fills in sun event times and diurnal status from suncalc results.
    func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
    // Diurnal: midpoint is between dawn and sunset
    // sunTimeUTC looks up the named suncalc event and formats its time as an
    // RFC3339 string in UTC. It returns "" when the event is missing from the
    // map or its time is the zero value.
    func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
    	entry, found := sunTimes[name]
    	if !found || entry.Value.IsZero() {
    		return ""
    	}
    	return entry.Value.UTC().Format(time.RFC3339)
    }
    populateSunTimes(&output, sunTimes, midpoint)
    return output, nil
    }
  • file deletion: import_unstructured.go (----------)
    [6.248737][6.315559:315605](),[6.315605][6.307677:307677]()
    package tools
    import (
    "context"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportUnstructuredInput defines the input parameters for importing files into an unstructured dataset
    type ImportUnstructuredInput struct {
    DatasetID string `json:"dataset_id"`
    FolderPath string `json:"folder_path"`
    Recursive *bool `json:"recursive,omitempty"`
    }
    // ImportUnstructuredOutput defines the output structure
    // of the unstructured import: per-outcome file counts, the summed
    // duration of the imported files, and any errors collected on the way.
    type ImportUnstructuredOutput struct {
    // TotalFiles is the number of WAV files found by the scan.
    TotalFiles int `json:"total_files"`
    // ImportedFiles counts files that were processed and not skipped.
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    // FailedFiles counts files whose processing returned an error.
    FailedFiles int `json:"failed_files"`
    // TotalDuration sums the duration of imported (not skipped) files.
    TotalDuration float64 `json:"total_duration_seconds"`
    // ProcessingTime is the elapsed wall time, formatted by time.Duration.String.
    ProcessingTime string `json:"processing_time"`
    // Errors collects scan-stage and process-stage failures.
    Errors []utils.FileImportError `json:"errors,omitempty"`
    }
    // ImportUnstructured imports WAV files into an unstructured dataset
    // Files are stored with minimal metadata: hash, duration, sample_rate, file_mod_time as timestamp
    // No location/cluster hierarchy, no astronomical data, no AudioMoth parsing
    func ImportUnstructured(
    ctx context.Context,
    input ImportUnstructuredInput,
    ) (ImportUnstructuredOutput, error) {
    startTime := time.Now()
    var output ImportUnstructuredOutput
    // Default recursive to true
    recursive := true
    if input.Recursive != nil {
    recursive = *input.Recursive
    }
    // Validate input
    if err := validateUnstructuredInput(input); err != nil {
    return output, fmt.Errorf("validation failed: %w", err)
    }
    // Scan for WAV files (no DB needed)
    files, scanErrors := scanWavFiles(input.FolderPath, recursive)
    output.Errors = append(output.Errors, scanErrors...)
    output.TotalFiles = len(files)
    if len(files) == 0 {
    output.ProcessingTime = time.Since(startTime).String()
    return output, nil
    }
    // Process each file
    for _, filePath := range files {
    fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)
    if procErr != nil {
    output.FailedFiles++
    output.Errors = append(output.Errors, utils.FileImportError{
    FileName: filepath.Base(filePath),
    Error: procErr.Error(),
    Stage: utils.StageProcess,
    })
    continue
    }
    if fileResult.Skipped {
    output.SkippedFiles++
    } else {
    output.ImportedFiles++
    output.TotalDuration += fileResult.Duration
    }
    }
    return nil
    })
    if err != nil {
    return output, err
    }
    output.ProcessingTime = time.Since(startTime).String()
    return output, nil
    }
    // unstructuredFileResult holds the result of processing a single file.
    // Duration is set for skipped duplicates as well as for inserted files.
    type unstructuredFileResult struct {
    Skipped bool // True if duplicate
    Duration float64 // Duration in seconds
    }
    // processUnstructuredFile processes a single WAV file for unstructured import.
    //
    // It parses the WAV header, hashes the file, and checks the hash against the
    // database through tx. A duplicate is reported with Skipped set and nothing
    // is written. Otherwise one row is inserted into file (with NULL location,
    // cluster and astronomical columns) and one into file_dataset linking the
    // new file to datasetID. Both inserts run on the caller's transaction, so
    // committing or rolling back is the caller's responsibility.
    //
    // On any failure it returns a nil result and an error naming the failed step.
    func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
    result := &unstructuredFileResult{}
    // Step 1: Parse WAV header
    metadata, err := utils.ParseWAVHeader(filePath)
    if err != nil {
    return nil, fmt.Errorf("WAV header parsing failed: %w", err)
    }
    // Step 2: Calculate hash
    hash, err := utils.ComputeXXH64(filePath)
    if err != nil {
    return nil, fmt.Errorf("hash calculation failed: %w", err)
    }
    // Step 3: Check for duplicate - if exists, skip entirely (do not link to dataset)
    _, isDuplicate, err := utils.CheckDuplicateHash(tx, hash)
    if err != nil {
    return nil, fmt.Errorf("duplicate check failed: %w", err)
    }
    if isDuplicate {
    // File already exists in database - skip completely, do not link to dataset
    result.Skipped = true
    result.Duration = metadata.Duration
    return result, nil
    }
    // Step 4: Generate file ID
    fileID, err := utils.GenerateLongID()
    if err != nil {
    return nil, fmt.Errorf("ID generation failed: %w", err)
    }
    // Step 5: Use file modification time as timestamp (no timezone conversion)
    timestamp := metadata.FileModTime
    // Step 6: Insert into file table
    // Only the base name of the path is stored as file_name.
    _, err = tx.Exec(`
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, cluster_id,
    timestamp_local, duration, sample_rate,
    maybe_solar_night, maybe_civil_night, moon_phase,
    active
    ) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)
    `,
    fileID,
    filepath.Base(filePath),
    hash,
    timestamp,
    metadata.Duration,
    metadata.SampleRate,
    )
    if err != nil {
    return nil, fmt.Errorf("file insert failed: %w", err)
    }
    // Step 7: Insert into file_dataset table
    // This must follow the file insert because it references the new fileID.
    _, err = tx.Exec(
    "INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)",
    fileID, datasetID,
    )
    if err != nil {
    return nil, fmt.Errorf("file_dataset insert failed: %w", err)
    }
    result.Duration = metadata.Duration
    return result, nil
    }
    // validateUnstructuredInput validates the input parameters
    func validateUnstructuredInput(input ImportUnstructuredInput) error {
    // Validate dataset ID format
    if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
    return err
    }
    // Verify folder exists
    info, err := os.Stat(input.FolderPath)
    if err != nil {
    return fmt.Errorf("folder not accessible: %w", err)
    }
    if !info.IsDir() {
    return fmt.Errorf("path is not a directory: %s", input.FolderPath)
    }
    // Verify dataset exists and is active
    if _, err := db.DatasetExistsAndActive(database, input.DatasetID); err != nil {
    return err
    }
    // Verify dataset is 'unstructured' type
    if err := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {
    return err
    }
    return nil
    })
    }
    // scanWavFiles scans a folder for WAV files (".wav", matched
    // case-insensitively) and returns their paths together with any scan-stage
    // errors.
    //
    // With recursive set, the whole tree below folderPath is walked; entries
    // that cannot be read are recorded as errors and the walk continues.
    // Without it, only the direct children of folderPath are listed; if the
    // folder itself cannot be read, the returned file list is nil and the error
    // is reported.
    func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
    	var files []string
    	// Named scanErrs rather than errors to avoid shadowing the stdlib package name.
    	var scanErrs []utils.FileImportError

    	isWav := func(name string) bool {
    		return strings.HasSuffix(strings.ToLower(name), ".wav")
    	}
    	recordErr := func(name string, err error) {
    		scanErrs = append(scanErrs, utils.FileImportError{
    			FileName: name,
    			Error:    err.Error(),
    			Stage:    utils.StageScan,
    		})
    	}

    	if !recursive {
    		// Non-recursive: only scan top-level
    		entries, err := os.ReadDir(folderPath)
    		if err != nil {
    			recordErr(folderPath, err)
    			return nil, scanErrs
    		}
    		for _, entry := range entries {
    			if !entry.IsDir() && isWav(entry.Name()) {
    				files = append(files, filepath.Join(folderPath, entry.Name()))
    			}
    		}
    		return files, scanErrs
    	}

    	// The callback only ever runs in recursive mode, so it needs no
    	// "skip sub-directories" case.
    	walk := func(path string, d fs.DirEntry, err error) error {
    		if err != nil {
    			// Record and keep walking; returning nil skips the unreadable entry.
    			recordErr(path, err)
    			return nil
    		}
    		if !d.IsDir() && isWav(d.Name()) {
    			files = append(files, path)
    		}
    		return nil
    	}
    	if err := filepath.WalkDir(folderPath, walk); err != nil {
    		recordErr(folderPath, err)
    	}
    	return files, scanErrs
    }
    return db.WithReadDB(resolveDBPath(input.DBPath), func(database *sql.DB) error {
    err := db.WithWriteTx(ctx, resolveDBPath(input.DBPath), "import_unstructured", func(database *sql.DB, tx *db.LoggedTx) error {
    DBPath string `json:"db_path"`
    "database/sql"
  • file deletion: import_segments_test.go (----------)
    [6.248737][6.318117:318164](),[6.318164][6.315607:315607]()
    package tools
    import (
    "testing"
    "skraak/utils"
    )
    // TestValidateSegmentImportInput checks that validateSegmentImportInput
    // rejects malformed dataset, location and cluster IDs. Every case expects an
    // error; failMsg is reported when validation unexpectedly succeeds.
    func TestValidateSegmentImportInput(t *testing.T) {
    	cases := []struct {
    		name    string
    		input   ImportSegmentsInput
    		failMsg string
    	}{
    		{
    			name:    "invalid dataset ID - too short",
    			input:   ImportSegmentsInput{DatasetID: "abc"},
    			failMsg: "expected error for short dataset ID",
    		},
    		{
    			name:    "invalid dataset ID - too long",
    			input:   ImportSegmentsInput{DatasetID: "abc123def456ghi789"},
    			failMsg: "expected error for long dataset ID",
    		},
    		{
    			name:    "invalid dataset ID - invalid characters",
    			input:   ImportSegmentsInput{DatasetID: "abc123!!!456"},
    			failMsg: "expected error for invalid characters in dataset ID",
    		},
    		{
    			name: "invalid location ID",
    			input: ImportSegmentsInput{
    				DatasetID:  "abc123def456",
    				LocationID: "invalid",
    			},
    			failMsg: "expected error for invalid location ID",
    		},
    		{
    			name: "invalid cluster ID",
    			input: ImportSegmentsInput{
    				DatasetID:  "abc123def456",
    				LocationID: "xyz789uvw012",
    				ClusterID:  "invalid",
    			},
    			failMsg: "expected error for invalid cluster ID",
    		},
    	}

    	for i := range cases {
    		tc := cases[i]
    		t.Run(tc.name, func(t *testing.T) {
    			if err := validateSegmentImportInput(tc.input); err == nil {
    				t.Fatal(tc.failMsg)
    			}
    		})
    	}
    }
    func TestCountTotalSegments(t *testing.T) {
    t.Run("empty", func(t *testing.T) {
    count := countTotalSegments(map[string]scannedDataFile{})
    if count != 0 {
    t.Errorf("expected 0, got %d", count)
    }
    })
    t.Run("single file - no segments", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{}},
    }
    count := countTotalSegments(files)
    if count != 0 {
    t.Errorf("expected 0, got %d", count)
    }
    })
    t.Run("single file - multiple segments", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{{}, {}, {}}},
    }
    count := countTotalSegments(files)
    if count != 3 {
    t.Errorf("expected 3, got %d", count)
    }
    })
    t.Run("multiple files", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{{}, {}}},
    "file2": {Segments: []*utils.Segment{{}}},
    "file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
    }
    count := countTotalSegments(files)
    if count != 7 {
    t.Errorf("expected 7, got %d", count)
    }
    })
    }
  • file deletion: import_segments.go (----------)
    [6.248737][6.345207:345249](),[6.345249][6.318166:318166]()
    package tools
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportSegmentsInput defines the input parameters for the import_segments tool
    type ImportSegmentsInput struct {
    	// Folder is searched for .data files.
    	Folder string `json:"folder"`
    	// Mapping is the path of the mapping file loaded via utils.LoadMappingFile.
    	Mapping string `json:"mapping"`
    	// DatasetID, LocationID and ClusterID identify the import target; the
    	// tests expect malformed IDs to fail validation.
    	DatasetID  string `json:"dataset_id"`
    	LocationID string `json:"location_id"`
    	ClusterID  string `json:"cluster_id"`
    	// ProgressHandler is an optional progress callback. It is excluded from
    	// JSON: encoding/json cannot encode func values, so without the "-" tag
    	// marshalling this struct would fail with an unsupported-type error.
    	ProgressHandler func(processed, total int, message string) `json:"-"`
    }
    // ImportSegmentsOutput defines the output structure for the import_segments tool.
    // ImportSegments initialises Segments and Errors to empty, non-nil slices.
    type ImportSegmentsOutput struct {
    // Summary holds the aggregate counters and timing of the run.
    Summary ImportSegmentsSummary `json:"summary"`
    // Segments lists the imported segments.
    Segments []SegmentImport `json:"segments"`
    // Errors collects the errors recorded during the import, e.g. from validation.
    Errors []ImportSegmentError `json:"errors,omitempty"`
    }
    // ImportSegmentsSummary provides summary statistics for the import operation
    type ImportSegmentsSummary struct {
    // DataFilesFound is the number of .data files discovered in the folder.
    DataFilesFound int `json:"data_files_found"`
    DataFilesProcessed int `json:"data_files_processed"`
    TotalSegments int `json:"total_segments"`
    ImportedSegments int `json:"imported_segments"`
    ImportedLabels int `json:"imported_labels"`
    ImportedSubtypes int `json:"imported_subtypes"`
    // ProcessingTimeMs is the elapsed wall time of the run in milliseconds.
    ProcessingTimeMs int64 `json:"processing_time_ms"`
    }
    // SegmentImport represents an imported segment in the output
    // NOTE(review): units of the time and frequency fields are not visible
    // here — presumably seconds and Hz; confirm against the .data parser.
    type SegmentImport struct {
    SegmentID string `json:"segment_id"`
    FileName string `json:"file_name"`
    StartTime float64 `json:"start_time"`
    EndTime float64 `json:"end_time"`
    FreqLow float64 `json:"freq_low"`
    FreqHigh float64 `json:"freq_high"`
    // Labels holds the labels imported for this segment.
    Labels []LabelImport `json:"labels"`
    }
    // LabelImport represents an imported label in the output.
    // CallType and Comment are omitted from the JSON when empty.
    type LabelImport struct {
    LabelID string `json:"label_id"`
    Species string `json:"species"`
    CallType string `json:"calltype,omitempty"`
    Filter string `json:"filter"`
    Certainty int `json:"certainty"`
    Comment string `json:"comment,omitempty"`
    }
    // ImportSegmentError records errors encountered during segment import.
    type ImportSegmentError struct {
    // File is the base name of the file the error relates to; empty for
    // errors that are not tied to a single file (e.g. transaction failures).
    File string `json:"file,omitempty"`
    // Stage identifies the import phase in which the error occurred.
    Stage utils.ImportStage `json:"stage"`
    // Message is the human-readable description of the problem.
    Message string `json:"message"`
    }
    // scannedDataFile holds parsed data for a .data file together with the
    // identifiers resolved for its WAV file during validation.
    type scannedDataFile struct {
    // DataPath is the path of the .data file.
    DataPath string
    // WavPath is DataPath with the ".data" suffix removed.
    WavPath string
    // WavHash is the XXH64 hash of the WAV file, filled in by validateAndMapFiles.
    WavHash string
    // FileID is the matching database file ID, filled in by validateAndMapFiles.
    FileID string
    // Duration starts as the duration from the .data metadata and is replaced
    // by the database file row's duration in validateAndMapFiles.
    Duration float64
    // Segments are the segments parsed from the .data file.
    Segments []*utils.Segment
    }
    // ImportSegments imports segments from AviaNZ .data files into the database.
    //
    // It validates the input, loads the mapping file, finds the .data files in
    // input.Folder, validates them against the database, inserts segments and
    // labels through importSegmentsIntoDB, and finally writes the generated IDs
    // back to the .data files. Per-file problems are collected in output.Errors;
    // a non-nil error is returned only when a whole phase fails, in which case
    // the partially filled output is returned alongside it.
    func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
    startTime := time.Now()
    var output ImportSegmentsOutput
    // Start from empty (non-nil) slices; later results are appended to them.
    output.Segments = make([]SegmentImport, 0)
    output.Errors = make([]ImportSegmentError, 0)
    // Phase A: Input Validation
    if err := validateSegmentImportInput(input); err != nil {
    return output, err
    }
    // Load mapping file
    mapping, err := utils.LoadMappingFile(input.Mapping)
    if err != nil {
    return output, fmt.Errorf("failed to load mapping file: %w", err)
    }
    // Find .data files
    dataFiles, err := utils.FindDataFiles(input.Folder)
    if err != nil {
    return output, fmt.Errorf("failed to find .data files: %w", err)
    }
    output.Summary.DataFilesFound = len(dataFiles)
    if len(dataFiles) == 0 {
    return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
    }
    // Phase B+C: Parse data files and validate against DB
    // NOTE(review): `database` is not declared anywhere in this function as
    // listed, so the err checked below is the stale one from FindDataFiles. The
    // statement `database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))`
    // appears detached further down this listing and presumably belongs here — confirm.
    if err != nil {
    return output, fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()
    val, valErrors, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
    output.Errors = append(output.Errors, valErrors...)
    if err != nil {
    return output, err
    }
    // Nothing passed validation: report the collected errors without importing.
    if val == nil || len(val.fileIDMap) == 0 {
    output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
    return output, nil
    }
    // Phase D: Transactional Import
    importedSegments, importedLabels, importedSubtypes, fileUpdates, importErrors := importSegmentsIntoDB(
    ctx, database, val.fileIDMap, val.scannedFiles, mapping, val.filterIDMap, val.speciesIDMap, val.calltypeIDMap, input.DatasetID, input.ProgressHandler,
    )
    output.Errors = append(output.Errors, importErrors...)
    output.Segments = append(output.Segments, importedSegments...)
    // Phase E: Write IDs back to .data files
    if len(fileUpdates) > 0 {
    writeErrors := writeIDsToDataFiles(fileUpdates)
    output.Errors = append(output.Errors, writeErrors...)
    }
    output.Summary.DataFilesProcessed = len(val.fileIDMap)
    output.Summary.TotalSegments = countTotalSegments(val.fileIDMap)
    output.Summary.ImportedSegments = len(importedSegments)
    output.Summary.ImportedLabels = importedLabels
    output.Summary.ImportedSubtypes = importedSubtypes
    output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
    return output, nil
    }
    // validateSegmentImportInput checks the request before any database work:
    // the folder must exist and be a directory, the mapping file must exist, and
    // the dataset, location and cluster IDs must pass utils.ValidateShortID.
    // The first failing check determines the returned error.
    func validateSegmentImportInput(input ImportSegmentsInput) error {
    	folderInfo, statErr := os.Stat(input.Folder)
    	if statErr != nil {
    		return fmt.Errorf("folder does not exist: %s", input.Folder)
    	}
    	if !folderInfo.IsDir() {
    		return fmt.Errorf("path is not a folder: %s", input.Folder)
    	}

    	if _, statErr = os.Stat(input.Mapping); statErr != nil {
    		return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
    	}

    	// IDs are checked in a fixed order: dataset, then location, then cluster.
    	idChecks := []struct{ value, field string }{
    		{input.DatasetID, "dataset_id"},
    		{input.LocationID, "location_id"},
    		{input.ClusterID, "cluster_id"},
    	}
    	for _, check := range idChecks {
    		if err := utils.ValidateShortID(check.value, check.field); err != nil {
    			return err
    		}
    	}
    	return nil
    }
    // validateSegmentHierarchy confirms the dataset/location/cluster chain in the
    // database: the dataset must be acceptable for import, the location must
    // belong to the dataset, and the cluster must belong to the location. Checks
    // run in that order and the first error is returned unchanged.
    func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
    	steps := []func() error{
    		func() error { return db.ValidateDatasetTypeForImport(dbConn, datasetID) },
    		func() error { return db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID) },
    		func() error { return db.ClusterBelongsToLocation(dbConn, clusterID, locationID) },
    	}
    	for _, step := range steps {
    		if err := step(); err != nil {
    			return err
    		}
    	}
    	return nil
    }
    // scanAllDataFiles parses every .data file and gathers the distinct filter,
    // species and calltype names used by their labels.
    //
    // A .data file is skipped, with a validation error recorded, when the WAV
    // file next to it (same path without the ".data" suffix) cannot be stat'ed or
    // when the .data file fails to parse. The folder parameter is currently unused.
    //
    // Returns, in order: the scanned files, the errors, the set of filter names,
    // the set of species names, and the calltype names grouped by species.
    func scanAllDataFiles(dataFiles []string, folder string) (
    	[]scannedDataFile,
    	[]ImportSegmentError,
    	map[string]bool,
    	map[string]bool,
    	map[string]map[string]bool,
    ) {
    	var results []scannedDataFile
    	var problems []ImportSegmentError
    	filters := map[string]bool{}
    	species := map[string]bool{}
    	calltypes := map[string]map[string]bool{} // species -> calltype -> true

    	// skip records a validation error against the given .data file.
    	skip := func(dataPath, message string) {
    		problems = append(problems, ImportSegmentError{
    			File:    filepath.Base(dataPath),
    			Stage:   utils.StageValidation,
    			Message: message,
    		})
    	}

    	for _, dataPath := range dataFiles {
    		wavPath := strings.TrimSuffix(dataPath, ".data")
    		if _, statErr := os.Stat(wavPath); statErr != nil {
    			skip(dataPath, fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)))
    			continue
    		}

    		parsed, parseErr := utils.ParseDataFile(dataPath)
    		if parseErr != nil {
    			skip(dataPath, fmt.Sprintf("failed to parse .data file: %v", parseErr))
    			continue
    		}

    		for _, segment := range parsed.Segments {
    			for _, lbl := range segment.Labels {
    				filters[lbl.Filter] = true
    				species[lbl.Species] = true
    				if lbl.CallType == "" {
    					continue
    				}
    				bySpecies := calltypes[lbl.Species]
    				if bySpecies == nil {
    					bySpecies = make(map[string]bool)
    					calltypes[lbl.Species] = bySpecies
    				}
    				bySpecies[lbl.CallType] = true
    			}
    		}

    		results = append(results, scannedDataFile{
    			DataPath: dataPath,
    			WavPath:  wavPath,
    			Duration: parsed.Meta.Duration,
    			Segments: parsed.Segments,
    		})
    	}
    	return results, problems, filters, species, calltypes
    }
    // validateFiltersExist checks that every name in filterNames is an active
    // filter in the database and returns a name -> filter ID map.
    //
    // An empty filterNames yields an empty map without touching the database.
    // It returns an error when the query fails, when a row cannot be scanned or
    // iteration ends with an error, or when any requested filter is missing (the
    // missing names are listed in the message, in no particular order).
    func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
    	filterIDMap := make(map[string]string, len(filterNames))
    	if len(filterNames) == 0 {
    		return filterIDMap, nil
    	}

    	args := make([]any, 0, len(filterNames))
    	for name := range filterNames {
    		args = append(args, name)
    	}
    	query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(args)) + `) AND active = true`

    	rows, err := dbConn.Query(query, args...)
    	if err != nil {
    		return nil, fmt.Errorf("failed to query filters: %w", err)
    	}
    	defer rows.Close()

    	for rows.Next() {
    		var id, name string
    		// A scan failure used to be dropped silently, which later surfaced as a
    		// misleading "filters not found" error; report the real cause instead.
    		if err := rows.Scan(&id, &name); err != nil {
    			return nil, fmt.Errorf("failed to scan filter row: %w", err)
    		}
    		filterIDMap[name] = id
    	}
    	// rows.Next returns false both at end of data and on error; tell them apart.
    	if err := rows.Err(); err != nil {
    		return nil, fmt.Errorf("failed to read filter rows: %w", err)
    	}

    	var missing []string
    	for name := range filterNames {
    		if _, exists := filterIDMap[name]; !exists {
    			missing = append(missing, name)
    		}
    	}
    	if len(missing) > 0 {
    		return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
    	}
    	return filterIDMap, nil
    }
    // loadSpeciesCalltypeIDs loads the database IDs for the species and calltypes
    // referenced by the scanned .data files.
    //
    // Names from the .data files are translated through mapping first; species
    // without a mapping entry are ignored here. It returns a map of DB species
    // label -> species ID and a nested map of DB species label -> DB calltype
    // label -> calltype ID. A calltype with no matching active row is simply left
    // out of the map. An error is returned when a query, row scan or row
    // iteration fails.
    func loadSpeciesCalltypeIDs(
    	dbConn *sql.DB,
    	mapping utils.MappingFile,
    	uniqueSpecies map[string]bool,
    	uniqueCalltypes map[string]map[string]bool,
    ) (map[string]string, map[string]map[string]string, error) {
    	speciesIDMap := make(map[string]string)
    	calltypeIDMap := make(map[string]map[string]string) // dbSpecies -> dbCalltype -> calltype_id

    	// Collect the distinct DB species labels reachable through the mapping.
    	dbSpeciesSet := make(map[string]bool)
    	for dataSpecies := range uniqueSpecies {
    		if dbSpecies, ok := mapping.GetDBSpecies(dataSpecies); ok {
    			dbSpeciesSet[dbSpecies] = true
    		}
    	}

    	// Load species IDs with a single IN query.
    	if len(dbSpeciesSet) > 0 {
    		args := make([]any, 0, len(dbSpeciesSet))
    		for s := range dbSpeciesSet {
    			args = append(args, s)
    		}
    		query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(args)) + `) AND active = true`
    		rows, err := dbConn.Query(query, args...)
    		if err != nil {
    			return nil, nil, fmt.Errorf("failed to query species: %w", err)
    		}
    		defer rows.Close()
    		for rows.Next() {
    			var id, label string
    			// Previously a failed scan silently dropped the species.
    			if err := rows.Scan(&id, &label); err != nil {
    				return nil, nil, fmt.Errorf("failed to scan species row: %w", err)
    			}
    			speciesIDMap[label] = id
    		}
    		if err := rows.Err(); err != nil {
    			return nil, nil, fmt.Errorf("failed to read species rows: %w", err)
    		}
    	}

    	// Load calltype IDs, one lookup per (species, calltype) pair.
    	for dataSpecies, ctSet := range uniqueCalltypes {
    		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
    		if !ok {
    			continue
    		}
    		if calltypeIDMap[dbSpecies] == nil {
    			calltypeIDMap[dbSpecies] = make(map[string]string)
    		}
    		for dataCalltype := range ctSet {
    			dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)
    			var calltypeID string
    			err := dbConn.QueryRow(`
    SELECT ct.id
    FROM call_type ct
    JOIN species s ON ct.species_id = s.id
    WHERE s.label = ? AND ct.label = ? AND ct.active = true
    `, dbSpecies, dbCalltype).Scan(&calltypeID)
    			switch {
    			case err == nil:
    				calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
    			case err == sql.ErrNoRows:
    				// No such active calltype: leave it unmapped, as before.
    			default:
    				// A real query failure must not masquerade as "calltype not found".
    				return nil, nil, fmt.Errorf("failed to query calltype %s/%s: %w", dbSpecies, dbCalltype, err)
    			}
    		}
    	}
    	return speciesIDMap, calltypeIDMap, nil
    }
    // validateAndMapFiles resolves each scanned file to a database file row and
    // filters out the files that cannot be imported.
    //
    // For every entry it computes the WAV file's XXH64 hash, looks up an active
    // file row with that hash in clusterID, checks that the file is linked to
    // datasetID in file_dataset, and checks that the file has no active labels.
    // Files passing every check are returned keyed by file ID with WavHash,
    // FileID and Duration (from the database row) filled in; each other file
    // adds one ImportSegmentError and is skipped.
    func validateAndMapFiles(
    	dbConn *sql.DB,
    	scannedFiles []scannedDataFile,
    	clusterID string,
    	datasetID string,
    ) (map[string]scannedDataFile, []ImportSegmentError) {
    	accepted := make(map[string]scannedDataFile)
    	var problems []ImportSegmentError

    	for _, sf := range scannedFiles {
    		wavName := filepath.Base(sf.WavPath)
    		// reject records why the current file is being skipped.
    		reject := func(stage utils.ImportStage, format string, args ...any) {
    			problems = append(problems, ImportSegmentError{
    				File:    wavName,
    				Stage:   stage,
    				Message: fmt.Sprintf(format, args...),
    			})
    		}

    		hash, err := utils.ComputeXXH64(sf.WavPath)
    		if err != nil {
    			reject(utils.StageHash, "failed to compute hash: %v", err)
    			continue
    		}
    		sf.WavHash = hash

    		// Locate the file row by hash within the cluster.
    		var fileID string
    		var duration float64
    		err = dbConn.QueryRow(`
    SELECT id, duration FROM file WHERE xxh64_hash = ? AND cluster_id = ? AND active = true
    `, hash, clusterID).Scan(&fileID, &duration)
    		switch {
    		case err == sql.ErrNoRows:
    			reject(utils.StageValidation, "file hash not found in database for cluster (hash: %s)", hash)
    			continue
    		case err != nil:
    			reject(utils.StageValidation, "failed to query file: %v", err)
    			continue
    		}
    		sf.FileID = fileID
    		sf.Duration = duration

    		// The file must be linked to the dataset through file_dataset.
    		var linked bool
    		err = dbConn.QueryRow(`
    SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)
    `, fileID, datasetID).Scan(&linked)
    		if err != nil {
    			reject(utils.StageValidation, "failed to verify file-dataset link: %v", err)
    			continue
    		}
    		if !linked {
    			reject(utils.StageValidation, "file exists in cluster but is not linked to dataset %s", datasetID)
    			continue
    		}

    		// Only files without active labels may be imported.
    		var existingLabels int
    		err = dbConn.QueryRow(`
    SELECT COUNT(*) FROM label l
    JOIN segment s ON l.segment_id = s.id
    WHERE s.file_id = ? AND l.active = true
    `, fileID).Scan(&existingLabels)
    		if err != nil {
    			reject(utils.StageValidation, "failed to check existing labels: %v", err)
    			continue
    		}
    		if existingLabels > 0 {
    			reject(utils.StageValidation, "file already has %d label(s) - fresh imports only", existingLabels)
    			continue
    		}

    		accepted[fileID] = sf
    	}
    	return accepted, problems
    }
    // dataFileUpdate holds data to write back to a .data file after import.
    type dataFileUpdate struct {
    // DataPath is the .data file to rewrite.
    DataPath string
    // WavHash is stored in the file's metadata under "skraak_hash".
    WavHash string
    // LabelIDs holds the generated label IDs, each stored on its label under
    // "skraak_label_id".
    LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
    }
    // importSegmentsIntoDB performs the transactional import of all validated files.
    //
    // Every segment of every file in fileIDMap is passed to importSegment inside
    // a single logged transaction. A segment that ends up with no imported labels
    // is removed again; otherwise it is added to the result and its label IDs are
    // recorded for writing back to the .data file. The scannedFiles parameter is
    // currently unused.
    //
    // Returns the imported segments, the number of imported labels and subtypes,
    // the per-file updates to write back, and the collected errors. If the
    // transaction cannot be started or committed, only the errors are returned.
    func importSegmentsIntoDB(
    	ctx context.Context,
    	database *sql.DB,
    	fileIDMap map[string]scannedDataFile,
    	scannedFiles []scannedDataFile,
    	mapping utils.MappingFile,
    	filterIDMap map[string]string,
    	speciesIDMap map[string]string,
    	calltypeIDMap map[string]map[string]string,
    	datasetID string,
    	progressHandler func(processed, total int, message string),
    ) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
    	var importedSegments []SegmentImport
    	var errors []ImportSegmentError
    	var fileUpdates []dataFileUpdate
    	importedLabels := 0
    	importedSubtypes := 0

    	tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
    	if err != nil {
    		errors = append(errors, ImportSegmentError{
    			Stage:   utils.StageImport,
    			Message: fmt.Sprintf("failed to begin transaction: %v", err),
    		})
    		return nil, 0, 0, nil, errors
    	}
    	defer tx.Rollback()

    	totalFiles := len(fileIDMap)
    	processedFiles := 0
    	for _, sf := range fileIDMap {
    		if sf.FileID == "" {
    			continue
    		}
    		processedFiles++
    		if progressHandler != nil {
    			progressHandler(processedFiles, totalFiles, filepath.Base(sf.DataPath))
    		}

    		fileUpdate := dataFileUpdate{
    			DataPath: sf.DataPath,
    			WavHash:  sf.WavHash,
    			LabelIDs: make(map[int]map[int]string),
    		}
    		for segIdx, seg := range sf.Segments {
    			segImp, labelIDs, subtypes, segErrs := importSegment(ctx, tx, seg, segIdx, sf, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
    			errors = append(errors, segErrs...)
    			importedSubtypes += subtypes

    			if len(segImp.Labels) > 0 {
    				importedSegments = append(importedSegments, segImp)
    				importedLabels += len(labelIDs)
    				fileUpdate.LabelIDs[segIdx] = labelIDs
    				continue
    			}

    			// No label succeeded. importSegment returns an empty SegmentID when
    			// the segment row itself was never inserted; in that case there is
    			// nothing to clean up, so do not issue a DELETE for id = ''.
    			if segImp.SegmentID == "" {
    				continue
    			}
    			// Delete orphaned segment (row exists but none of its labels did).
    			if _, err := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, segImp.SegmentID); err != nil {
    				errors = append(errors, ImportSegmentError{
    					File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    					Message: fmt.Sprintf("failed to delete orphaned segment: %v", err),
    				})
    			}
    		}
    		fileUpdates = append(fileUpdates, fileUpdate)
    	}

    	if err := tx.Commit(); err != nil {
    		errors = append(errors, ImportSegmentError{
    			Stage:   utils.StageImport,
    			Message: fmt.Sprintf("failed to commit transaction: %v", err),
    		})
    		return nil, 0, 0, nil, errors
    	}
    	return importedSegments, importedLabels, importedSubtypes, fileUpdates, errors
    }
    // countTotalSegments returns the number of segments across all validated
    // files in fileIDMap.
    func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
    	var total int
    	for fileID := range fileIDMap {
    		total += len(fileIDMap[fileID].Segments)
    	}
    	return total
    }
    // writeIDsToDataFiles stores the import results back in the .data files.
    //
    // Each file is re-parsed, "skraak_hash" is set in its metadata, and
    // "skraak_label_id" is set on every label that has a generated ID. Segment or
    // label indices beyond what the re-parsed file contains are ignored. A file
    // that cannot be parsed or written adds one error and the loop moves on.
    func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
    	var problems []ImportSegmentError
    	// fail records an import-stage error against the given .data file.
    	fail := func(dataPath, message string) {
    		problems = append(problems, ImportSegmentError{
    			File:    filepath.Base(dataPath),
    			Stage:   utils.StageImport,
    			Message: message,
    		})
    	}

    	for _, update := range fileUpdates {
    		parsed, parseErr := utils.ParseDataFile(update.DataPath)
    		if parseErr != nil {
    			fail(update.DataPath, fmt.Sprintf("failed to re-parse .data file for writing: %v", parseErr))
    			continue
    		}

    		if parsed.Meta.Extra == nil {
    			parsed.Meta.Extra = make(map[string]any)
    		}
    		parsed.Meta.Extra["skraak_hash"] = update.WavHash

    		for segIdx, idsByLabel := range update.LabelIDs {
    			if segIdx >= len(parsed.Segments) {
    				continue
    			}
    			segment := parsed.Segments[segIdx]
    			for labelIdx, labelID := range idsByLabel {
    				if labelIdx >= len(segment.Labels) {
    					continue
    				}
    				target := segment.Labels[labelIdx]
    				if target.Extra == nil {
    					target.Extra = make(map[string]any)
    				}
    				target.Extra["skraak_label_id"] = labelID
    			}
    		}

    		if writeErr := parsed.Write(update.DataPath); writeErr != nil {
    			fail(update.DataPath, fmt.Sprintf("failed to write updated .data file: %v", writeErr))
    		}
    	}
    	return problems
    }
    // NOTE(review): the statements from here down to the closing brace just
    // before the importSegment doc comment use seg, sf, tx, segIdx and the ID
    // maps, none of which are declared at this point. They look like the rest of
    // importSegment's body (duration check, segment insert, per-label import),
    // detached from the function header further down — confirm against the real file.
    // Reject segments that run past the end of the file.
    if seg.EndTime > sf.Duration {
    errors = append(errors, ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration),
    })
    return SegmentImport{}, nil, 0, errors
    }
    segmentID, err := utils.GenerateLongID()
    if err != nil {
    errors = append(errors, ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("failed to generate segment ID: %v", err),
    })
    return SegmentImport{}, nil, 0, errors
    }
    // Insert the segment row first; labels reference it by segmentID.
    _, err = tx.ExecContext(ctx, `
    INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
    VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    `, segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh)
    if err != nil {
    errors = append(errors, ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("failed to insert segment: %v", err),
    })
    return SegmentImport{}, nil, 0, errors
    }
    segImport := SegmentImport{
    SegmentID: segmentID,
    FileName: filepath.Base(sf.WavPath),
    StartTime: seg.StartTime,
    EndTime: seg.EndTime,
    FreqLow: seg.FreqLow,
    FreqHigh: seg.FreqHigh,
    Labels: make([]LabelImport, 0),
    }
    // labelIDs maps a label's index within the segment to its generated ID.
    labelIDs := make(map[int]string)
    var subtypesImported int
    for labelIdx, label := range seg.Labels {
    result := importSingleLabel(ctx, tx, label, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
    // A failed label is recorded and skipped; the remaining labels are still tried.
    if result.hasError {
    errors = append(errors, result.err)
    continue
    }
    labelIDs[labelIdx] = result.labelID
    segImport.Labels = append(segImport.Labels, result.labelImport)
    subtypesImported += result.subtypesImported
    }
    return segImport, labelIDs, subtypesImported, errors
    }
    // importSegment inserts a single segment and its labels into the DB.
    //
    // It returns the imported segment, a label index -> label ID map, the number
    // of imported subtypes and any errors. On a validation or insert failure it
    // returns an empty SegmentImport together with the errors.
    func importSegment(
    ctx context.Context,
    tx *db.LoggedTx,
    seg *utils.Segment,
    segIdx int,
    sf scannedDataFile,
    datasetID string,
    mapping utils.MappingFile,
    filterIDMap map[string]string,
    speciesIDMap map[string]string,
    calltypeIDMap map[string]map[string]string,
    ) (SegmentImport, map[int]string, int, []ImportSegmentError) {
    var errors []ImportSegmentError
    // Reject empty or inverted segments.
    if seg.StartTime >= seg.EndTime {
    errors = append(errors, ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime),
    })
    return SegmentImport{}, nil, 0, errors
    }
    // NOTE(review): as listed, the function ends here with no return on the
    // success path; the remainder of the body appears above this doc comment.
    }
    // importLabelResult holds the result of importing a single label.
    type importLabelResult struct {
    // labelImport is the output record for a successfully imported label.
    labelImport LabelImport
    // labelID is the ID generated for the inserted label row.
    labelID string
    // subtypesImported is 1 when a label_subtype row was inserted, otherwise 0.
    subtypesImported int
    // err describes the failure; it is meaningful only when hasError is true.
    err ImportSegmentError
    // hasError reports whether the import of this label failed.
    hasError bool
    }
    // importSingleLabel inserts a single label and its metadata/subtype into the DB.
    //
    // The label's species is translated through mapping and resolved to a species
    // ID, and its filter to a filter ID; a label row is then inserted. A non-empty
    // comment is stored as a label_metadata row holding a JSON object with a
    // "comment" key, and a non-empty calltype is stored via importCalltype.
    //
    // The result has hasError set, with err describing the problem, when any
    // lookup, ID generation or insert fails; otherwise it carries the output
    // record, the new label ID and the number of subtypes inserted (0 or 1).
    // segIdx and labelIdx are currently unused.
    //
    // NOTE(review): when the metadata or subtype insert fails, the label row
    // inserted just before it is not removed here — confirm that is intended.
    func importSingleLabel(
    	ctx context.Context,
    	tx *db.LoggedTx,
    	label *utils.Label,
    	segmentID string,
    	segIdx, labelIdx int,
    	sf scannedDataFile,
    	mapping utils.MappingFile,
    	filterIDMap map[string]string,
    	speciesIDMap map[string]string,
    	calltypeIDMap map[string]map[string]string,
    ) importLabelResult {
    	// fail builds an error result attributed to the current .data file.
    	fail := func(format string, args ...any) importLabelResult {
    		return importLabelResult{err: ImportSegmentError{
    			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    			Message: fmt.Sprintf(format, args...),
    		}, hasError: true}
    	}

    	dbSpecies, ok := mapping.GetDBSpecies(label.Species)
    	if !ok {
    		return fail("species not found in mapping: %s", label.Species)
    	}
    	speciesID, ok := speciesIDMap[dbSpecies]
    	if !ok {
    		return fail("species ID not found: %s", dbSpecies)
    	}
    	filterID, ok := filterIDMap[label.Filter]
    	if !ok {
    		return fail("filter ID not found: %s", label.Filter)
    	}

    	labelID, err := utils.GenerateLongID()
    	if err != nil {
    		return fail("failed to generate label ID: %v", err)
    	}
    	_, err = tx.ExecContext(ctx, `
    INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
    VALUES (?, ?, ?, ?, ?, now(), now(), true)
    `, labelID, segmentID, speciesID, filterID, label.Certainty)
    	if err != nil {
    		return fail("failed to insert label: %v", err)
    	}

    	// Insert label_metadata if comment exists
    	if label.Comment != "" {
    		// Let encoding/json quote the comment. Escaping only `"` by hand produced
    		// invalid JSON for comments containing backslashes, newlines or other
    		// control characters.
    		quotedComment, err := json.Marshal(label.Comment)
    		if err != nil {
    			return fail("failed to encode label comment: %v", err)
    		}
    		metadataJSON := fmt.Sprintf(`{"comment": %s}`, quotedComment)
    		if _, err := tx.ExecContext(ctx, `
    INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
    VALUES (?, ?, now(), now(), true)
    `, labelID, metadataJSON); err != nil {
    			return fail("failed to insert label_metadata: %v", err)
    		}
    	}

    	labelImport := LabelImport{
    		LabelID:   labelID,
    		Species:   dbSpecies,
    		Filter:    label.Filter,
    		Certainty: label.Certainty,
    		Comment:   label.Comment,
    	}

    	// Insert label_subtype if calltype exists
    	if label.CallType != "" {
    		if ctErr := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); ctErr != nil {
    			return importLabelResult{err: *ctErr, hasError: true}
    		}
    		labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
    		return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
    	}
    	return importLabelResult{labelImport: labelImport, labelID: labelID}
    }
    // importCalltype inserts a label_subtype row for a calltype label.
    //
    // The label's calltype is translated through mapping and looked up in
    // calltypeIDMap under dbSpecies. It returns nil on success, or a pointer to
    // an ImportSegmentError when the calltype ID is unknown, ID generation fails
    // or the insert fails.
    //
    // NOTE(review): the function's closing brace is missing from this listing;
    // the body appears to end at the final `return nil` — confirm.
    func importCalltype(
    ctx context.Context,
    tx *db.LoggedTx,
    labelID string,
    label *utils.Label,
    dbSpecies string,
    filterID string,
    mapping utils.MappingFile,
    calltypeIDMap map[string]map[string]string,
    sf scannedDataFile,
    ) *ImportSegmentError {
    dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)
    // An empty calltypeID means no ID was loaded for this species/calltype pair.
    calltypeID := ""
    if calltypeIDMap[dbSpecies] != nil {
    calltypeID = calltypeIDMap[dbSpecies][dbCalltype]
    }
    if calltypeID == "" {
    return &ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("calltype ID not found: %s/%s", dbSpecies, dbCalltype),
    }
    }
    subtypeID, err := utils.GenerateLongID()
    if err != nil {
    return &ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("failed to generate label_subtype ID: %v", err),
    }
    }
    // The subtype row reuses the parent label's filter and certainty.
    _, err = tx.ExecContext(ctx, `
    INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
    VALUES (?, ?, ?, ?, ?, now(), now(), true)
    `, subtypeID, labelID, calltypeID, filterID, label.Certainty)
    if err != nil {
    return &ImportSegmentError{
    File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
    Message: fmt.Sprintf("failed to insert label_subtype: %v", err),
    }
    }
    return nil
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    // segmentValidation holds the results of pre-import validation (phases B+C).
    type segmentValidation struct {
    // scannedFiles are all .data files that were parsed successfully.
    scannedFiles []scannedDataFile
    // filterIDMap maps filter name -> filter ID.
    filterIDMap map[string]string
    // speciesIDMap maps DB species label -> species ID.
    speciesIDMap map[string]string
    // calltypeIDMap maps DB species label -> DB calltype label -> calltype ID.
    calltypeIDMap map[string]map[string]string
    // fileIDMap maps database file ID -> the scanned file that passed validation.
    fileIDMap map[string]scannedDataFile
    }
    // validateAndPrepareSegments runs phases B and C of the import: it parses the
    // .data files, validates the dataset/location/cluster hierarchy, the filters
    // and the species/calltype mapping against the database, loads the ID maps,
    // and matches each scanned file to a database file row.
    //
    // It returns the prepared validation result, the per-file errors collected so
    // far, and a fatal error if a whole validation step failed. When no .data
    // file could be scanned it returns a nil result with a nil error.
    func validateAndPrepareSegments(
    	database *sql.DB,
    	input ImportSegmentsInput,
    	mapping utils.MappingFile,
    	dataFiles []string,
    ) (*segmentValidation, []ImportSegmentError, error) {
    	// Phase B: parse every .data file and collect the names it uses.
    	scanned, collected, filterNames, speciesNames, calltypeNames := scanAllDataFiles(dataFiles, input.Folder)
    	if len(scanned) == 0 {
    		return nil, collected, nil
    	}

    	if hierarchyErr := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID); hierarchyErr != nil {
    		return nil, collected, hierarchyErr
    	}

    	filterIDs, err := validateFiltersExist(database, filterNames)
    	if err != nil {
    		return nil, collected, fmt.Errorf("filter validation failed: %w", err)
    	}

    	// The mapping must cover every species/calltype, and they must exist in the DB.
    	report, err := utils.ValidateMappingAgainstDB(database, mapping, speciesNames, calltypeNames)
    	switch {
    	case err != nil:
    		return nil, collected, fmt.Errorf("mapping validation failed: %w", err)
    	case report.HasErrors():
    		return nil, collected, fmt.Errorf("mapping validation failed: %s", report.Error())
    	}

    	speciesIDs, calltypeIDs, err := loadSpeciesCalltypeIDs(database, mapping, speciesNames, calltypeNames)
    	if err != nil {
    		return nil, collected, fmt.Errorf("failed to load species/calltype IDs: %w", err)
    	}

    	// Phase C: hash exists, file linked to dataset, no existing labels.
    	filesByID, fileErrs := validateAndMapFiles(database, scanned, input.ClusterID, input.DatasetID)
    	prepared := &segmentValidation{
    		scannedFiles:  scanned,
    		filterIDMap:   filterIDs,
    		speciesIDMap:  speciesIDs,
    		calltypeIDMap: calltypeIDs,
    		fileIDMap:     filesByID,
    	}
    	return prepared, append(collected, fileErrs...), nil
    }
    DBPath string `json:"db_path"`
  • file deletion: import_files.go (----------)
    [6.248737][6.351133:351172](),[6.351172][6.345251:345251]()
    package tools
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportAudioFilesInput defines the input parameters for the import_audio_files tool.
    type ImportAudioFilesInput struct {
    // FolderPath is the folder to import from; it must exist and be a directory.
    FolderPath string `json:"folder_path"`
    // DatasetID, LocationID and ClusterID are format-checked with
    // utils.ValidateShortID and then validated as a hierarchy in the database.
    DatasetID string `json:"dataset_id"`
    LocationID string `json:"location_id"`
    ClusterID string `json:"cluster_id"`
    Recursive *bool `json:"recursive,omitempty"` // *bool because default is true; plain bool would make "not provided" indistinguishable from "false"
    }
    // ImportAudioFilesOutput defines the output structure for the import_audio_files tool.
    type ImportAudioFilesOutput struct {
    // Summary holds counts and timing for the run.
    Summary ImportSummary `json:"summary"`
    // FileIDs is always returned as an empty slice by ImportAudioFiles; file IDs
    // are not tracked currently.
    FileIDs []string `json:"file_ids"`
    // Errors are the per-file errors reported by the cluster import; omitted
    // from JSON when empty.
    Errors []utils.FileImportError `json:"errors,omitempty"`
    }
    // ImportSummary provides summary statistics for the import operation.
    // Every field except ProcessingTime is copied from the cluster import result.
    type ImportSummary struct {
    TotalFiles int `json:"total_files"`
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    FailedFiles int `json:"failed_files"`
    AudioMothFiles int `json:"audiomoth_files"`
    // TotalDuration is serialized as total_duration_seconds.
    TotalDuration float64 `json:"total_duration_seconds"`
    // ProcessingTime is the elapsed time formatted with time.Duration.String.
    ProcessingTime string `json:"processing_time"`
    }
    // ImportAudioFiles batch imports WAV files from a folder into a cluster.
    //
    // It defaults Recursive to true, ensures the cluster path is set, runs the
    // cluster import and maps its result to ImportAudioFilesOutput. The returned
    // FileIDs slice is always empty.
    //
    // NOTE(review): several statements of this function are missing from the body
    // as listed (the validation call, opening the database, beginning the
    // transaction, the utils.ImportCluster call, and the commit); matching
    // statements appear detached further down this listing — confirm against the
    // real file before relying on the order shown here.
    func ImportAudioFiles(
    ctx context.Context,
    input ImportAudioFilesInput,
    ) (ImportAudioFilesOutput, error) {
    startTime := time.Now()
    var output ImportAudioFilesOutput
    // Default recursive to true
    recursive := true
    if input.Recursive != nil {
    recursive = *input.Recursive
    }
    // Validate database hierarchy (dataset → location → cluster)
    // NOTE(review): the `if err := validateImportInput(...)` header that opens
    // this branch is not present here.
    return output, fmt.Errorf("validation failed: %w", err)
    }
    // Open database
    // NOTE(review): the statement declaring `database` and `err` is not present here.
    if err != nil {
    return output, fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()
    // Set cluster path if empty
    err = utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath)
    if err != nil {
    return output, fmt.Errorf("failed to set cluster path: %w", err)
    }
    // Import the cluster (ALL THE LOGIC IS HERE)
    // NOTE(review): the call that these fields are arguments to, and that
    // assigns clusterOutput, is not present here.
    FolderPath: input.FolderPath,
    DatasetID: input.DatasetID,
    LocationID: input.LocationID,
    ClusterID: input.ClusterID,
    Recursive: recursive,
    })
    if err != nil {
    return output, fmt.Errorf("cluster import failed: %w", err)
    }
    // Map to output format
    output = ImportAudioFilesOutput{
    Summary: ImportSummary{
    TotalFiles: clusterOutput.TotalFiles,
    ImportedFiles: clusterOutput.ImportedFiles,
    SkippedFiles: clusterOutput.SkippedFiles,
    FailedFiles: clusterOutput.FailedFiles,
    AudioMothFiles: clusterOutput.AudioMothFiles,
    TotalDuration: clusterOutput.TotalDuration,
    ProcessingTime: time.Since(startTime).String(),
    },
    FileIDs: []string{}, // File IDs not tracked currently
    Errors: clusterOutput.Errors,
    }
    return output, nil
    }
    // validateImportInput checks that input.FolderPath is an accessible directory
    // and then validates the dataset/location/cluster IDs and their database
    // relationships through validateHierarchyIDs, using the database at dbPath.
    func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
    	folderInfo, statErr := os.Stat(input.FolderPath)
    	switch {
    	case statErr != nil:
    		return fmt.Errorf("folder not accessible: %w", statErr)
    	case !folderInfo.IsDir():
    		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
    	}
    	return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
    }
    // validateHierarchyIDs validates the dataset, location and cluster IDs.
    //
    // The ID formats are checked first with utils.ValidateShortID, so malformed
    // input fails before any database access. The database at dbPath is then
    // opened through db.WithReadDB to check, in order, that the dataset is valid
    // for import, that the location belongs to the dataset, and that the cluster
    // belongs to the location. The first failing check's error is returned.
    func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
    	formatChecks := []struct{ id, field string }{
    		{datasetID, "dataset_id"},
    		{locationID, "location_id"},
    		{clusterID, "cluster_id"},
    	}
    	for _, check := range formatChecks {
    		if err := utils.ValidateShortID(check.id, check.field); err != nil {
    			return err
    		}
    	}

    	checkRelationships := func(database *sql.DB) error {
    		if err := db.ValidateDatasetTypeForImport(database, datasetID); err != nil {
    			return err
    		}
    		if err := db.ValidateLocationBelongsToDataset(database, locationID, datasetID); err != nil {
    			return err
    		}
    		return db.ClusterBelongsToLocation(database, clusterID, locationID)
    	}
    	return db.WithReadDB(dbPath, checkRelationships)
    }
    }
    if err := tx.Commit(); err != nil {
    return output, fmt.Errorf("transaction commit failed: %w", err)
    tx.Rollback()
    tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
    if err != nil {
    return output, fmt.Errorf("failed to begin transaction: %w", err)
    }
    clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    if err := validateImportInput(input, resolveDBPath(input.DBPath)); err != nil {
    DBPath string `json:"db_path"`
  • file deletion: import_file.go (----------)
    [6.248737][6.357911:357949](),[6.357949][6.351174:351174]()
    package tools
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportFileInput defines the input parameters for the import_file tool.
    type ImportFileInput struct {
    // FilePath is the path of the WAV file to import.
    FilePath string `json:"file_path"`
    // DatasetID is the dataset the imported file is linked to.
    DatasetID string `json:"dataset_id"`
    // LocationID is the location recorded on the file row; it is also used to
    // look up location data for processing the file.
    LocationID string `json:"location_id"`
    // ClusterID is the cluster recorded on the file row.
    ClusterID string `json:"cluster_id"`
    }
    // ImportFileOutput defines the output structure for the import_file tool.
    type ImportFileOutput struct {
    // FileID is the ID returned by the database insert step; when IsDuplicate
    // is true it is the ID of the already-existing file.
    FileID string `json:"file_id"`
    // FileName is the file name of the imported file.
    FileName string `json:"file_name"`
    // Hash is the content hash reported by file processing.
    Hash string `json:"hash"`
    // Duration is serialised as "duration_seconds".
    Duration float64 `json:"duration_seconds"`
    SampleRate int `json:"sample_rate"`
    TimestampLocal time.Time `json:"timestamp_local"`
    // IsAudioMoth mirrors the IsAudioMoth flag of the processing result.
    IsAudioMoth bool `json:"is_audiomoth"`
    // IsDuplicate is true when a file with the same hash already existed.
    IsDuplicate bool `json:"is_duplicate"`
    // ProcessingTime is the elapsed time formatted with time.Duration.String.
    ProcessingTime string `json:"processing_time"`
    // Error holds the failure message when processing or insertion fails;
    // it is omitted from JSON when nil.
    Error *string `json:"error,omitempty"`
    }
    // ImportFile imports a single WAV file into the database with duplicate detection
    func ImportFile(
    ctx context.Context,
    input ImportFileInput,
    ) (ImportFileOutput, error) {
    startTime := time.Now()
    var output ImportFileOutput
    // Phase 1: Validate file path
    _, err := validateFilePath(input.FilePath)
    if err != nil {
    return output, fmt.Errorf("file validation failed: %w", err)
    }
    output.FileName = filepath.Base(input.FilePath)
    // Phase 2: Validate database hierarchy
    return output, fmt.Errorf("hierarchy validation failed: %w", err)
    }
    // Phase 3: Open database connection (single connection for all DB operations)
    if err != nil {
    return output, fmt.Errorf("database connection failed: %w", err)
    }
    defer database.Close()
    // Phase 4: Get location data for astronomical calculations
    locData, err := utils.GetLocationData(database, input.LocationID)
    if err != nil {
    return output, fmt.Errorf("failed to get location data: %w", err)
    }
    // Phase 5: Process file metadata
    result, err := utils.ProcessSingleFile(input.FilePath, locData.Latitude, locData.Longitude, locData.TimezoneID, true)
    if err != nil {
    errMsg := err.Error()
    output.Error = &errMsg
    output.ProcessingTime = time.Since(startTime).String()
    return output, fmt.Errorf("file processing failed: %w", err)
    }
    // Populate output with extracted metadata
    output.FileName = result.FileName
    output.Hash = result.Hash
    output.Duration = result.Duration
    output.SampleRate = result.SampleRate
    output.TimestampLocal = result.TimestampLocal
    output.IsAudioMoth = result.IsAudioMoth
    // Phase 6: Ensure cluster path is set
    if err := utils.EnsureClusterPath(database, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
    return output, fmt.Errorf("failed to set cluster path: %w", err)
    }
    // Phase 7: Insert into database
    fileID, isDuplicate, err := insertFileIntoDB(ctx, database, result, input.DatasetID, input.ClusterID, input.LocationID)
    if err != nil {
    errMsg := err.Error()
    output.Error = &errMsg
    output.ProcessingTime = time.Since(startTime).String()
    return output, fmt.Errorf("database insertion failed: %w", err)
    }
    output.FileID = fileID
    output.IsDuplicate = isDuplicate
    output.ProcessingTime = time.Since(startTime).String()
    return output, nil
    }
    // validateFilePath confirms that filePath names an existing, regular,
    // non-empty file whose extension is .wav (compared case-insensitively).
    // On success it returns the os.FileInfo obtained from os.Stat; otherwise it
    // returns a nil FileInfo and an error describing the first failed check.
    func validateFilePath(filePath string) (os.FileInfo, error) {
        stat, statErr := os.Stat(filePath)
        switch {
        case statErr == nil:
            // File is reachable; continue with the remaining checks.
        case os.IsNotExist(statErr):
            return nil, fmt.Errorf("file does not exist: %s", filePath)
        default:
            return nil, fmt.Errorf("cannot access file: %w", statErr)
        }

        // Directories, devices and other special files are rejected.
        if !stat.Mode().IsRegular() {
            return nil, fmt.Errorf("path is not a regular file: %s", filePath)
        }

        // The extension is lower-cased so ".WAV" and ".wav" are both accepted.
        if ext := strings.ToLower(filepath.Ext(filePath)); ext != ".wav" {
            return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)
        }

        if stat.Size() == 0 {
            return nil, fmt.Errorf("file is empty: %s", filePath)
        }
        return stat, nil
    }
    // insertFileIntoDB stores one processed file inside a single logged transaction.
    //
    // It first asks utils.CheckDuplicateHash whether result.Hash is already known;
    // if so it returns the existing ID with isDuplicate=true and inserts nothing.
    // Otherwise it generates a new ID and inserts a row into file, a row into
    // file_dataset, and (for AudioMoth files with metadata) a row into
    // moth_metadata, then commits.
    //
    // Returns (fileID, isDuplicate, error). On any error the returned ID is empty.
    func insertFileIntoDB(
    ctx context.Context,
    database *sql.DB,
    result *utils.FileProcessingResult,
    datasetID, clusterID, locationID string,
    ) (string, bool, error) {
    // Begin logged transaction (logged under the name "import_audio_file")
    tx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")
    if err != nil {
    return "", false, fmt.Errorf("failed to begin transaction: %w", err)
    }
    // Covers every early return below, including the duplicate path, which
    // returns without committing.
    defer tx.Rollback() // Rollback if not committed
    // Check for duplicate hash; a duplicate short-circuits with the existing ID
    existingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)
    if err != nil {
    return "", false, err
    }
    if isDup {
    return existingID, true, nil
    }
    // Generate file ID
    fileID, err := utils.GenerateLongID()
    if err != nil {
    return "", false, fmt.Errorf("ID generation failed: %w", err)
    }
    // Insert file record (created_at/last_modified come from now(), active is true)
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, timestamp_local,
    cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
    moon_phase, created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    fileID, result.FileName, result.Hash, locationID,
    result.TimestampLocal, clusterID, result.Duration, result.SampleRate,
    result.AstroData.SolarNight, result.AstroData.CivilNight, result.AstroData.MoonPhase,
    )
    if err != nil {
    return "", false, fmt.Errorf("file insert failed: %w", err)
    }
    // Insert file_dataset junction row linking the new file to datasetID
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
    VALUES (?, ?, now(), now())
    `, fileID, datasetID)
    if err != nil {
    return "", false, fmt.Errorf("file_dataset insert failed: %w", err)
    }
    // If AudioMoth, insert moth_metadata; skipped when MothData is nil even if
    // the file was flagged as AudioMoth
    if result.IsAudioMoth && result.MothData != nil {
    _, err = tx.ExecContext(ctx, `
    INSERT INTO moth_metadata (
    file_id, timestamp, recorder_id, gain, battery_v, temp_c,
    created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    fileID,
    result.MothData.Timestamp,
    &result.MothData.RecorderID,
    &result.MothData.Gain,
    &result.MothData.BatteryV,
    &result.MothData.TempC,
    )
    if err != nil {
    return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)
    }
    }
    // Commit transaction; all inserts above become visible together
    if err = tx.Commit(); err != nil {
    return "", false, fmt.Errorf("transaction commit failed: %w", err)
    }
    return fileID, false, nil
    }
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    if err := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, resolveDBPath(input.DBPath)); err != nil {
    DBPath string `json:"db_path"`
  • file deletion: calls_summarise.go (----------)
    [6.248737][6.400201:400243](),[6.400243][6.392542:392542]()
    package tools
    import (
    "sort"
    "strings"
    "skraak/utils"
    )
    // CallsSummariseInput defines the input for the calls-summarise tool.
    type CallsSummariseInput struct {
    // Folder is the directory searched for .data files.
    Folder string `json:"folder"`
    // Brief suppresses the per-segment list; only aggregate statistics are produced.
    Brief bool `json:"brief"`
    // Filter, when non-empty, restricts the summary to labels whose filter
    // equals this value exactly.
    Filter string `json:"filter,omitempty"`
    }
    // CallsSummariseOutput defines the output for the calls-summarise tool.
    type CallsSummariseOutput struct {
    // Segments lists the summarised segments; it stays empty in brief mode.
    Segments []SegmentSummary `json:"segments"`
    // Folder echoes the input folder.
    Folder string `json:"folder"`
    // DataFilesRead counts the .data files that parsed successfully.
    DataFilesRead int `json:"data_files_read"`
    // DataFilesSkipped lists the paths of .data files that failed to parse.
    DataFilesSkipped []string `json:"data_files_skipped"`
    TotalSegments int `json:"total_segments"`
    // Filters maps a filter name to its statistics.
    Filters map[string]FilterStats `json:"filters"`
    ReviewStatus ReviewStatus `json:"review_status"`
    // Operators and Reviewers are the distinct, sorted names found in file metadata.
    Operators []string `json:"operators"`
    Reviewers []string `json:"reviewers"`
    // Error holds the failure message when the data files cannot be listed.
    Error *string `json:"error,omitempty"`
    }
    // SegmentSummary represents a single segment in the output.
    type SegmentSummary struct {
    // File is the audio file name derived from the .data file path
    // (the ".data" suffix is removed).
    File string `json:"file"`
    // StartTime and EndTime are copied from the parsed segment.
    StartTime float64 `json:"start_time"`
    EndTime float64 `json:"end_time"`
    // Labels holds the segment's labels after the optional filter is applied.
    Labels []LabelSummary `json:"labels"`
    }
    // LabelSummary represents a label in the output. CallType, Comment and
    // Bookmark are omitted from the JSON when they hold their zero value.
    type LabelSummary struct {
    Filter string `json:"filter"`
    Certainty int `json:"certainty"`
    Species string `json:"species"`
    CallType string `json:"calltype,omitempty"`
    Comment string `json:"comment,omitempty"`
    Bookmark bool `json:"bookmark,omitempty"`
    }
    // FilterStats contains per-filter statistics.
    type FilterStats struct {
    Segments int `json:"segments"`
    // Species maps a species name to a count.
    Species map[string]int `json:"species"`
    // Calltypes is set to nil during finalisation when it has no entries, so it
    // is omitted from the JSON.
    Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
    }
    // ReviewStatus contains review progress statistics. Each label increments
    // exactly one of Unreviewed, Confirmed or DontKnow, chosen by its certainty.
    type ReviewStatus struct {
    Unreviewed int `json:"unreviewed"` // certainty other than 0 or 100
    Confirmed int `json:"confirmed"` // certainty = 100
    DontKnow int `json:"dont_know"` // certainty = 0
    WithCallType int `json:"with_calltype"` // labels with a non-empty call type
    WithComments int `json:"with_comments"` // labels with a non-empty comment
    Bookmarked int `json:"bookmarked"` // labels with the bookmark flag set
    }
    // CallsSummarise reads all .data files in a folder and produces a summary
    func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
    var output CallsSummariseOutput
    // Find all .data files
    filePaths, err := utils.FindDataFiles(input.Folder)
    if err != nil {
    errMsg := err.Error()
    output.Error = &errMsg
    return output, err
    }
    // Initialize empty slices/maps (avoid null in JSON)
    output.Segments = make([]SegmentSummary, 0)
    output.Folder = input.Folder
    output.Filters = make(map[string]FilterStats)
    output.Operators = make([]string, 0)
    output.Reviewers = make([]string, 0)
    output.DataFilesSkipped = make([]string, 0)
    if len(filePaths) == 0 {
    return output, nil
    }
    // Track unique operators and reviewers
    operatorSet := make(map[string]bool)
    reviewerSet := make(map[string]bool)
    // Count segments for total
    if input.Brief {
    for _, fs := range output.Filters {
    output.TotalSegments += fs.Segments
    }
    } else {
    output.TotalSegments = len(output.Segments)
    }
    finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)
    return output, nil
    }
    // summariseFiles processes all data files, populating output stats
    func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
    for _, path := range filePaths {
    df, err := utils.ParseDataFile(path)
    if err != nil {
    output.DataFilesSkipped = append(output.DataFilesSkipped, path)
    continue
    }
    output.DataFilesRead++
    trackMeta(df.Meta, operatorSet, reviewerSet)
    var relPath string
    if !input.Brief {
    relPath = extractRelativePath(input.Folder, path)
    }
    for _, seg := range df.Segments {
    filteredLabels := filterLabels(seg.Labels, input.Filter)
    if input.Filter != "" && len(filteredLabels) == 0 {
    continue
    }
    updateStatsFromLabels(filteredLabels, output)
    if !input.Brief {
    output.Segments = append(output.Segments, SegmentSummary{
    File: relPath,
    StartTime: seg.StartTime,
    EndTime: seg.EndTime,
    Labels: buildLabelSummaries(filteredLabels),
    })
    }
    // trackMeta adds the operator and reviewer named in meta to the given sets.
    // A nil meta is ignored, as are empty operator or reviewer names.
    func trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {
        if meta == nil {
            return
        }
        if operator := meta.Operator; operator != "" {
            operatorSet[operator] = true
        }
        if reviewer := meta.Reviewer; reviewer != "" {
            reviewerSet[reviewer] = true
        }
    }
    // filterLabels narrows labels to those whose Filter equals filter.
    // An empty filter means "no filtering": the input slice is returned as-is.
    // When nothing matches, the result is a nil slice.
    func filterLabels(labels []*utils.Label, filter string) []*utils.Label {
        if filter == "" {
            return labels
        }
        var matches []*utils.Label
        for i := range labels {
            if labels[i].Filter != filter {
                continue
            }
            matches = append(matches, labels[i])
        }
        return matches
    }
    // buildLabelSummaries maps each label to a LabelSummary carrying the same
    // filter, certainty, species, call type, comment and bookmark flag.
    // The result is nil when labels is empty.
    func buildLabelSummaries(labels []*utils.Label) []LabelSummary {
        var out []LabelSummary
        for _, label := range labels {
            // Empty call type/comment and a false bookmark are the zero values,
            // so copying them directly matches setting them only when present.
            out = append(out, LabelSummary{
                Filter:    label.Filter,
                Certainty: label.Certainty,
                Species:   label.Species,
                CallType:  label.CallType,
                Comment:   label.Comment,
                Bookmark:  label.Bookmark,
            })
        }
        return out
    }
    // updateStatsFromLabels feeds every label, in order, through updateFilterStats
    // and then updateReviewStatus so both sets of counters in output are updated.
    func updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {
        for i := range labels {
            label := labels[i]
            updateFilterStats(label, output)
            updateReviewStatus(label, output)
        }
    }
    // updateFilterStats increments filter-level statistics for a single label
    func updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {
    fs, exists := output.Filters[l.Filter]
    if !exists {
    fs = FilterStats{
    Segments: 0,
    Species: make(map[string]int),
    Calltypes: make(map[string]map[string]int),
    }
    }
    if l.CallType != "" {
    if fs.Calltypes[l.Species] == nil {
    fs.Calltypes[l.Species] = make(map[string]int)
    }
    fs.Calltypes[l.Species][l.CallType]++
    }
    output.Filters[l.Filter] = fs
    }
    // updateReviewStatus increments review status counters for a single label
    func updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {
    switch l.Certainty {
    case 100:
    output.ReviewStatus.Confirmed++
    case 0:
    output.ReviewStatus.DontKnow++
    default:
    output.ReviewStatus.Unreviewed++
    }
    if l.CallType != "" {
    output.ReviewStatus.WithCallType++
    }
    if l.Comment != "" {
    output.ReviewStatus.WithComments++
    }
    if l.Bookmark {
    output.ReviewStatus.Bookmarked++
    }
    // finaliseSummary prepares output for serialisation:
    //   - filters whose Calltypes map has no entries get it set to nil;
    //   - the operator and reviewer sets are appended to output.Operators and
    //     output.Reviewers, which are then sorted;
    //   - unless brief is set, segments are ordered by file and then start time.
    func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
        // Drop empty calltype maps so they are omitted from the JSON.
        for name, stats := range output.Filters {
            if len(stats.Calltypes) != 0 {
                continue
            }
            stats.Calltypes = nil
            output.Filters[name] = stats
        }

        // Flatten the sets; map iteration order is random, hence the sort.
        for name := range operatorSet {
            output.Operators = append(output.Operators, name)
        }
        sort.Strings(output.Operators)
        for name := range reviewerSet {
            output.Reviewers = append(output.Reviewers, name)
        }
        sort.Strings(output.Reviewers)

        if brief {
            return
        }
        // Order segments by file name, breaking ties on start time.
        sort.Slice(output.Segments, func(i, j int) bool {
            left, right := output.Segments[i], output.Segments[j]
            if left.File == right.File {
                return left.StartTime < right.StartTime
            }
            return left.File < right.File
        })
    }
    // extractRelativePath returns the audio file name for a .data file path:
    // everything after the last "/" with a trailing ".data" removed, e.g.
    // "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV".
    // The case of the remaining extension is left untouched. The folder argument
    // is not used by the current implementation.
    func extractRelativePath(folder, dataPath string) string {
        // LastIndex yields -1 when there is no "/", so the slice then starts at 0
        // and keeps the whole string.
        base := dataPath[strings.LastIndex(dataPath, "/")+1:]
        return strings.TrimSuffix(base, ".data")
    }
    }
    fs.Segments++
    fs.Species[l.Species]++
    }
    }
    }
    summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
  • file deletion: calls_show_images.go (----------)
    [6.248737][6.403517:403561](),[6.403561][6.400245:400245]()
    package tools
    import (
    "fmt"
    "os"
    "strings"
    "skraak/utils"
    )
    // CallsShowImagesInput defines the input for the show-images tool.
    type CallsShowImagesInput struct {
    // DataFilePath is the .data file to display; the WAV path is derived by
    // stripping the ".data" suffix.
    DataFilePath string `json:"data_file_path"`
    // Color is passed through to spectrogram generation.
    Color bool `json:"color"`
    // ImageSize of 0 selects utils.SpectrogramDisplaySize.
    ImageSize int `json:"image_size"`
    // Sixel and ITerm select the terminal graphics protocol. ITerm wins when
    // both are set; with neither set the Kitty protocol is used.
    Sixel bool `json:"sixel"`
    ITerm bool `json:"iterm"`
    }
    // CallsShowImagesOutput defines the output for the show-images tool.
    type CallsShowImagesOutput struct {
    SegmentsShown int `json:"segments_shown"`
    // WavFile is the WAV path derived from the .data file path.
    WavFile string `json:"wav_file"`
    // Error carries the failure message; it is omitted from JSON when empty.
    Error string `json:"error,omitempty"`
    }
    // CallsShowImages reads a .data file and displays a spectrogram image for each
    // of its segments.
    //
    // Segment information is printed to stderr and the image itself is written to
    // stdout using the selected terminal graphics protocol (iTerm if input.ITerm,
    // otherwise Sixel if input.Sixel, otherwise Kitty). Segments whose spectrogram
    // cannot be generated are reported on stderr and skipped.
    //
    // On success output.SegmentsShown is the number of segments actually written,
    // which can be lower than the number of segments in the file. On failure the
    // message is stored in output.Error and also returned as the error.
    func CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {
        var output CallsShowImagesOutput

        // Validate file exists
        if _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {
            output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)
            return output, fmt.Errorf("%s", output.Error)
        }

        // Derive WAV file path (strip .data suffix)
        wavPath := strings.TrimSuffix(input.DataFilePath, ".data")
        output.WavFile = wavPath

        // Check WAV file exists
        if _, err := os.Stat(wavPath); os.IsNotExist(err) {
            output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)
            return output, fmt.Errorf("%s", output.Error)
        }

        // Parse .data file (includes labels for future filtering)
        dataFile, err := utils.ParseDataFile(input.DataFilePath)
        if err != nil {
            output.Error = err.Error()
            return output, fmt.Errorf("%s", output.Error)
        }
        if len(dataFile.Segments) == 0 {
            output.Error = "No segments found in .data file"
            return output, fmt.Errorf("%s", output.Error)
        }

        // Resolve image size; zero means "use the default display size"
        imgSize := input.ImageSize
        if imgSize == 0 {
            imgSize = utils.SpectrogramDisplaySize
        }

        // Select graphics protocol; iTerm takes precedence over Sixel
        protocol := utils.ProtocolKitty
        if input.ITerm {
            protocol = utils.ProtocolITerm
        } else if input.Sixel {
            protocol = utils.ProtocolSixel
        }

        // Generate spectrogram for each segment and output. Only segments that
        // are actually written count towards SegmentsShown.
        shown := 0
        for i, seg := range dataFile.Segments {
            img, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)
            if err != nil {
                // Best effort: one bad segment must not hide the others.
                fmt.Fprintf(os.Stderr, "Segment %d: skipped, spectrogram generation failed: %v\n", i+1, err)
                continue
            }
            if img == nil {
                fmt.Fprintf(os.Stderr, "Segment %d: skipped, no spectrogram produced\n", i+1)
                continue
            }

            // Print segment info
            labelInfo := formatSegmentLabels(seg.Labels)
            fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",
                i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)

            // Write to stdout via terminal graphics protocol
            if err := utils.WriteImage(img, os.Stdout, protocol); err != nil {
                output.Error = fmt.Sprintf("Failed to write image: %v", err)
                return output, fmt.Errorf("%s", output.Error)
            }
            fmt.Println() // Newline after image
            shown++
        }

        output.SegmentsShown = shown
        return output, nil
    }
    // formatSegmentLabels renders labels for the segment info line. Each label
    // becomes "Species", "Species/CallType" and/or "... [Filter]" depending on
    // which fields are set; labels are joined with ", " and the whole string is
    // prefixed with a single space. An empty label list yields "".
    func formatSegmentLabels(labels []*utils.Label) string {
        if len(labels) == 0 {
            return ""
        }
        var text strings.Builder
        for i, label := range labels {
            // The first label is preceded by the leading space, later ones by ", ".
            if i == 0 {
                text.WriteString(" ")
            } else {
                text.WriteString(", ")
            }
            text.WriteString(label.Species)
            if label.CallType != "" {
                text.WriteString("/" + label.CallType)
            }
            if label.Filter != "" {
                text.WriteString(" [" + label.Filter + "]")
            }
        }
        return text.String()
    }
  • file deletion: calls_push_certainty_test.go (----------)
    [6.248737][6.406958:407010](),[6.407010][6.403563:403563]()
    package tools
    import (
    "encoding/json"
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {
    tempDir := t.TempDir()
    // File with two Kiwi segments: certainty=90 and certainty=70
    file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
    file1Path := filepath.Join(tempDir, "file1.data")
    if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {
    t.Fatal(err)
    }
    // File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)
    file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
    file2Path := filepath.Join(tempDir, "file2.data")
    if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {
    t.Fatal(err)
    }
    result, err := PushCertainty(PushCertaintyConfig{
    Folder: tempDir,
    Species: "Kiwi",
    Reviewer: "TestReviewer",
    })
    if err != nil {
    t.Fatal(err)
    }
    if result.SegmentsUpdated != 1 {
    t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
    }
    if result.FilesUpdated != 1 {
    t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)
    }
    // Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchanged
    df, err := utils.ParseDataFile(file1Path)
    if err != nil {
    t.Fatal(err)
    }
    if df.Segments[0].Labels[0].Certainty != 100 {
    t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
    }
    if df.Segments[1].Labels[0].Certainty != 70 {
    t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)
    }
    if df.Meta.Reviewer != "TestReviewer" {
    t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)
    }
    // Verify Tomtit file was not modified
    df2, err := utils.ParseDataFile(file2Path)
    if err != nil {
    t.Fatal(err)
    }
    if df2.Segments[0].Labels[0].Certainty != 90 {
    t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)
    }
    }
    func TestPushCertaintyFilterScope(t *testing.T) {
    tempDir := t.TempDir()
    // Segment has two labels from different filters, both Kiwi certainty=90
    data := []any{
    map[string]any{"Operator": "test"},
    []any{0.0, 10.0, 100.0, 1000.0, []any{
    map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},
    map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},
    }},
    }
    raw, _ := json.Marshal(data)
    filePath := filepath.Join(tempDir, "file1.data")
    if err := os.WriteFile(filePath, raw, 0644); err != nil {
    t.Fatal(err)
    }
    // Push only model-a
    result, err := PushCertainty(PushCertaintyConfig{
    Folder: tempDir,
    Filter: "model-a",
    Species: "Kiwi",
    Reviewer: "TestReviewer",
    })
    if err != nil {
    t.Fatal(err)
    }
    if result.SegmentsUpdated != 1 {
    t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
    }
    // Verify only model-a label was promoted; model-b stays at 90
    df, err := utils.ParseDataFile(filePath)
    if err != nil {
    t.Fatal(err)
    }
    for _, label := range df.Segments[0].Labels {
    if label.Filter == "model-a" && label.Certainty != 100 {
    t.Errorf("model-a label should be 100, got %d", label.Certainty)
    }
    if label.Filter == "model-b" && label.Certainty != 90 {
    t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)
    }
    }
    }
  • file deletion: calls_push_certainty.go (----------)
    [6.248737][6.409526:409573](),[6.409573][6.407012:407012]()
    package tools
    import (
    "fmt"
    "skraak/utils"
    )
    // PushCertaintyConfig holds the configuration for push-certainty.
    // Every field except Reviewer is forwarded unchanged to the ClassifyConfig
    // used to load and filter the data files.
    type PushCertaintyConfig struct {
    Folder string
    File string
    // Filter, Species and CallType also scope which individual labels are
    // promoted; an empty value matches any label.
    Filter string
    Species string
    CallType string
    Night bool
    Day bool
    Lat float64
    Lng float64
    Timezone string
    // Reviewer is written into the metadata of every file that is changed.
    Reviewer string
    }
    // PushCertaintyResult holds the result of push-certainty.
    type PushCertaintyResult struct {
    // SegmentsUpdated is incremented once for every label promoted to certainty=100.
    SegmentsUpdated int `json:"segments_updated"`
    // FilesUpdated counts the data files rewritten because at least one label changed.
    FilesUpdated int `json:"files_updated"`
    // TimeFilteredCount is copied from the loaded classify state.
    TimeFilteredCount int `json:"time_filtered_count"`
    }
    // PushCertainty promotes all certainty=90 labels matching the filter scope to
    // certainty=100. It loads data through LoadDataFiles with Certainty fixed at 90
    // and Sample at -1, so the scope matches calls classify exactly.
    //
    // For every data file with at least one promoted label, the reviewer is set to
    // config.Reviewer and the file is written back to its own path. Files without
    // a promoted label are not rewritten.
    //
    // It returns the update counts, or the first load or write error. Files
    // written before a write error remain updated.
    func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {
        state, err := LoadDataFiles(ClassifyConfig{
            Folder:    config.Folder,
            File:      config.File,
            Filter:    config.Filter,
            Species:   config.Species,
            CallType:  config.CallType,
            Certainty: 90,
            Sample:    -1,
            Night:     config.Night,
            Day:       config.Day,
            Lat:       config.Lat,
            Lng:       config.Lng,
            Timezone:  config.Timezone,
        })
        if err != nil {
            return nil, err
        }

        var segsUpdated, filesUpdated int
        for i, df := range state.DataFiles {
            changed := false
            for _, seg := range state.FilteredSegs()[i] {
                for _, label := range seg.Labels {
                    if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {
                        label.Certainty = 100
                        changed = true
                        segsUpdated++
                    }
                }
            }
            if !changed {
                continue
            }
            // A data file may carry no metadata; create it so the reviewer can be
            // recorded instead of dereferencing a nil pointer.
            if df.Meta == nil {
                df.Meta = &utils.DataMeta{}
            }
            df.Meta.Reviewer = config.Reviewer
            if err := df.Write(df.FilePath); err != nil {
                return nil, fmt.Errorf("write %s: %w", df.FilePath, err)
            }
            filesUpdated++
        }

        return &PushCertaintyResult{
            SegmentsUpdated:   segsUpdated,
            FilesUpdated:      filesUpdated,
            TimeFilteredCount: state.TimeFilteredCount,
        }, nil
    }
    // labelMatchesPush reports whether label is inside the push scope and has
    // certainty=90. An empty filter, species or callType acts as a wildcard.
    // The certainty is re-checked per label because a segment can carry labels
    // from several filters and only the matching label should be promoted.
    func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {
        filterOK := filter == "" || label.Filter == filter
        speciesOK := species == "" || label.Species == species
        callTypeOK := callType == "" || label.CallType == callType
        return filterOK && speciesOK && callTypeOK && label.Certainty == 90
    }
  • file deletion: calls_propagate_test.go (----------)
    [6.248737][6.430676:430723](),[6.430723][6.409575:409575]()
    package tools
    import (
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // helpers
    // seg builds a test segment spanning start..end with a fixed frequency range
    // of 100-8000 and the given labels.
    func seg(start, end float64, labels ...*utils.Label) *utils.Segment {
        s := new(utils.Segment)
        s.StartTime, s.EndTime = start, end
        s.FreqLow, s.FreqHigh = 100, 8000
        s.Labels = labels
        return s
    }
    // lbl builds a test label with the given filter, species, call type and certainty.
    func lbl(filter, species, calltype string, certainty int) *utils.Label {
        l := new(utils.Label)
        l.Filter = filter
        l.Species = species
        l.CallType = calltype
        l.Certainty = certainty
        return l
    }
    // writeFile writes segs into "test.data" inside a fresh temporary directory,
    // with metadata Operator "ML", Reviewer "David" and Duration 3600, and returns
    // the file path. The test is aborted if the fixture cannot be written.
    func writeFile(t *testing.T, segs ...*utils.Segment) string {
        t.Helper()
        fixture := filepath.Join(t.TempDir(), "test.data")
        meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
        data := &utils.DataFile{Meta: meta, Segments: segs}
        if err := data.Write(fixture); err != nil {
            t.Fatalf("write fixture: %v", err)
        }
        return fixture
    }
    // readFile parses the data file at path and aborts the test on a parse error.
    func readFile(t *testing.T, path string) *utils.DataFile {
        t.Helper()
        parsed, parseErr := utils.ParseDataFile(path)
        if parseErr != nil {
            t.Fatalf("parse %s: %v", path, parseErr)
        }
        return parsed
    }
    // findLabel scans df for a segment whose start and end times equal the given
    // values and returns its first label with the given filter, or nil if none exists.
    func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {
        for _, segment := range df.Segments {
            if segment.StartTime == start && segment.EndTime == end {
                for _, label := range segment.Labels {
                    if label.Filter != filter {
                        continue
                    }
                    return label
                }
            }
        }
        return nil
    }
    // Filter names shared by the propagate tests: fFrom is passed as FromFilter
    // (the source of labels) and fTo as ToFilter (the target that receives them).
    const (
    fFrom = "opensoundscape-kiwi-1.2"
    fTo = "opensoundscape-kiwi-1.5"
    )
    func TestPropagate_HappyPathSingle(t *testing.T) {
    path := writeFile(t,
    seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    )
    out, err := CallsPropagate(CallsPropagateInput{
    File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err != nil {
    t.Fatalf("unexpected error: %v (%s)", err, out.Error)
    }
    if out.Propagated != 1 || out.TargetsExamined != 1 || out.SkippedConflict != 0 || out.SkippedNoOverlap != 0 {
    t.Fatalf("counts wrong: %+v", out)
    }
    df := readFile(t, path)
    target := findLabel(df, fTo, 100, 125)
    if target == nil {
    t.Fatal("target label missing")
    }
    if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
    t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", target.Species, target.CallType, target.Certainty)
    }
    if df.Meta.Reviewer != "Skraak" {
    t.Errorf("reviewer = %q, want Skraak", df.Meta.Reviewer)
    }
    }
    func TestPropagate_NoOverlap(t *testing.T) {
    path := writeFile(t,
    seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
    )
    out, err := CallsPropagate(CallsPropagateInput{
    File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {
    t.Fatalf("counts wrong: %+v", out)
    }
    df := readFile(t, path)
    target := findLabel(df, fTo, 500, 525)
    if target.Certainty != 70 {
    t.Errorf("target should not be modified, cert=%d", target.Certainty)
    }
    if df.Meta.Reviewer != "David" {
    t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
    }
    }
    func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {
    path := writeFile(t,
    seg(100, 125, lbl(fFrom, "Weka", "", 100)),
    seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    )
    out, err := CallsPropagate(CallsPropagateInput{
    File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if out.Propagated != 0 || out.SkippedNoOverlap != 1 {
    t.Fatalf("counts wrong: %+v", out)
    }
    }
    // TestPropagate_SourceWrongCertainty_Ignored checks that source labels at
    // certainty 70 and certainty 0 are not treated as sources: both overlapping
    // targets end up counted as SkippedNoOverlap.
    func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {
        fixture := writeFile(t,
            // Source-filter labels, neither at certainty 100.
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),
            seg(200, 225, lbl(fFrom, "Don't Know", "", 0)),
            // Target-filter labels over the same spans.
            seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
            seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.Propagated == 0 && res.SkippedNoOverlap == 2) {
            t.Fatalf("counts wrong: %+v", res)
        }
    }
    // TestPropagate_SourceWrongFilter_Ignored checks that a file whose only
    // verified label belongs to an unrelated filter is reported with
    // FiltersMissing set and with zero targets examined or propagated.
    func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        switch {
        case !res.FiltersMissing, res.Propagated != 0, res.TargetsExamined != 0:
            t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", res)
        }
    }
    // TestPropagate_TargetCert100_NotTouched checks that a target label already
    // at certainty 100 (human-verified) is neither examined nor overwritten, and
    // that the file's reviewer stays as written by the fixture.
    func TestPropagate_TargetCert100_NotTouched(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.TargetsExamined == 0 && res.Propagated == 0) {
            t.Fatalf("cert=100 target must not be examined: %+v", res)
        }
        reviewer := readFile(t, fixture).Meta.Reviewer
        if reviewer != "David" {
            t.Errorf("reviewer should stay David (no write), got %q", reviewer)
        }
    }
    // TestPropagate_TargetCert90_NotTouched verifies that a target label already
    // at certainty 90 (the value written by an earlier propagation) is not
    // examined again and keeps its existing call type.
    func TestPropagate_TargetCert90_NotTouched(t *testing.T) {
        path := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),
        )
        out, err := CallsPropagate(CallsPropagateInput{
            File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if out.TargetsExamined != 0 || out.Propagated != 0 {
            t.Fatalf("cert=90 target must not be examined: %+v", out)
        }
        df := readFile(t, path)
        target := findLabel(df, fTo, 100, 125)
        // Fail with a clear message rather than panic if the label cannot be found.
        if target == nil {
            t.Fatal("target label missing")
        }
        if target.Certainty != 90 || target.CallType != "Female" {
            t.Errorf("cert=90 target was modified: %+v", target)
        }
    }
    // TestPropagate_TargetCert0_Propagated checks that certainty-0 targets
    // ("Don't Know" / "Noise") are updated when an overlapping certainty-100
    // source exists, so they leave the noise bucket and surface for review even
    // if the propagated label is occasionally wrong.
    func TestPropagate_TargetCert0_Propagated(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Don't Know", "", 0)),
            seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),
            seg(200, 225, lbl(fTo, "Noise", "", 0)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.TargetsExamined == 2 && res.Propagated == 2) {
            t.Fatalf("cert=0 targets must be propagated: %+v", res)
        }

        type expectation struct {
            start, end float64
            calltype   string
        }
        wanted := []expectation{
            {100, 125, "Male"},
            {200, 225, "Female"},
        }
        data := readFile(t, fixture)
        for _, w := range wanted {
            got := findLabel(data, fTo, w.start, w.end)
            ok := got != nil &&
                got.Species == "Kiwi" &&
                got.CallType == w.calltype &&
                got.Certainty == 90
            if !ok {
                t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
            }
        }
    }
    // TestPropagate_MultipleSourcesAgree verifies that when two source segments
    // overlap one target and carry the same call type, the target is propagated
    // with that call type and no conflict is recorded.
    func TestPropagate_MultipleSourcesAgree(t *testing.T) {
        path := writeFile(t,
            seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        out, err := CallsPropagate(CallsPropagateInput{
            File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if out.Propagated != 1 || out.SkippedConflict != 0 {
            t.Fatalf("counts wrong: %+v", out)
        }
        df := readFile(t, path)
        target := findLabel(df, fTo, 100, 125)
        // Fail with a clear message rather than panic if the label cannot be found.
        if target == nil {
            t.Fatal("target label missing")
        }
        if target.CallType != "Male" {
            t.Errorf("calltype should be Male, got %q", target.CallType)
        }
    }
    // TestPropagate_MultipleSourcesConflict verifies that when two overlapping
    // sources disagree on call type the target is skipped, one conflict listing
    // both source choices is reported, and the file is left unmodified.
    func TestPropagate_MultipleSourcesConflict(t *testing.T) {
        path := writeFile(t,
            seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        out, err := CallsPropagate(CallsPropagateInput{
            File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if out.Propagated != 0 || out.SkippedConflict != 1 {
            t.Fatalf("expected 1 conflict skip: %+v", out)
        }
        // Fatal here so the Conflicts[0] accesses below cannot index out of range.
        if len(out.Conflicts) != 1 {
            t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))
        }
        if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {
            t.Errorf("conflict target wrong: %+v", out.Conflicts[0])
        }
        if len(out.Conflicts[0].SourceChoices) != 2 {
            t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))
        }
        // Target must NOT be modified.
        df := readFile(t, path)
        target := findLabel(df, fTo, 100, 125)
        // Fail with a clear message rather than panic if the label cannot be found.
        if target == nil {
            t.Fatal("target label missing")
        }
        if target.CallType != "Duet" || target.Certainty != 70 {
            t.Errorf("conflicted target was modified: %+v", target)
        }
        if df.Meta.Reviewer != "David" {
            t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
        }
    }
    // TestPropagate_EmptyCallTypePropagates verifies that a source with an empty
    // call type still propagates: the target's call type is cleared while species
    // and certainty are set to the propagated values.
    func TestPropagate_EmptyCallTypePropagates(t *testing.T) {
        path := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),
        )
        out, err := CallsPropagate(CallsPropagateInput{
            File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if out.Propagated != 1 {
            t.Fatalf("expected propagated=1: %+v", out)
        }
        df := readFile(t, path)
        target := findLabel(df, fTo, 100, 125)
        // Fail with a clear message rather than panic if the label cannot be found.
        if target == nil {
            t.Fatal("target label missing")
        }
        if target.CallType != "" {
            t.Errorf("calltype should be cleared, got %q", target.CallType)
        }
        if target.Species != "Kiwi" || target.Certainty != 90 {
            t.Errorf("target fields wrong: %+v", target)
        }
    }
    // TestPropagate_SpeciesOverride verifies that a target whose species differs
    // from the requested species is overwritten with the requested species, the
    // source's call type, and certainty 90.
    func TestPropagate_SpeciesOverride(t *testing.T) {
        path := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Don't Know", "", 70)),
        )
        out, err := CallsPropagate(CallsPropagateInput{
            File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if out.Propagated != 1 {
            t.Fatalf("expected propagated=1: %+v", out)
        }
        df := readFile(t, path)
        target := findLabel(df, fTo, 100, 125)
        // Fail with a clear message rather than panic if the label cannot be found.
        if target == nil {
            t.Fatal("target label missing")
        }
        if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
            t.Errorf("target not overwritten correctly: %+v", target)
        }
    }
    // TestPropagate_OverlapBoundaryExclusive checks that a source ending exactly
    // where the target starts (touching at a single point) is not an overlap.
    func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.Propagated == 0 && res.SkippedNoOverlap == 1) {
            t.Fatalf("touching boundary must not count as overlap: %+v", res)
        }
    }
    // TestPropagate_OverlapPartial checks that a one-second intersection between
    // source (100-126) and target (125-150) is enough to propagate.
    func TestPropagate_OverlapPartial(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if got := res.Propagated; got != 1 {
            t.Fatalf("expected propagated=1: %+v", res)
        }
    }
    // TestPropagate_SupersetEitherDirection verifies that containment counts as
    // overlap in both directions: a source that engulfs the target and a target
    // that engulfs the source must each yield exactly one propagation.
    func TestPropagate_SupersetEitherDirection(t *testing.T) {
        // Source engulfs target.
        path1 := writeFile(t,
            seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        out1, err := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
        // Report the error itself instead of discarding it, so a failure is diagnosable.
        if err != nil {
            t.Errorf("source-engulfs-target: unexpected error: %v", err)
        } else if out1.Propagated != 1 {
            t.Errorf("source-engulfs-target: %+v", out1)
        }
        // Target engulfs source.
        path2 := writeFile(t,
            seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        out2, err := CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
        if err != nil {
            t.Errorf("target-engulfs-source: unexpected error: %v", err)
        } else if out2.Propagated != 1 {
            t.Errorf("target-engulfs-source: %+v", out2)
        }
    }
    // TestPropagate_MissingFlags checks that CallsPropagate returns an error
    // whenever one of File, FromFilter, ToFilter or Species is left empty.
    func TestPropagate_MissingFlags(t *testing.T) {
        type scenario struct {
            name string
            in   CallsPropagateInput
        }
        scenarios := []scenario{
            {name: "no file", in: CallsPropagateInput{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}},
            {name: "no from", in: CallsPropagateInput{File: "x", ToFilter: fTo, Species: "Kiwi"}},
            {name: "no to", in: CallsPropagateInput{File: "x", FromFilter: fFrom, Species: "Kiwi"}},
            {name: "no species", in: CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo}},
        }
        for i := range scenarios {
            sc := scenarios[i]
            t.Run(sc.name, func(t *testing.T) {
                if _, err := CallsPropagate(sc.in); err == nil {
                    t.Errorf("expected error")
                }
            })
        }
    }
    func TestPropagate_SameFromAndTo(t *testing.T) {
    _, err := CallsPropagate(CallsPropagateInput{
    File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",
    })
    if err == nil {
    t.Error("expected error when --from == --to")
    }
    }
    func TestPropagate_NonexistentFile(t *testing.T) {
    _, err := CallsPropagate(CallsPropagateInput{
    File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err == nil {
    t.Error("expected error for nonexistent file")
    }
    }
    // TestPropagate_RealisticMixed mimics the 20260228_211500.WAV.data case, where
    // certainty-0 "Don't Know" sources sit alongside certainty-100 Kiwi sources.
    // Only the certainty-100 Kiwi labels may be propagated onto the targets.
    func TestPropagate_RealisticMixed(t *testing.T) {
        fixture := writeFile(t,
            // Sources (kiwi-1.2)
            seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),
            seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),
            seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),
            seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),
            // Targets (kiwi-1.5)
            seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),
            seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),
            seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.TargetsExamined == 3 && res.Propagated == 3 && res.SkippedConflict == 0) {
            t.Fatalf("counts wrong: %+v", res)
        }

        type expectation struct {
            start, end float64
            calltype   string
        }
        wanted := []expectation{
            {start: 147.5, end: 167.5, calltype: "Male"},
            {start: 647.5, end: 672.5, calltype: "Female"},
            {start: 815, end: 852.5, calltype: "Duet"},
        }
        data := readFile(t, fixture)
        for _, w := range wanted {
            got := findLabel(data, fTo, w.start, w.end)
            ok := got != nil &&
                got.Certainty == 90 &&
                got.CallType == w.calltype &&
                got.Species == "Kiwi"
            if !ok {
                t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
            }
        }
    }
    // TestPropagate_NoWriteIfNothingChanged checks that a file holding no
    // target-filter segments produces no activity and keeps its reviewer.
    func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {
        fixture := writeFile(t,
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
        )
        in := CallsPropagateInput{
            File:       fixture,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagate(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.Propagated == 0 && res.TargetsExamined == 0) {
            t.Fatalf("expected no activity: %+v", res)
        }
        reviewer := readFile(t, fixture).Meta.Reviewer
        if reviewer != "David" {
            t.Errorf("reviewer should not be touched, got %q", reviewer)
        }
    }
    // writeFileAt writes a fixture data file named base inside the existing
    // directory dir and returns its full path. The file is created with operator
    // "ML", reviewer "David", a duration of 3600 and the given segments; the test
    // is aborted if the write fails. The caller supplies the basename, which is
    // expected to end in .data.
    func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {
        t.Helper()
        meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
        fixture := &utils.DataFile{Meta: meta, Segments: segs}
        target := filepath.Join(dir, base)
        if err := fixture.Write(target); err != nil {
            t.Fatalf("write fixture: %v", err)
        }
        return target
    }
    // TestPropagateFolder_AggregatesAndSkipsMissing runs the folder-level
    // propagation over four fixture files and checks both the aggregated counters
    // and the on-disk state of the individual files:
    //   - a.wav.data: both filters, one clean propagation;
    //   - b.wav.data: target filter only, skipped;
    //   - c.wav.data: source filter only, skipped;
    //   - d.wav.data: both filters but no overlap, examined and left unchanged.
    func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {
        folder := t.TempDir()

        fileA := writeFileAt(t, folder, "a.wav.data",
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        fileB := writeFileAt(t, folder, "b.wav.data",
            seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),
        )
        writeFileAt(t, folder, "c.wav.data",
            seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),
        )
        fileD := writeFileAt(t, folder, "d.wav.data",
            seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
        )

        in := CallsPropagateFolderInput{
            Folder:     folder,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagateFolder(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }

        want := CallsPropagateFolderOutput{
            FilesTotal:           4,
            FilesWithBothFilters: 2,
            FilesSkippedNoFilter: 2,
            FilesChanged:         1,
            FilesErrored:         0,
            TargetsExamined:      2,
            Propagated:           1,
            SkippedNoOverlap:     1,
        }
        assertPropagateStats(t, res, want)

        t.Run("file_a_propagated", func(t *testing.T) {
            data := readFile(t, fileA)
            if reviewer := data.Meta.Reviewer; reviewer != "Skraak" {
                t.Errorf("reviewer: got %q, want Skraak", reviewer)
            }
            got := findLabel(data, fTo, 100, 125)
            if got == nil || got.Certainty != 90 || got.CallType != "Male" {
                t.Errorf("target label: got %+v, want cert=90 calltype=Male", got)
            }
        })

        t.Run("file_b_skipped", func(t *testing.T) {
            data := readFile(t, fileB)
            if reviewer := data.Meta.Reviewer; reviewer != "David" {
                t.Errorf("reviewer should not be touched, got %q", reviewer)
            }
        })

        t.Run("file_d_no_overlap", func(t *testing.T) {
            data := readFile(t, fileD)
            if reviewer := data.Meta.Reviewer; reviewer != "David" {
                t.Errorf("reviewer should not be touched, got %q", reviewer)
            }
            got := findLabel(data, fTo, 500, 525)
            if got == nil || got.Certainty != 70 {
                t.Errorf("target label should be unchanged cert=70, got %+v", got)
            }
        })
    }
    func TestPropagateFolder_EmptyFolder(t *testing.T) {
    dir := t.TempDir()
    out, err := CallsPropagateFolder(CallsPropagateFolderInput{
    Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if out.FilesTotal != 0 || out.Propagated != 0 {
    t.Errorf("expected empty result, got %+v", out)
    }
    }
    // TestPropagateFolder_MissingRequiredFlags checks that CallsPropagateFolder
    // rejects inputs with an empty folder, source filter, target filter or
    // species, and inputs where source and target filter are the same.
    func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {
        folder := t.TempDir()
        invalid := []CallsPropagateFolderInput{
            {Folder: "", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"},
            {Folder: folder, FromFilter: "", ToFilter: fTo, Species: "Kiwi"},
            {Folder: folder, FromFilter: fFrom, ToFilter: "", Species: "Kiwi"},
            {Folder: folder, FromFilter: fFrom, ToFilter: fTo, Species: ""},
            {Folder: folder, FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi"},
        }
        for idx := 0; idx < len(invalid); idx++ {
            _, err := CallsPropagateFolder(invalid[idx])
            if err != nil {
                continue
            }
            t.Errorf("case %d: expected error for input %+v", idx, invalid[idx])
        }
    }
    func TestPropagateFolder_NonexistentFolder(t *testing.T) {
    _, err := CallsPropagateFolder(CallsPropagateFolderInput{
    Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err == nil {
    t.Fatal("expected error for nonexistent folder")
    }
    }
    // TestPropagateFolder_ConflictsTaggedWithFile checks that a conflict found
    // while processing a folder is reported with a non-empty File field. The
    // fixture has two sources with different call types overlapping one target.
    func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {
        folder := t.TempDir()
        writeFileAt(t, folder, "conflict.wav.data",
            seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
            seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),
            seg(100, 130, lbl(fTo, "Kiwi", "", 70)),
        )
        in := CallsPropagateFolderInput{
            Folder:     folder,
            FromFilter: fFrom,
            ToFilter:   fTo,
            Species:    "Kiwi",
        }
        res, err := CallsPropagateFolder(in)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if !(res.SkippedConflict == 1 && len(res.Conflicts) == 1) {
            t.Fatalf("expected one conflict, got %+v", res)
        }
        first := res.Conflicts[0]
        if first.File == "" {
            t.Errorf("conflict should be tagged with file path, got %+v", first)
        }
    }
    }
    // assertPropagateStats compares the integer counters of a folder propagation
    // result (got) against the expected values (want) and reports one t.Errorf
    // per mismatching counter, naming the field that differs.
    //
    // Only the eight counters listed in checks are compared.
    // NOTE(review): SkippedConflict is not in the list, so a wrong conflict count
    // goes unnoticed here — confirm whether that omission is intentional.
    func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {
    t.Helper()
    // One row per counter: field name for the message, actual value, expected value.
    checks := []struct {
    name string
    got int
    want int
    }{
    {"FilesTotal", got.FilesTotal, want.FilesTotal},
    {"FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters},
    {"FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter},
    {"FilesChanged", got.FilesChanged, want.FilesChanged},
    {"FilesErrored", got.FilesErrored, want.FilesErrored},
    {"TargetsExamined", got.TargetsExamined, want.TargetsExamined},
    {"Propagated", got.Propagated, want.Propagated},
    {"SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap},
    }
    for _, c := range checks {
    if c.got != c.want {
    t.Errorf("%s: got %d, want %d", c.name, c.got, c.want)
    }
    }
  • file deletion: calls_propagate.go (----------)
    [6.248737][6.441079:441121](),[6.441121][6.430725:430725]()
    package tools
    import (
    "fmt"
    "os"
    "skraak/utils"
    )
    // CallsPropagateInput holds the parameters for propagating classifications
    // within a single data file. All four fields are required, and FromFilter
    // must differ from ToFilter.
    type CallsPropagateInput struct {
    // File is the path of the data file to read and, if changed, rewrite.
    File string `json:"file"`
    // FromFilter names the filter whose labels act as sources.
    FromFilter string `json:"from_filter"`
    // ToFilter names the filter whose labels are candidates for update.
    ToFilter string `json:"to_filter"`
    // Species is the species a source label must carry; it is also the species
    // written onto propagated targets.
    Species string `json:"species"`
    }
    // CallsPropagateOutput reports the outcome of CallsPropagate for one file.
    // File, FromFilter, ToFilter and Species echo the input values.
    type CallsPropagateOutput struct {
    File string `json:"file"`
    FromFilter string `json:"from_filter"`
    ToFilter string `json:"to_filter"`
    Species string `json:"species"`
    // FiltersMissing is set when the file does not contain both filters; the
    // counters below stay at zero in that case.
    FiltersMissing bool `json:"filters_missing,omitempty"`
    // TargetsExamined counts target segments that had an updatable label.
    TargetsExamined int `json:"targets_examined"`
    // Propagated counts targets that were updated.
    Propagated int `json:"propagated"`
    // SkippedNoOverlap counts examined targets with no overlapping source.
    SkippedNoOverlap int `json:"skipped_no_overlap"`
    // SkippedConflict counts examined targets whose overlapping sources
    // disagreed on call type.
    SkippedConflict int `json:"skipped_conflict"`
    // Conflicts has one entry per target counted in SkippedConflict.
    Conflicts []PropagateConflict `json:"conflicts,omitempty"`
    // Changes has one entry per propagated target.
    Changes []PropagateChange `json:"changes,omitempty"`
    // Error carries the failure message when validation, parsing or writing fails.
    Error string `json:"error,omitempty"`
    }
    // CallsPropagateFolderInput holds the parameters for running propagation over
    // every data file in a folder. All four fields are required, and FromFilter
    // must differ from ToFilter.
    type CallsPropagateFolderInput struct {
    // Folder is the directory searched for data files.
    Folder string `json:"folder"`
    // FromFilter names the filter whose labels act as sources.
    FromFilter string `json:"from_filter"`
    // ToFilter names the filter whose labels are candidates for update.
    ToFilter string `json:"to_filter"`
    // Species is passed through to each per-file propagation.
    Species string `json:"species"`
    }
    // CallsPropagateFolderOutput aggregates per-file propagation results for a
    // folder. Folder, FromFilter, ToFilter and Species echo the input values.
    type CallsPropagateFolderOutput struct {
    Folder string `json:"folder"`
    FromFilter string `json:"from_filter"`
    ToFilter string `json:"to_filter"`
    Species string `json:"species"`
    // FilesTotal is the number of data files found in the folder.
    FilesTotal int `json:"files_total"`
    // FilesWithBothFilters counts files processed without error that contained
    // both filters.
    FilesWithBothFilters int `json:"files_with_both_filters"`
    // FilesSkippedNoFilter counts files reported with FiltersMissing.
    FilesSkippedNoFilter int `json:"files_skipped_no_filter"`
    // FilesChanged counts files in which at least one target was propagated.
    FilesChanged int `json:"files_changed"`
    // FilesErrored counts files whose per-file run returned an error.
    FilesErrored int `json:"files_errored"`
    // The four counters below are sums of the same-named per-file counters.
    TargetsExamined int `json:"targets_examined"`
    Propagated int `json:"propagated"`
    SkippedNoOverlap int `json:"skipped_no_overlap"`
    SkippedConflict int `json:"skipped_conflict"`
    // Conflicts collects all per-file conflicts, each tagged with its file.
    Conflicts []PropagateConflict `json:"conflicts,omitempty"`
    // Errors holds the per-file outputs of the files counted in FilesErrored.
    Errors []CallsPropagateOutput `json:"errors,omitempty"`
    // Error carries the message when the folder-level run itself fails.
    Error string `json:"error,omitempty"`
    }
    // PropagateConflict describes a target segment that was skipped because its
    // overlapping sources carried different call types.
    type PropagateConflict struct {
    // File is filled in by the folder-level run; the single-file run leaves it empty.
    File string `json:"file,omitempty"`
    // TargetStart and TargetEnd are the target segment's start and end times.
    TargetStart float64 `json:"target_start"`
    TargetEnd float64 `json:"target_end"`
    // TargetCallType is the call type the target label had when it was skipped.
    TargetCallType string `json:"target_calltype,omitempty"`
    // SourceChoices lists every overlapping source that took part in the conflict.
    SourceChoices []PropagateSourceChoice `json:"source_choices"`
    }
    // PropagateSourceChoice is one overlapping source listed in a conflict: the
    // source segment's start and end times plus the species and call type of its
    // label.
    type PropagateSourceChoice struct {
    Start float64 `json:"start"`
    End float64 `json:"end"`
    Species string `json:"species"`
    CallType string `json:"calltype,omitempty"`
    }
    // PropagateChange records a single propagation: the target segment's time
    // span, the label values before the update (Prev*) and the values assigned
    // by it (New*).
    type PropagateChange struct {
    TargetStart float64 `json:"target_start"`
    TargetEnd float64 `json:"target_end"`
    PrevSpecies string `json:"prev_species"`
    PrevCallType string `json:"prev_calltype,omitempty"`
    PrevCertainty int `json:"prev_certainty"`
    NewSpecies string `json:"new_species"`
    NewCallType string `json:"new_calltype,omitempty"`
    NewCertainty int `json:"new_certainty"`
    }
    // CallsPropagate copies verified classifications (certainty==100) from one
    // filter's segments to overlapping target segments of another filter, within
    // a single .data file.
    //
    // Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't
    // Know / Noise) are updated; targets at certainty==100 (human-verified) and
    // certainty==90 (already propagated) are left alone. Only source labels
    // matching --species are considered. Propagated target labels are set to
    // certainty=90, and when at least one target was propagated the file reviewer
    // is set to "Skraak" and the file is written back.
    //
    // A file that lacks one of the two filters is not an error: the output has
    // FiltersMissing set and all counters at zero.
    //
    // On failure the returned output has Error set to the same text as the
    // returned error. Parse and write errors wrap the underlying cause, so callers
    // can inspect it with errors.Is / errors.As.
    func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {
        output := CallsPropagateOutput{
            File:       input.File,
            FromFilter: input.FromFilter,
            ToFilter:   input.ToFilter,
            Species:    input.Species,
        }
        if err := validatePropagateInput(&output, input); err != nil {
            return output, err
        }
        df, err := utils.ParseDataFile(input.File)
        if err != nil {
            // %w keeps the cause reachable; the message text is unchanged.
            wrapped := fmt.Errorf("parse %s: %w", input.File, err)
            output.Error = wrapped.Error()
            return output, wrapped
        }
        // Fast path: skip files that don't contain both filters at all.
        if !hasBothFilters(df, input.FromFilter, input.ToFilter) {
            output.FiltersMissing = true
            return output, nil
        }
        sources := collectPropagateSources(df, input.FromFilter, input.Species)
        propagateTargets(df, sources, input, &output)
        // Only touch the file on disk when something actually changed.
        if output.Propagated > 0 {
            df.Meta.Reviewer = "Skraak"
            if err := df.Write(input.File); err != nil {
                wrapped := fmt.Errorf("write %s: %w", input.File, err)
                output.Error = wrapped.Error()
                return output, wrapped
            }
        }
        return output, nil
    }
    // validatePropagateInput checks that File, FromFilter, ToFilter and Species
    // are non-empty (in that order), that FromFilter differs from ToFilter, and
    // that File exists. On the first failed check it stores the message in
    // output.Error and returns an error with the same text.
    func validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {
    // Required values paired with the message reported when one is empty.
    checks := []struct {
    val string
    msg string
    }{
    {input.File, "--file is required"},
    {input.FromFilter, "--from is required"},
    {input.ToFilter, "--to is required"},
    {input.Species, "--species is required"},
    }
    for _, c := range checks {
    if c.val == "" {
    output.Error = c.msg
    return fmt.Errorf("%s", c.msg)
    }
    }
    if input.FromFilter == input.ToFilter {
    output.Error = "--from and --to must differ"
    return fmt.Errorf("%s", output.Error)
    }
    // Only a "does not exist" result from Stat is treated as a failure here;
    // any other Stat error is not reported by this check.
    if _, err := os.Stat(input.File); os.IsNotExist(err) {
    output.Error = fmt.Sprintf("file not found: %s", input.File)
    return fmt.Errorf("%s", output.Error)
    }
    // hasBothFilters reports whether the data file contains at least one label
    // with Filter == fromFilter and at least one with Filter == toFilter. The two
    // labels may sit on different segments. The scan stops as soon as both have
    // been seen.
    func hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
    hasFrom, hasTo := false, false
    for _, seg := range df.Segments {
    for _, lbl := range seg.Labels {
    if lbl.Filter == fromFilter {
    hasFrom = true
    }
    if lbl.Filter == toFilter {
    hasTo = true
    }
    // Early exit once both filters have been found.
    if hasFrom && hasTo {
    return true
    }
    }
    }
    // sourceRef pairs a segment with the label on it that qualified as a
    // propagation source. The segment supplies the time span used for overlap
    // tests; the label supplies species and call type.
    type sourceRef struct {
    seg *utils.Segment
    label *utils.Label
    }
    // collectPropagateSources gathers the source labels of a data file: labels
    // whose Filter equals fromFilter, whose Species equals species and whose
    // Certainty is exactly 100. Each segment contributes at most one sourceRef,
    // built from its first qualifying label.
    func collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
    var sources []sourceRef
    for _, seg := range df.Segments {
    for _, lbl := range seg.Labels {
    if lbl.Filter == fromFilter && lbl.Species == species && lbl.Certainty == 100 {
    sources = append(sources, sourceRef{seg: seg, label: lbl})
    // First match wins; remaining labels on this segment are not inspected.
    break
    }
    }
    }
    // propagateTargets walks every segment of df and, for each one that carries
    // an updatable label of the target filter, either applies the call type the
    // overlapping sources agree on or records why the target was skipped.
    // Counters, conflicts and changes are accumulated on output.
    func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
        for _, target := range df.Segments {
            label := findUpdatableTargetLabel(target.Labels, input.ToFilter)
            if label != nil {
                output.TargetsExamined++

                matches := findOverlappingSources(sources, target)
                if len(matches) == 0 {
                    output.SkippedNoOverlap++
                    continue
                }

                callType, disagree := resolveCallType(matches)
                if !disagree {
                    applyPropagation(label, input.Species, callType, target, output)
                    continue
                }

                // Sources disagree: leave the target alone and report the choices.
                output.SkippedConflict++
                record := buildConflictRecord(target, label, matches)
                output.Conflicts = append(output.Conflicts, record)
            }
        }
    }
    // findUpdatableTargetLabel returns the first label in labels that belongs to
    // toFilter and has certainty 70 or 0, or nil when there is none.
    func findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {
        for i := range labels {
            candidate := labels[i]
            if candidate.Filter != toFilter {
                continue
            }
            switch candidate.Certainty {
            case 0, 70:
                return candidate
            }
        }
        return nil
    }
    // findOverlappingSources collects the sources whose segment time span
    // intersects the target segment's span. The comparison is strict on both
    // sides, so spans that merely touch at an endpoint do not count as overlapping.
    func findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {
    var overlaps []sourceRef
    for _, s := range sources {
    // Intervals overlap when each one starts before the other ends.
    if s.seg.StartTime < tSeg.EndTime && tSeg.StartTime < s.seg.EndTime {
    overlaps = append(overlaps, s)
    }
    // resolveCallType decides whether all overlapping sources carry the same call
    // type. It returns that call type and false when they agree, or "" and true
    // as soon as one source differs from the first. overlaps must be non-empty.
    func resolveCallType(overlaps []sourceRef) (string, bool) {
        first := overlaps[0].label.CallType
        for i := 1; i < len(overlaps); i++ {
            if overlaps[i].label.CallType != first {
                return "", true
            }
        }
        return first, false
    }
    // buildConflictRecord builds the PropagateConflict for a skipped target: the
    // target segment's time span, the target label's current call type, and one
    // PropagateSourceChoice per overlapping source. The File field is left empty.
    func buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {
    // Capacity is known up front: exactly one choice per overlapping source.
    choices := make([]PropagateSourceChoice, 0, len(overlaps))
    for _, s := range overlaps {
    choices = append(choices, PropagateSourceChoice{
    Start: s.seg.StartTime,
    End: s.seg.EndTime,
    Species: s.label.Species,
    CallType: s.label.CallType,
    })
    }
    return PropagateConflict{
    TargetStart: tSeg.StartTime,
    TargetEnd: tSeg.EndTime,
    TargetCallType: toLabel.CallType,
    SourceChoices: choices,
    }
    // applyPropagation records a propagation on output: it captures the target
    // label's previous species, call type and certainty together with the new
    // values (species, callType, certainty 90), increments output.Propagated and
    // appends the change to output.Changes.
    //
    // NOTE(review): the statements that assign toLabel.Species, toLabel.CallType
    // and toLabel.Certainty are not within this span of the listing; they appear
    // further down, outside any function. Confirm against the original file that
    // they belong to this body.
    func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
    // Snapshot the previous label values before anything is overwritten.
    change := PropagateChange{
    TargetStart: tSeg.StartTime,
    TargetEnd: tSeg.EndTime,
    PrevSpecies: toLabel.Species,
    PrevCallType: toLabel.CallType,
    PrevCertainty: toLabel.Certainty,
    NewSpecies: species,
    NewCallType: callType,
    NewCertainty: 90,
    }
    output.Propagated++
    output.Changes = append(output.Changes, change)
    }
    // CallsPropagateFolder applies CallsPropagate to each .data file found in a
    // folder and sums the per-file counters.
    //
    // Files lacking either the --from or the --to filter are skipped silently and
    // counted in FilesSkippedNoFilter. A file whose run returns an error is
    // counted in FilesErrored and its output is kept in Errors; such failures do
    // not abort the run. Conflicts are collected across files, each tagged with
    // the file it came from.
    //
    // An error is returned only for folder-level problems: a missing required
    // value, identical --from and --to, a folder that cannot be stat'ed or is not
    // a directory, or a failure listing the data files. In those cases the
    // output's Error field holds the same message.
    func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {
        output := CallsPropagateFolderOutput{
            Folder:     input.Folder,
            FromFilter: input.FromFilter,
            ToFilter:   input.ToFilter,
            Species:    input.Species,
        }
        // fail stores msg on the output and returns it as the error.
        fail := func(msg string) (CallsPropagateFolderOutput, error) {
            output.Error = msg
            return output, fmt.Errorf("%s", output.Error)
        }

        required := []struct {
            value string
            msg   string
        }{
            {input.Folder, "--folder is required"},
            {input.FromFilter, "--from is required"},
            {input.ToFilter, "--to is required"},
            {input.Species, "--species is required"},
        }
        for _, r := range required {
            if r.value == "" {
                return fail(r.msg)
            }
        }
        if input.FromFilter == input.ToFilter {
            return fail("--from and --to must differ")
        }

        info, err := os.Stat(input.Folder)
        if err != nil {
            return fail(fmt.Sprintf("folder not found: %s", input.Folder))
        }
        if !info.IsDir() {
            return fail(fmt.Sprintf("not a directory: %s", input.Folder))
        }

        files, err := utils.FindDataFiles(input.Folder)
        if err != nil {
            return fail(fmt.Sprintf("list .data files: %v", err))
        }
        output.FilesTotal = len(files)

        for _, f := range files {
            perFile := CallsPropagateInput{
                File:       f,
                FromFilter: input.FromFilter,
                ToFilter:   input.ToFilter,
                Species:    input.Species,
            }
            fileOut, err := CallsPropagate(perFile)
            switch {
            case err != nil:
                // Keep going: a bad file is recorded, not fatal.
                output.FilesErrored++
                output.Errors = append(output.Errors, fileOut)
                continue
            case fileOut.FiltersMissing:
                output.FilesSkippedNoFilter++
                continue
            }

            output.FilesWithBothFilters++
            output.TargetsExamined += fileOut.TargetsExamined
            output.Propagated += fileOut.Propagated
            output.SkippedNoOverlap += fileOut.SkippedNoOverlap
            output.SkippedConflict += fileOut.SkippedConflict
            if fileOut.Propagated > 0 {
                output.FilesChanged++
            }
            for i := range fileOut.Conflicts {
                tagged := fileOut.Conflicts[i]
                tagged.File = f
                output.Conflicts = append(output.Conflicts, tagged)
            }
        }
        return output, nil
    }
    toLabel.Species = species
    toLabel.CallType = callType
    toLabel.Certainty = 90
    }
    }
    return overlaps
    }
    return sources
    }
    return false
    }
    return nil
    }
  • file deletion: calls_modify_test.go (----------)
    [6.248737][6.450654:450698](),[6.450698][6.441123:441123]()
    package tools
    import (
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // TestCallsModifyBookmark checks that requesting a bookmark on a label that
    // is already bookmarked (with all other values matching) is reported as
    // "no changes needed" and leaves the bookmark set in the file.
    func TestCallsModifyBookmark(t *testing.T) {
        // Fixture: one segment whose only label is already bookmarked.
        fixturePath := filepath.Join(t.TempDir(), "test.data")
        label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true}
        segment := &utils.Segment{
            StartTime: 10.0,
            EndTime:   15.0,
            FreqLow:   100,
            FreqHigh:  5000,
            Labels:    []*utils.Label{label},
        }
        fixture := &utils.DataFile{
            Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
            Segments: []*utils.Segment{segment},
        }
        if err := fixture.Write(fixturePath); err != nil {
            t.Fatalf("failed to write test file: %v", err)
        }

        // Ask for the bookmark that is already present.
        wantBookmark := true
        in := CallsModifyInput{
            File:      fixturePath,
            Reviewer:  "tester",
            Filter:    "myfilter",
            Segment:   "10-15",
            Certainty: 80,
            Bookmark:  &wantBookmark,
        }
        res, err := CallsModify(in)
        if err == nil {
            t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")
        }
        if res.Error != "No changes needed: all values already match" {
            t.Errorf("expected 'no changes needed' error, got: %s", res.Error)
        }

        // The file on disk must still carry the bookmark.
        reread, err := utils.ParseDataFile(fixturePath)
        if err != nil {
            t.Fatalf("failed to parse file: %v", err)
        }
        if stillSet := reread.Segments[0].Labels[0].Bookmark; !stillSet {
            t.Errorf("bookmark should still be true, got false")
        }
    }
    // TestCallsModifyBookmarkFalse checks that requesting bookmark=true on an
    // unbookmarked label sets the bookmark in both the result and the file.
    func TestCallsModifyBookmarkFalse(t *testing.T) {
    	// Seed a .data file whose single label has no bookmark.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seedLabel := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false}
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{seedLabel},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// Ask for the bookmark to be switched on.
    	wantBookmark := true
    	request := CallsModifyInput{
    		File: path,
    		Reviewer: "tester",
    		Filter: "myfilter",
    		Segment: "10-15",
    		Certainty: 80,
    		Bookmark: &wantBookmark,
    	}
    	result, err := CallsModify(request)
    	if err != nil {
    		t.Errorf("unexpected error: %v", err)
    	}
    	if reported := result.Bookmark; reported == nil || !*reported {
    		t.Errorf("expected bookmark=true in result, got %v", result.Bookmark)
    	}
    	// The change must have been persisted.
    	reread, err := utils.ParseDataFile(path)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if stored := reread.Segments[0].Labels[0]; !stored.Bookmark {
    		t.Errorf("bookmark should be true, got false")
    	}
    }
    // TestCallsModifyCommentAdditive checks that a new comment is appended to an
    // existing one with " | " as separator, in the result and in the file.
    func TestCallsModifyCommentAdditive(t *testing.T) {
    	// Seed a .data file whose label already carries a comment.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seedLabel := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"}
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{seedLabel},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// Add a second comment on top of the existing one.
    	request := CallsModifyInput{
    		File: path,
    		Reviewer: "tester",
    		Filter: "myfilter",
    		Segment: "10-15",
    		Certainty: 80,
    		Comment: "Good example",
    	}
    	result, err := CallsModify(request)
    	if err != nil {
    		t.Errorf("unexpected error: %v", err)
    	}
    	const expectedComment = "First observation | Good example"
    	if got := result.Comment; got != expectedComment {
    		t.Errorf("expected comment=%q, got %q", expectedComment, got)
    	}
    	// The combined comment must also be what is stored on disk.
    	reread, err := utils.ParseDataFile(path)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if got := reread.Segments[0].Labels[0].Comment; got != expectedComment {
    		t.Errorf("expected comment in file=%q, got %q", expectedComment, got)
    	}
    }
    // TestCallsModifyCommentAdditiveMultiple adds three comments in sequence to
    // a label that starts without one and checks they are chained with " | ".
    func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {
    	// Seed a .data file whose label has no comment yet.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{
    				{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},
    			},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// addComment issues the same modify request each time, varying only the comment.
    	addComment := func(comment string) (CallsModifyOutput, error) {
    		return CallsModify(CallsModifyInput{
    			File: path,
    			Reviewer: "tester",
    			Filter: "myfilter",
    			Segment: "10-15",
    			Certainty: 80,
    			Comment: comment,
    		})
    	}
    	if _, err := addComment("First"); err != nil {
    		t.Fatalf("unexpected error on first comment: %v", err)
    	}
    	if _, err := addComment("Second"); err != nil {
    		t.Fatalf("unexpected error on second comment: %v", err)
    	}
    	result, err := addComment("Third")
    	if err != nil {
    		t.Fatalf("unexpected error on third comment: %v", err)
    	}
    	// Only the final result is inspected; it must contain all three parts in order.
    	const expectedComment = "First | Second | Third"
    	if got := result.Comment; got != expectedComment {
    		t.Errorf("expected comment=%q, got %q", expectedComment, got)
    	}
    }
    // TestCallsModifyCommentTooLong checks that a new comment is rejected when
    // the combined comment would exceed 140 characters, and that the comment
    // already stored in the file is left as it was.
    func TestCallsModifyCommentTooLong(t *testing.T) {
    	const (
    		existingComment = "This is a fairly long existing comment that takes up space"
    		longNewComment = "This is another very long comment that when combined with the existing one will exceed the limit"
    	)
    	// Seed a .data file whose label already has a sizeable comment.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{
    				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: existingComment},
    			},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// Each comment fits on its own; only the combination is too long.
    	request := CallsModifyInput{
    		File: path,
    		Reviewer: "tester",
    		Filter: "myfilter",
    		Segment: "10-15",
    		Certainty: 80,
    		Comment: longNewComment,
    	}
    	result, err := CallsModify(request)
    	if err == nil {
    		t.Errorf("expected error for combined comment exceeding 140 chars, got nil")
    	}
    	if result.Error == "" {
    		t.Errorf("expected error message, got empty")
    	}
    	// The rejected request must not have touched the stored comment.
    	reread, err := utils.ParseDataFile(path)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if got := reread.Segments[0].Labels[0].Comment; got != existingComment {
    		t.Errorf("original comment should be preserved, got %q", got)
    	}
    }
    // TestCallsModifyPreservesBookmarkOnOtherChange changes only the certainty
    // of a bookmarked label and checks that the bookmark is neither reported in
    // the result nor cleared in the file.
    func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {
    	// Seed a .data file whose label is bookmarked at certainty 80.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{
    				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true},
    			},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// Bookmark is deliberately left nil: only the certainty differs.
    	request := CallsModifyInput{
    		File: path,
    		Reviewer: "tester",
    		Filter: "myfilter",
    		Segment: "10-15",
    		Certainty: 100,
    	}
    	result, err := CallsModify(request)
    	if err != nil {
    		t.Errorf("unexpected error: %v", err)
    	}
    	if result.Bookmark != nil {
    		t.Errorf("bookmark should not be in output when not changed, got %v", result.Bookmark)
    	}
    	// The stored bookmark must have survived the certainty change.
    	reread, err := utils.ParseDataFile(path)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if stored := reread.Segments[0].Labels[0]; !stored.Bookmark {
    		t.Errorf("bookmark should still be true after changing certainty, got false")
    	}
    }
    // TestCallsModifyInvalidSegment checks that a segment range matching no
    // segment in the file yields an error and a non-empty result.Error.
    func TestCallsModifyInvalidSegment(t *testing.T) {
    	// Seed a .data file with a single 10-15 segment.
    	path := filepath.Join(t.TempDir(), "test.data")
    	seed := &utils.DataFile{
    		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{{
    			StartTime: 10.0,
    			EndTime: 15.0,
    			FreqLow: 100,
    			FreqHigh: 5000,
    			Labels: []*utils.Label{
    				{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},
    			},
    		}},
    	}
    	if err := seed.Write(path); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}
    	// 99-100 is a well-formed range that does not exist in the file.
    	request := CallsModifyInput{
    		File: path,
    		Reviewer: "tester",
    		Filter: "myfilter",
    		Segment: "99-100",
    		Certainty: 80,
    	}
    	result, err := CallsModify(request)
    	if err == nil {
    		t.Errorf("expected error for non-existent segment, got nil")
    	}
    	if result.Error == "" {
    		t.Errorf("expected error message, got empty")
    	}
    }
  • file deletion: calls_modify.go (----------)
    [6.248737][6.458192:458231](),[6.458231][6.450700:450700]()
    package tools
    import (
    "fmt"
    "math"
    "os"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsModifyInput defines the input for the modify tool.
    // File, Reviewer, Filter and Segment are required (see validateModifyInput).
    type CallsModifyInput struct {
    // File is the path of the .data file to modify.
    File string `json:"file"`
    // Reviewer is written to the file's Meta.Reviewer when changes are applied.
    Reviewer string `json:"reviewer"`
    // Filter selects the label to modify: the label whose Filter equals this value.
    Filter string `json:"filter"`
    // Segment is the time range in "start-end" form, e.g. "12-15" (see parseSegmentRange).
    Segment string `json:"segment"`
    // Certainty is the new certainty. It has no "unset" value: it is always
    // compared with, and written to, the label.
    Certainty int `json:"certainty"`
    // Species is "species" or "species+calltype"; empty keeps the label's
    // current species and call type (see resolveSpecies).
    Species string `json:"species"`
    // Bookmark is the requested bookmark state; nil leaves the bookmark unchanged.
    Bookmark *bool `json:"bookmark"`
    // Comment is appended to any existing comment with " | " as separator.
    // It must be ASCII and at most 140 characters; empty adds nothing.
    Comment string `json:"comment"`
    }
    // CallsModifyOutput defines the output for the modify tool.
    // On failure Error holds the message that is also returned as the error value.
    type CallsModifyOutput struct {
    // File echoes the input file path.
    File string `json:"file"`
    // SegmentStart and SegmentEnd are the integers parsed from the input segment range.
    SegmentStart int `json:"segment_start"`
    SegmentEnd int `json:"segment_end"`
    // Species, CallType and Certainty are the values written to the label.
    Species string `json:"species,omitempty"`
    CallType string `json:"calltype,omitempty"`
    Certainty int `json:"certainty,omitempty"`
    // Bookmark is set only when the bookmark state actually changed.
    Bookmark *bool `json:"bookmark,omitempty"`
    // Comment is the label's full comment after appending, set only when a comment was added.
    Comment string `json:"comment,omitempty"`
    // PreviousValue is the label as formatted by formatLabel before modification.
    PreviousValue string `json:"previous_value,omitempty"`
    // Error is the failure message, empty on success.
    Error string `json:"error,omitempty"`
    }
    // validateModifyInput reports the first problem found in input, or nil.
    // Checks run in a fixed order: the four required fields (file, reviewer,
    // filter, segment), then the 140-byte comment limit, then the ASCII-only
    // rule for the comment.
    func validateModifyInput(input CallsModifyInput) error {
    	switch {
    	case input.File == "":
    		return fmt.Errorf("--file is required")
    	case input.Reviewer == "":
    		return fmt.Errorf("--reviewer is required")
    	case input.Filter == "":
    		return fmt.Errorf("--filter is required")
    	case input.Segment == "":
    		return fmt.Errorf("--segment is required")
    	case len(input.Comment) > 140:
    		return fmt.Errorf("--comment must be 140 characters or less")
    	}
    	// pos is the byte offset of the offending rune within the comment.
    	for pos, ch := range input.Comment {
    		if ch > 127 {
    			return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", pos)
    		}
    	}
    	return nil
    }
    // resolveSpecies works out the species and call type a label should end up with.
    // An empty inputSpecies keeps the label's current values. Otherwise the input
    // is split at its first "+": the part before is the species, the part after
    // the call type (empty when there is no "+").
    func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
    	if inputSpecies == "" {
    		return label.Species, label.CallType
    	}
    	// Cut yields (input, "") when no "+" is present, which is the plain-species case.
    	species, callType, _ = strings.Cut(inputSpecies, "+")
    	return species, callType
    }
    // hasModifyChanges reports whether applying the request would alter the label.
    // A non-empty comment always counts as a change; a nil Bookmark never does.
    func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
    	speciesChanged := newSpecies != label.Species || newCallType != label.CallType
    	certaintyChanged := input.Certainty != label.Certainty
    	bookmarkChanged := input.Bookmark != nil && *input.Bookmark != label.Bookmark
    	commentAdded := input.Comment != ""
    	return speciesChanged || certaintyChanged || bookmarkChanged || commentAdded
    }
    // applyLabelChanges updates the label and data file, populating the output.
    func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
    dataFile.Meta.Reviewer = input.Reviewer
    label.Species = newSpecies
    label.CallType = newCallType
    output.Species = newSpecies
    output.CallType = newCallType
    label.Certainty = input.Certainty
    output.Certainty = input.Certainty
    if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
    label.Bookmark = *input.Bookmark
    output.Bookmark = input.Bookmark
    }
    if input.Comment != "" {
    var newComment string
    if label.Comment != "" {
    newComment = label.Comment + " | " + input.Comment
    } else {
    newComment = input.Comment
    }
    if len(newComment) > 140 {
    return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(newComment))
    }
    }
    output.File = input.File
    output.SegmentStart = startTime
    output.SegmentEnd = endTime
    if _, err := os.Stat(input.File); os.IsNotExist(err) {
    output.Error = fmt.Sprintf("File not found: %s", input.File)
    return output, fmt.Errorf("%s", output.Error)
    }
    dataFile, err := utils.ParseDataFile(input.File)
    if err != nil {
    output.Error = fmt.Sprintf("Failed to parse file: %v", err)
    return output, fmt.Errorf("%s", output.Error)
    }
    segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
    if segment == nil {
    output.Error = fmt.Sprintf("No segment found matching time range %d-%d", startTime, endTime)
    return output, fmt.Errorf("%s", output.Error)
    }
    if targetLabel == nil {
    output.Error = fmt.Sprintf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime)
    return output, fmt.Errorf("%s", output.Error)
    }
    output.PreviousValue = formatLabel(targetLabel)
    newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)
    if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
    output.Error = "No changes needed: all values already match"
    return output, fmt.Errorf("%s", output.Error)
    }
    if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
    output.Error = err.Error()
    return output, err
    }
    if err := dataFile.Write(input.File); err != nil {
    output.Error = fmt.Sprintf("Failed to save file: %v", err)
    return output, fmt.Errorf("%s", output.Error)
    }
    return output, nil
    }
    // parseSegmentRange parses a "start-end" range such as "12-15" into its two
    // integer bounds.
    //
    // Each side must be a complete decimal integer; surrounding whitespace is
    // tolerated, but trailing garbage such as "12abc" is rejected rather than
    // silently read as 12. An error is returned when the string does not have
    // exactly two "-"-separated parts, when a part is not an integer, or when
    // start is not strictly less than end.
    func parseSegmentRange(s string) (int, int, error) {
    	parts := strings.Split(s, "-")
    	if len(parts) != 2 {
    		return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
    	}
    	// Atoi, unlike Sscanf("%d"), fails when anything follows the digits.
    	start, err := strconv.Atoi(strings.TrimSpace(parts[0]))
    	if err != nil {
    		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
    	}
    	end, err := strconv.Atoi(strings.TrimSpace(parts[1]))
    	if err != nil {
    		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
    	}
    	// Splitting on "-" already keeps a minus sign out of either part; this
    	// guard is kept so the invariant does not depend on the split strategy.
    	if start < 0 || end < 0 {
    		return 0, 0, fmt.Errorf("times must be non-negative")
    	}
    	if start >= end {
    		return 0, 0, fmt.Errorf("start time must be less than end time")
    	}
    	return start, end, nil
    }
    // findSegment returns the first segment whose whole-second bounds equal
    // startTime/endTime and which carries a label for the given filter, or nil.
    // Bounds are floor(StartTime) and ceil(EndTime); when both round to the same
    // second the end is pushed out by one so a segment never has zero length.
    // Requiring the filter to match disambiguates segments that share a time
    // range but belong to different filters.
    func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
    	for _, candidate := range segments {
    		lo := int(math.Floor(candidate.StartTime))
    		hi := int(math.Ceil(candidate.EndTime))
    		if hi == lo {
    			hi = lo + 1 // minimum 1 second
    		}
    		if lo != startTime || hi != endTime {
    			continue
    		}
    		for _, lbl := range candidate.Labels {
    			if lbl.Filter == filter {
    				return candidate
    			}
    		}
    	}
    	return nil
    }
    // formatLabel renders a label as "Species (NN%)", or "Species+CallType (NN%)"
    // when the label has a call type.
    func formatLabel(label *utils.Label) string {
    	name := label.Species
    	if label.CallType != "" {
    		name = name + "+" + label.CallType
    	}
    	return fmt.Sprintf("%s (%d%%)", name, label.Certainty)
    }
    }
    // findLabelByFilter finds the first label matching the given filter in a segment.
    func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
    for _, label := range segment.Labels {
    if label.Filter == filter {
    return label
    }
    }
    return nil
    targetLabel := findLabelByFilter(segment, input.Filter)
    startTime, endTime, err := parseSegmentRange(input.Segment)
    if err != nil {
    output.Error = err.Error()
    return output, err
    }
    label.Comment = newComment
    output.Comment = newComment
    }
    return nil
    }
    // CallsModify modifies a label in a .data file
    func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
    var output CallsModifyOutput
    if err := validateModifyInput(input); err != nil {
    output.Error = err.Error()
    return output, err
  • file deletion: calls_from_raven.go (----------)
    [6.248737][6.471700:471743](),[6.471743][6.458233:458233]()
    package tools
    import (
    "bufio"
    "fmt"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsFromRavenInput defines the input for the calls-from-raven tool.
    // CallsFromRaven converts it directly to CallsFromSourceInput, so the two
    // types must keep identical fields.
    type CallsFromRavenInput struct {
    // NOTE(review): presumably the directory scanned for *.selections.txt files — confirm in callsFromSource.
    Folder string `json:"folder"`
    // NOTE(review): presumably a single selection file to process instead of a folder — confirm in callsFromSource.
    File string `json:"file"`
    // NOTE(review): presumably requests deletion of processed selection files (cf. FilesDeleted in the output) — confirm.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback; excluded from JSON
    }
    // CallsFromRavenOutput defines the output for the calls-from-raven tool.
    // Every field is copied unchanged from the result of callsFromSource.
    type CallsFromRavenOutput struct {
    // Calls holds one entry per Raven selection that was processed.
    Calls []ClusteredCall `json:"calls"`
    TotalCalls int `json:"total_calls"`
    // NOTE(review): presumably keyed by species name — confirm in callsFromSource.
    SpeciesCount map[string]int `json:"species_count"`
    DataFilesWritten int `json:"data_files_written"`
    DataFilesSkipped int `json:"data_files_skipped"`
    FilesProcessed int `json:"files_processed"`
    FilesDeleted int `json:"files_deleted"`
    Filter string `json:"filter"`
    // Error is nil when absent and is then omitted from the JSON output.
    Error *string `json:"error,omitempty"`
    }
    // ravenSource implements CallSource for Raven selection files.
    type ravenSource struct{}
    // Name returns the display name of this source, "Raven".
    func (ravenSource) Name() string { return "Raven" }
    // FindFiles returns the paths of the Raven selection tables located directly
    // in folder, i.e. regular entries whose name ends in ".selections.txt".
    // Sub-directories are never descended into, and a directory that happens to
    // carry the suffix is skipped because it cannot be read as a selection table.
    // The error from reading folder is returned unchanged.
    func (ravenSource) FindFiles(folder string) ([]string, error) {
    	entries, err := os.ReadDir(folder)
    	if err != nil {
    		return nil, err
    	}
    	var files []string
    	for _, entry := range entries {
    		if entry.IsDir() {
    			continue
    		}
    		if name := entry.Name(); strings.HasSuffix(name, ".selections.txt") {
    			files = append(files, filepath.Join(folder, name))
    		}
    	}
    	return files, nil
    }
    // ProcessFile handles one Raven selection file by delegating to
    // processRavenFileCached and passing its four results through unchanged.
    func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    	calls, wrote, skipped, err := processRavenFileCached(ravenFile, cache)
    	return calls, wrote, skipped, err
    }
    // CallsFromRaven processes Raven selection files and writes .data files.
    // The work is done by callsFromSource with a ravenSource; this function only
    // converts the input to the shared input type and copies the shared result
    // into the Raven-specific output type. The error from callsFromSource is
    // returned together with whatever output it produced.
    func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
    	common, err := callsFromSource(ravenSource{}, CallsFromSourceInput(input))
    	return CallsFromRavenOutput{
    		Calls: common.Calls,
    		TotalCalls: common.TotalCalls,
    		SpeciesCount: common.SpeciesCount,
    		DataFilesWritten: common.DataFilesWritten,
    		DataFilesSkipped: common.DataFilesSkipped,
    		FilesProcessed: common.FilesProcessed,
    		FilesDeleted: common.FilesDeleted,
    		Filter: common.Filter,
    		Error: common.Error,
    	}, err
    }
    // RavenSelection represents a single Raven selection, i.e. one data row of a
    // Raven selection table.
    type RavenSelection struct {
    // StartTime and EndTime come from the "Begin Time (s)" and "End Time (s)" columns.
    StartTime float64
    EndTime float64
    // FreqLow and FreqHigh come from the "Low Freq (Hz)" and "High Freq (Hz)"
    // columns; they stay 0 when the column is absent.
    FreqLow float64
    FreqHigh float64
    // Species is the raw text of the "Species" column.
    Species string
    }
    // ravenColumnIndices holds the column index positions for a Raven file.
    // parseRavenHeader initialises every index to -1, meaning "column not
    // present in the header". The begin-time, end-time and species columns are
    // required; the two frequency columns are optional.
    type ravenColumnIndices struct {
    beginTimeIdx int // "Begin Time (s)"
    endTimeIdx int // "End Time (s)"
    lowFreqIdx int // "Low Freq (Hz)"
    highFreqIdx int // "High Freq (Hz)"
    speciesIdx int // "Species"
    }
    for i, col := range header {
    switch col {
    case "Begin Time (s)":
    idx.beginTimeIdx = i
    case "End Time (s)":
    idx.endTimeIdx = i
    case "Low Freq (Hz)":
    idx.lowFreqIdx = i
    case "High Freq (Hz)":
    idx.highFreqIdx = i
    case "Species":
    idx.speciesIdx = i
    }
    }
    if idx.beginTimeIdx == -1 || idx.endTimeIdx == -1 || idx.speciesIdx == -1 {
    return idx, fmt.Errorf("missing required columns in Raven file")
    }
    // parseRavenSelections reads all selection rows from a scanner and returns parsed selections
    func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
    var selections []RavenSelection
    for scanner.Scan() {
    line := scanner.Text()
    if line == "" {
    continue
    }
    fields := strings.Split(line, "\t")
    if len(fields) <= idx.speciesIdx {
    continue
    }
    sel, err := parseRavenRow(fields, idx)
    if err != nil {
    return nil, err
    }
    selections = append(selections, sel)
    }
    if err := scanner.Err(); err != nil {
    return nil, fmt.Errorf("error reading file: %w", err)
    }
    // parseRavenRow parses a single tab-separated row into a RavenSelection
    func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
    var sel RavenSelection
    startTime, err := strconv.ParseFloat(fields[idx.beginTimeIdx], 64)
    if err != nil {
    return sel, fmt.Errorf("failed to parse begin time %q: %w", fields[idx.beginTimeIdx], err)
    }
    sel.StartTime = startTime
    endTime, err := strconv.ParseFloat(fields[idx.endTimeIdx], 64)
    if err != nil {
    return sel, fmt.Errorf("failed to parse end time %q: %w", fields[idx.endTimeIdx], err)
    }
    sel.EndTime = endTime
    if idx.lowFreqIdx >= 0 && idx.lowFreqIdx < len(fields) {
    freqLow, err := strconv.ParseFloat(fields[idx.lowFreqIdx], 64)
    if err != nil {
    return sel, fmt.Errorf("failed to parse low freq %q: %w", fields[idx.lowFreqIdx], err)
    }
    sel.FreqLow = freqLow
    }
    if idx.highFreqIdx >= 0 && idx.highFreqIdx < len(fields) {
    freqHigh, err := strconv.ParseFloat(fields[idx.highFreqIdx], 64)
    if err != nil {
    return sel, fmt.Errorf("failed to parse high freq %q: %w", fields[idx.highFreqIdx], err)
    }
    sel.FreqHigh = freqHigh
    }
    // deriveWAVBaseName extracts the base WAV filename from a Raven .selections.txt filename
    func deriveWAVBaseName(ravenFile string) string {
    base := filepath.Base(ravenFile)
    nameWithoutSuffix := strings.TrimSuffix(base, ".selections.txt")
    idx := strings.Index(nameWithoutSuffix, ".Table.")
    if idx > 0 {
    nameWithoutSuffix = nameWithoutSuffix[:idx]
    }
    if !scanner.Scan() {
    return nil, false, false, fmt.Errorf("empty file")
    }
    header := strings.Split(scanner.Text(), "\t")
    idx, err := parseRavenHeader(header)
    if err != nil {
    return nil, false, false, err
    }
    selections, err := parseRavenSelections(scanner, idx)
    if err != nil {
    return nil, false, false, err
    }
    if len(selections) == 0 {
    return nil, false, true, nil
    }
    if wavPath == "" {
    return nil, false, true, nil
    }
    sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    if err != nil {
    return nil, false, true, nil
    }
    dataPath := wavPath + ".data"
    segments := buildRavenSegments(selections, sampleRate)
    meta := AviaNZMeta{Operator: "Raven", Duration: duration}
    reviewer := "None"
    meta.Reviewer = &reviewer
    if err := writeDotDataFileSafe(dataPath, segments, "Raven", meta); err != nil {
    return nil, false, false, err
    }
    var calls []ClusteredCall
    for _, sel := range selections {
    calls = append(calls, ClusteredCall{
    File: wavPath,
    StartTime: sel.StartTime,
    EndTime: sel.EndTime,
    EbirdCode: sel.Species,
    Segments: 1,
    })
    }
    return calls, true, false, nil
    }
    // buildRavenSegments converts Raven selections to AviaNZ segments, one
    // segment per selection and in the same order. Each segment gets a single
    // label with filter "Raven" and a fixed certainty of 70, since Raven provides
    // no confidence metric. A selection with both frequencies at 0 is treated as
    // having no frequency range and gets 0..sampleRate instead.
    func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
    	var out []AviaNZSegment
    	for i := range selections {
    		sel := selections[i]
    		// Use the range from Raven, or the full band when none was given.
    		low, high := sel.FreqLow, sel.FreqHigh
    		if low == 0 && high == 0 {
    			high = float64(sampleRate)
    		}
    		out = append(out, AviaNZSegment{
    			sel.StartTime,
    			sel.EndTime,
    			low,
    			high,
    			[]AviaNZLabel{{Species: sel.Species, Certainty: 70, Filter: "Raven"}},
    		})
    	}
    	return out
    }
    }
    // resolveWAVPath finds the WAV file corresponding to a Raven file
    func resolveWAVPath(ravenFile string, cache *DirCache) string {
    baseName := deriveWAVBaseName(ravenFile)
    if cache != nil {
    return cache.FindWAV(baseName)
    }
    return findWAVFile(filepath.Dir(ravenFile), baseName)
    // Find WAV file
    wavPath := resolveWAVPath(ravenFile, cache)
    defer func() { _ = file.Close() }()
    scanner := bufio.NewScanner(file)
    }
    return nameWithoutSuffix
    }
    // processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookup
    func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    file, err := os.Open(ravenFile)
    if err != nil {
    return nil, false, false, fmt.Errorf("failed to open file: %w", err)
    sel.Species = fields[idx.speciesIdx]
    return sel, nil
    }
    return selections, nil
    }
    return idx, nil
    }
    // parseRavenHeader finds column indices from a tab-separated header line
    func parseRavenHeader(header []string) (ravenColumnIndices, error) {
    idx := ravenColumnIndices{beginTimeIdx: -1, endTimeIdx: -1, lowFreqIdx: -1, highFreqIdx: -1, speciesIdx: -1}
  • file deletion: calls_from_preds_test.go (----------)
    [6.248737][6.483453:483501](),[6.483501][6.471745:471745]()
    package tools
    import (
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // TestCallsFromPreds_EmptyFilterError checks that CallsFromPreds fails when
    // no filter is given and none can be derived from the CSV name ("preds.csv").
    func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
    	dir := t.TempDir()
    	// Predictions CSV with one row pointing at test.wav.
    	csvPath := filepath.Join(dir, "preds.csv")
    	rows := []byte("file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n")
    	if err := os.WriteFile(csvPath, rows, 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Minimal WAV next to the CSV for that row to refer to.
    	createMinimalWAV(t, filepath.Join(dir, "test.wav"), 44100, 10.0)
    	// No explicit filter is supplied.
    	output, err := CallsFromPreds(CallsFromPredsInput{
    		CSVPath: csvPath,
    		Filter: "",
    		WriteDotData: true,
    		ProgressHandler: nil,
    	})
    	// Failure must be visible both as err and as a non-empty output.Error.
    	if err == nil {
    		t.Error("expected error for empty filter, got nil")
    	}
    	if msg := output.Error; msg == nil || *msg == "" {
    		t.Error("expected error message in output, got empty")
    	}
    }
    // TestCallsFromPreds_NewDataFile checks that, with no existing .data file,
    // CallsFromPreds takes the filter from the CSV file name, writes one .data
    // file next to the WAV, and stores a single segment with a single label
    // carrying that filter.
    func TestCallsFromPreds_NewDataFile(t *testing.T) {
    	// Create a temp CSV file
    	tmpDir := t.TempDir()
    	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
    	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Create a dummy WAV file
    	wavPath := filepath.Join(tmpDir, "test.wav")
    	createMinimalWAV(t, wavPath, 44100, 10.0)
    	// Test with filter parsed from filename
    	input := CallsFromPredsInput{
    		CSVPath: csvPath,
    		Filter: "", // Will parse from filename
    		WriteDotData: true,
    		ProgressHandler: nil,
    	}
    	output, err := CallsFromPreds(input)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if output.DataFilesWritten != 1 {
    		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
    	}
    	if output.Filter != "test-filter" {
    		t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
    	}
    	// Verify .data file was created
    	dataPath := wavPath + ".data"
    	if _, err := os.Stat(dataPath); os.IsNotExist(err) {
    		t.Error("expected .data file to be created")
    	}
    	// Verify content
    	df, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse .data file: %v", err)
    	}
    	// The count checks are fatal: the lines below index element 0 and would
    	// panic, hiding the real failure, if the slices were empty.
    	if len(df.Segments) != 1 {
    		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
    	}
    	if len(df.Segments[0].Labels) != 1 {
    		t.Fatalf("expected 1 label, got %d", len(df.Segments[0].Labels))
    	}
    	if df.Segments[0].Labels[0].Filter != "test-filter" {
    		t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
    	}
    }
    // TestCallsFromPreds_ExistingDataFileSameFilter checks that CallsFromPreds
    // refuses to write when the existing .data file already contains labels for
    // the same filter, and that the existing file is left unchanged.
    func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
    	// Create a temp CSV file
    	tmpDir := t.TempDir()
    	csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
    	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Create a dummy WAV file
    	wavPath := filepath.Join(tmpDir, "test.wav")
    	createMinimalWAV(t, wavPath, 44100, 10.0)
    	// Create existing .data file with same filter
    	dataPath := wavPath + ".data"
    	existingData := `[
    {"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
    [5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]
    ]`
    	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Test with same filter (should error)
    	input := CallsFromPredsInput{
    		CSVPath: csvPath,
    		Filter: "", // Will parse from filename -> "existing-filter"
    		WriteDotData: true,
    		ProgressHandler: nil,
    	}
    	output, err := CallsFromPreds(input)
    	// Should return error
    	if err == nil {
    		t.Error("expected error for same filter, got nil")
    	}
    	if output.Error == nil {
    		t.Error("expected error message in output")
    	}
    	// Verify original .data file is unchanged
    	df, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse .data file: %v", err)
    	}
    	// Fatal on a wrong shape: the species check below indexes element 0 and
    	// would panic, hiding the real failure, if a slice were empty.
    	if len(df.Segments) != 1 {
    		t.Fatalf("expected original 1 segment, got %d", len(df.Segments))
    	}
    	if len(df.Segments[0].Labels) == 0 {
    		t.Fatalf("expected original label to be present, got none")
    	}
    	if df.Segments[0].Labels[0].Species != "morepork" {
    		t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
    	}
    }
    // TestCallsFromPreds_ExistingDataFileDifferentFilter checks that predictions
    // for a new filter are merged into an existing .data file that holds labels
    // for another filter: both segments are kept, sorted by start time, and both
    // filters remain present.
    func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
    	// Create a temp CSV file
    	tmpDir := t.TempDir()
    	csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
    	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Create a dummy WAV file
    	wavPath := filepath.Join(tmpDir, "test.wav")
    	createMinimalWAV(t, wavPath, 44100, 10.0)
    	// Create existing .data file with different filter
    	dataPath := wavPath + ".data"
    	existingData := `[
    {"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
    [5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]
    ]`
    	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
    		t.Fatal(err)
    	}
    	// Test with different filter (should merge)
    	input := CallsFromPredsInput{
    		CSVPath: csvPath,
    		Filter: "", // Will parse from filename -> "new-filter"
    		WriteDotData: true,
    		ProgressHandler: nil,
    	}
    	output, err := CallsFromPreds(input)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if output.DataFilesWritten != 1 {
    		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
    	}
    	// Verify .data file has merged content
    	df, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse .data file: %v", err)
    	}
    	// Fatal on a wrong count: the ordering check below indexes elements 0
    	// and 1 and would panic, hiding the real failure, with fewer segments.
    	if len(df.Segments) != 2 {
    		t.Fatalf("expected 2 segments after merge, got %d", len(df.Segments))
    	}
    	// Check segments are sorted by start time
    	if df.Segments[0].StartTime > df.Segments[1].StartTime {
    		t.Error("expected segments to be sorted by start time")
    	}
    	// Check both filters are present
    	filters := make(map[string]bool)
    	for _, seg := range df.Segments {
    		for _, label := range seg.Labels {
    			filters[label.Filter] = true
    		}
    	}
    	if !filters["old-filter"] {
    		t.Error("expected 'old-filter' to be present")
    	}
    	if !filters["new-filter"] {
    		t.Error("expected 'new-filter' to be present")
    	}
    }
    // TestCallsFromPreds_ExistingDataFileParseError checks that an unparsable
    // .data file next to the WAV makes CallsFromPreds fail and that the file is
    // left exactly as it was found.
    func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
    	const corruptedData = `this is not valid json`

    	workDir := t.TempDir()

    	// Predictions CSV with a single positive detection.
    	csvPath := filepath.Join(workDir, "predsST_test-filter_2025-01-01.csv")
    	rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(rows), 0644); err != nil {
    		t.Fatal(err)
    	}

    	// The WAV referenced by the CSV, plus a garbage .data file beside it.
    	wavPath := filepath.Join(workDir, "test.wav")
    	createMinimalWAV(t, wavPath, 44100, 10.0)
    	dataPath := wavPath + ".data"
    	if err := os.WriteFile(dataPath, []byte(corruptedData), 0644); err != nil {
    		t.Fatal(err)
    	}

    	// The run must fail because the existing file cannot be parsed.
    	output, err := CallsFromPreds(CallsFromPredsInput{
    		CSVPath:         csvPath,
    		Filter:          "",
    		WriteDotData:    true,
    		ProgressHandler: nil,
    	})
    	if err == nil {
    		t.Error("expected error for corrupted .data file, got nil")
    	}
    	if output.Error == nil {
    		t.Error("expected error message in output")
    	}

    	// The corrupted file must not have been overwritten.
    	onDisk, err := os.ReadFile(dataPath)
    	if err != nil {
    		t.Fatal(err)
    	}
    	if string(onDisk) != corruptedData {
    		t.Error("expected corrupted file to remain unchanged")
    	}
    }
    // TestCallsFromPreds_ExplicitFilter verifies that an explicitly supplied
    // filter is reported in the output and written into the .data labels, even
    // when the CSV filename does not follow the parsable naming pattern.
    func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
    	// Create a temp CSV file with non-standard name
    	tmpDir := t.TempDir()
    	csvPath := filepath.Join(tmpDir, "predictions.csv")
    	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
    		t.Fatal(err)
    	}

    	// Create a dummy WAV file
    	wavPath := filepath.Join(tmpDir, "test.wav")
    	createMinimalWAV(t, wavPath, 44100, 10.0)

    	// Test with explicit filter
    	input := CallsFromPredsInput{
    		CSVPath:         csvPath,
    		Filter:          "my-custom-filter",
    		WriteDotData:    true,
    		ProgressHandler: nil,
    	}
    	output, err := CallsFromPreds(input)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if output.Filter != "my-custom-filter" {
    		t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
    	}

    	// Verify .data file uses explicit filter
    	dataPath := wavPath + ".data"
    	df, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse .data file: %v", err)
    	}
    	// Guard the indexing below so a missing segment or label fails the test
    	// cleanly instead of panicking with an index-out-of-range.
    	if len(df.Segments) == 0 || len(df.Segments[0].Labels) == 0 {
    		t.Fatalf("expected at least one labelled segment in .data file, got %d segments", len(df.Segments))
    	}
    	if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
    		t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
    	}
    }
    // TestCallsFromPreds_NonParsableFilenameNoFilter checks that CallsFromPreds
    // fails when no filter is given and none can be derived from the CSV name.
    func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
    	workDir := t.TempDir()

    	// "random_name.csv" does not match the name pattern a filter is parsed from.
    	csvPath := filepath.Join(workDir, "random_name.csv")
    	rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
    	if err := os.WriteFile(csvPath, []byte(rows), 0644); err != nil {
    		t.Fatal(err)
    	}

    	// The WAV the CSV row refers to.
    	createMinimalWAV(t, filepath.Join(workDir, "test.wav"), 44100, 10.0)

    	// No explicit filter and nothing to parse: an error is expected both as
    	// the returned error and in the output struct.
    	output, err := CallsFromPreds(CallsFromPredsInput{
    		CSVPath:         csvPath,
    		Filter:          "",
    		WriteDotData:    true,
    		ProgressHandler: nil,
    	})
    	if err == nil {
    		t.Error("expected error for unparsable filename with no filter, got nil")
    	}
    	if output.Error == nil {
    		t.Error("expected error message in output")
    	}
    }
    // createMinimalWAV writes a minimal valid WAV file for testing: a 44-byte
    // RIFF/WAVE header describing 16-bit mono PCM at sampleRate, followed by
    // duration seconds of silence (zero samples).
    //
    // Any failure aborts the calling test via t.Fatal. The whole file is written
    // with os.WriteFile so that a failure while closing the file is reported
    // rather than silently dropped.
    func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
    	t.Helper()

    	const headerSize = 44
    	numSamples := int(float64(sampleRate) * duration)
    	dataSize := numSamples * 2 // 16-bit mono: two bytes per sample

    	// One buffer holds header and samples; the sample bytes stay zero (silence).
    	wav := make([]byte, headerSize+dataSize)

    	// Little-endian field writers, as required by the RIFF format.
    	putU16 := func(off int, v uint16) {
    		wav[off] = byte(v)
    		wav[off+1] = byte(v >> 8)
    	}
    	putU32 := func(off int, v uint32) {
    		wav[off] = byte(v)
    		wav[off+1] = byte(v >> 8)
    		wav[off+2] = byte(v >> 16)
    		wav[off+3] = byte(v >> 24)
    	}

    	// RIFF header
    	copy(wav[0:4], "RIFF")
    	putU32(4, uint32(36+dataSize)) // total size minus the 8-byte RIFF preamble
    	copy(wav[8:12], "WAVE")

    	// fmt chunk
    	copy(wav[12:16], "fmt ")
    	putU32(16, 16)                   // fmt chunk size
    	putU16(20, 1)                    // audio format: PCM
    	putU16(22, 1)                    // channels: mono
    	putU32(24, uint32(sampleRate))   // sample rate
    	putU32(28, uint32(sampleRate*2)) // byte rate = sampleRate * blockAlign
    	putU16(32, 2)                    // block align
    	putU16(34, 16)                   // bits per sample

    	// data chunk
    	copy(wav[36:40], "data")
    	putU32(40, uint32(dataSize))

    	if err := os.WriteFile(path, wav, 0644); err != nil {
    		t.Fatal(err)
    	}
    }
  • file deletion: calls_from_preds.go (----------)
    [6.248737][6.504729:504772](),[6.504772][6.483503:483503]()
    package tools
    import (
    "encoding/csv"
    "encoding/json"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "sort"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "skraak/utils"
    )
    // Constants for the clustering algorithm and .data file output.
    const (
    // Default gap multiplier: two detections belong to the same call when their
    // start times differ by at most CLUSTER_GAP_MULTIPLIER * clip_duration.
    // Original note: 3 for kiwi.
    CLUSTER_GAP_MULTIPLIER = 2
    // Default minimum-detections setting; clusters with this many detections or
    // fewer are dropped. 1 = filter out single detections (used for kiwi, they
    // have long calls 30s), 0 = let single detections pass through.
    MIN_DETECTIONS_PER_CLUSTER = 0
    // Certainty value written on every generated .data label.
    DEFAULT_CERTAINTY = 70
    // Number of parallel workers for .data file writing.
    DOT_DATA_WORKERS = 8
    )
    // ClusteredCall represents a clustered bird call detection: a run of positive
    // clip detections for one species in one file, merged into a single time span.
    type ClusteredCall struct {
    // File is the value of the CSV "file" column for the detections.
    File string `json:"file"`
    // StartTime is the start time of the first detection in the cluster.
    StartTime float64 `json:"start_time"`
    // EndTime is the start time of the last detection plus the clip duration.
    EndTime float64 `json:"end_time"`
    // EbirdCode is the header name of the species column that was positive.
    EbirdCode string `json:"ebird_code"`
    // Segments is the number of detections merged into this call.
    Segments int `json:"segments"`
    }
    // CallsFromPredsInput defines the input for the calls-from-preds tool
    type CallsFromPredsInput struct {
    // CSVPath is the predictions CSV to read; .data files are looked up and
    // written relative to its directory.
    CSVPath string `json:"csv_path"`
    // Filter is the filter name stored on generated labels. When empty it is
    // parsed from the CSV filename.
    Filter string `json:"filter"`
    // WriteDotData enables writing/merging AviaNZ .data files.
    WriteDotData bool `json:"write_dot_data"`
    // GapMultiplier overrides CLUSTER_GAP_MULTIPLIER when greater than zero.
    GapMultiplier int `json:"gap_multiplier"`
    // MinDetections overrides MIN_DETECTIONS_PER_CLUSTER when >= 0; note that
    // the zero value therefore counts as an explicit setting of 0.
    MinDetections int `json:"min_detections"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
    }
    // ProgressHandler is a callback function for reporting progress during long
    // operations. A nil handler disables progress reporting.
    //
    //   - processed: number of items processed so far
    //   - total: total number of items to process
    //   - message: optional status message (may be empty)
    type ProgressHandler func(processed, total int, message string)
    // CallsFromPredsOutput defines the output for the calls-from-preds tool
    type CallsFromPredsOutput struct {
    // Calls holds the clustered calls, sorted by file and then start time.
    Calls []ClusteredCall `json:"calls"`
    // TotalCalls is len(Calls).
    TotalCalls int `json:"total_calls"`
    // ClipDuration is end_time - start_time of the first CSV data row.
    ClipDuration float64 `json:"clip_duration"`
    // GapThreshold is the gap multiplier times ClipDuration.
    GapThreshold float64 `json:"gap_threshold"`
    // SpeciesCount maps each ebird code to its number of clustered calls.
    SpeciesCount map[string]int `json:"species_count"`
    // DataFilesWritten counts .data files written or merged.
    DataFilesWritten int `json:"data_files_written"`
    // DataFilesSkipped counts files skipped because the WAV was not found or
    // its header could not be read.
    DataFilesSkipped int `json:"data_files_skipped"`
    // Filter is the filter name that was actually used.
    Filter string `json:"filter"`
    // Error carries the error message when the run failed; nil on success.
    Error *string `json:"error,omitempty"`
    }
    // AviaNZ .data file types
    // predFileSpeciesKey groups detections by file and ebird code; it is the map
    // key under which detection start times are collected before clustering.
    type predFileSpeciesKey struct {
    File string // value of the CSV "file" column
    EbirdCode string // header name of the species column
    }
    // CallsFromPreds reads a predictions CSV, clusters the positive detections of
    // each file/species pair into continuous calls and, when requested, writes
    // them to AviaNZ .data files next to the CSV.
    //
    // On failure the message is stored in the output's Error field and also
    // returned as the error value.
    func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
    	var output CallsFromPredsOutput

    	// An explicit filter wins; otherwise try to derive one from the CSV filename.
    	resolved := input.Filter
    	if resolved == "" {
    		resolved = ParseFilterFromFilename(input.CSVPath)
    	}
    	if resolved == "" {
    		msg := "Filter must be specified via --filter flag or parsable from CSV filename"
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}
    	output.Filter = resolved

    	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
    	if err != nil {
    		msg := err.Error()
    		output.Error = &msg
    		return output, err
    	}
    	output.ClipDuration = clipDuration

    	// Fall back to the package defaults when the caller gave no usable override.
    	gapMultiplier := input.GapMultiplier
    	if gapMultiplier <= 0 {
    		gapMultiplier = CLUSTER_GAP_MULTIPLIER
    	}
    	minDetections := input.MinDetections
    	if minDetections < 0 {
    		minDetections = MIN_DETECTIONS_PER_CLUSTER
    	}

    	output.GapThreshold = float64(gapMultiplier) * clipDuration
    	output.Calls, output.SpeciesCount = clusterDetections(detections, clipDuration, output.GapThreshold, minDetections)
    	output.TotalCalls = len(output.Calls)

    	if !input.WriteDotData {
    		return output, nil
    	}

    	written, skipped, err := writeDotFiles(input.CSVPath, resolved, output.Calls, input.ProgressHandler)
    	if err != nil {
    		msg := fmt.Sprintf("Error writing .data files: %v", err)
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}
    	output.DataFilesWritten, output.DataFilesSkipped = written, skipped
    	return output, nil
    }
    // readPredCSV opens the predictions CSV at csvPath, resolves its column layout
    // from the header row and reads every data row. It returns the column
    // mapping, the detections grouped by file and species, and the clip duration.
    // On any failure the zero column mapping, a nil map and 0 are returned with
    // the error.
    func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
    	var none predCSVColumns

    	f, err := os.Open(csvPath)
    	if err != nil {
    		return none, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
    	}
    	// Read-only handle: nothing useful can be done with a close error here.
    	defer func() { _ = f.Close() }()

    	r := csv.NewReader(f)
    	r.ReuseRecord = true

    	headerRow, err := r.Read()
    	if err != nil {
    		return none, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
    	}

    	columns, err := findPredCSVColumns(headerRow)
    	if err != nil {
    		return none, nil, 0, err
    	}

    	grouped, clipLen, err := readPredCSVRows(r, columns)
    	if err != nil {
    		return none, nil, 0, err
    	}
    	return columns, grouped, clipLen, nil
    }
    // predCSVColumns holds the column indices for a predictions CSV
    type predCSVColumns struct {
    fileIdx int // index of the "file" column
    startTimeIdx int // index of the "start_time" column
    endTimeIdx int // index of the "end_time" column
    // ebirdCodes and ebirdIdx are parallel slices: ebirdCodes[i] is the header
    // name of the species column found at index ebirdIdx[i].
    ebirdCodes []string
    ebirdIdx []int
    }
    // findPredCSVColumns parses the CSV header to find column indices.
    // It records the positions of the "file", "start_time" and "end_time"
    // columns; every other column whose name is not in ignoredColumns is treated
    // as an ebird code (species) column. An error is returned when a required
    // column is missing or when no species column is present.
    // NOTE(review): ignoredColumns is not declared inside this span — confirm
    // where its declaration sits in the assembled function.
    func findPredCSVColumns(header []string) (predCSVColumns, error) {
    // -1 marks a required column as not found yet.
    cols := predCSVColumns{
    fileIdx: -1,
    startTimeIdx: -1,
    endTimeIdx: -1,
    }
    for i, col := range header {
    switch col {
    case "file":
    cols.fileIdx = i
    case "start_time":
    cols.startTimeIdx = i
    case "end_time":
    cols.endTimeIdx = i
    default:
    // Anything else is a species column unless explicitly ignored.
    if ignoredColumns[col] {
    continue
    }
    cols.ebirdCodes = append(cols.ebirdCodes, col)
    cols.ebirdIdx = append(cols.ebirdIdx, i)
    }
    }
    if cols.fileIdx == -1 || cols.startTimeIdx == -1 || cols.endTimeIdx == -1 {
    return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
    }
    if len(cols.ebirdCodes) == 0 {
    return cols, fmt.Errorf("CSV must have at least one ebird code column")
    }
    // readPredCSVRows reads all CSV data rows and returns the detection start
    // times grouped by file+species, plus the clip duration.
    //
    // The clip duration is taken from the first data row (end_time - start_time).
    // A CSV with no data rows yields an empty map and a duration of 0.
    //
    // A start_time (or the first row's end_time) that does not parse as a float
    // is reported as an error; previously the parse error was discarded and the
    // value silently became 0, producing wrong cluster times.
    func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
    	detections := make(map[predFileSpeciesKey][]float64)
    	clipDuration := 0.0

    	for row := 1; ; row++ {
    		record, err := reader.Read()
    		if err == io.EOF {
    			break
    		}
    		if err != nil {
    			if row == 1 {
    				return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
    			}
    			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
    		}

    		startTime, err := strconv.ParseFloat(record[cols.startTimeIdx], 64)
    		if err != nil {
    			return nil, 0, fmt.Errorf("invalid start_time %q in CSV data row %d: %w", record[cols.startTimeIdx], row, err)
    		}

    		// Only the first row's end_time is needed: it fixes the clip duration.
    		if row == 1 {
    			endTime, err := strconv.ParseFloat(record[cols.endTimeIdx], 64)
    			if err != nil {
    				return nil, 0, fmt.Errorf("invalid end_time %q in CSV data row %d: %w", record[cols.endTimeIdx], row, err)
    			}
    			clipDuration = endTime - startTime
    		}

    		addDetectionsFromRow(record, cols, startTime, detections)
    	}
    	return detections, clipDuration, nil
    }
    // addDetectionsFromRow records startTime under the file+species key of every
    // species column whose value in this row is exactly "1". Other values are
    // treated as "no detection".
    func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
    	source := record[cols.fileIdx]
    	for pos, column := range cols.ebirdIdx {
    		if record[column] != "1" {
    			continue
    		}
    		k := predFileSpeciesKey{File: source, EbirdCode: cols.ebirdCodes[pos]}
    		detections[k] = append(detections[k], startTime)
    	}
    }
    // clusterDetections groups the detection start times of every file+species
    // pair into clusters and turns each surviving cluster into a ClusteredCall.
    //
    // Clusters with len(cluster) <= minDetections are dropped. A call spans from
    // the first start time in its cluster to the last start time plus
    // clipDuration. The start-time slices inside detections are sorted in place.
    //
    // It returns the calls sorted by file, then start time, then ebird code, and
    // the number of calls per ebird code. The ebird-code tie-break makes the
    // order deterministic: the input map is iterated in random order and
    // sort.Slice is not stable, so without it two species detected at the same
    // time in the same file could come out in either order from run to run.
    func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
    	var allCalls []ClusteredCall
    	speciesCount := make(map[string]int)

    	for key, startTimes := range detections {
    		sort.Float64s(startTimes)
    		for _, cluster := range clusterStartTimes(startTimes, gapThreshold) {
    			if len(cluster) <= minDetections {
    				continue
    			}
    			allCalls = append(allCalls, ClusteredCall{
    				File:      key.File,
    				StartTime: cluster[0],
    				EndTime:   cluster[len(cluster)-1] + clipDuration,
    				EbirdCode: key.EbirdCode,
    				Segments:  len(cluster),
    			})
    			speciesCount[key.EbirdCode]++
    		}
    	}

    	sort.Slice(allCalls, func(i, j int) bool {
    		a, b := &allCalls[i], &allCalls[j]
    		if a.File != b.File {
    			return a.File < b.File
    		}
    		if a.StartTime != b.StartTime {
    			return a.StartTime < b.StartTime
    		}
    		return a.EbirdCode < b.EbirdCode
    	})
    	return allCalls, speciesCount
    }
    // DirCache is a one-shot snapshot of a directory's file names, indexed for
    // case-insensitive lookup by basename (the filename without its extension).
    // The maps are only written while the cache is being built and only read by
    // the lookup methods.
    type DirCache struct {
    	dir    string
    	wavMap map[string]string // lowercase basename -> filename with original case, WAV files only (e.g. "20230610_150000" -> "20230610_150000.WAV")
    	dirMap map[string]string // lowercase basename -> filename, any file (used by from-raven for .selections.txt etc.)
    }

    // NewDirCache builds a DirCache from a single scan of dir. Subdirectories are
    // not indexed. If the directory cannot be read, the returned cache is empty
    // and every lookup misses.
    func NewDirCache(dir string) *DirCache {
    	cache := &DirCache{dir: dir}

    	entries, err := os.ReadDir(dir)
    	if err != nil {
    		cache.wavMap = make(map[string]string)
    		cache.dirMap = make(map[string]string)
    		return cache
    	}

    	cache.wavMap = make(map[string]string, len(entries))
    	cache.dirMap = make(map[string]string, len(entries))
    	for _, e := range entries {
    		if e.IsDir() {
    			continue
    		}
    		fileName := e.Name()
    		suffix := filepath.Ext(fileName)
    		lookupKey := strings.ToLower(strings.TrimSuffix(fileName, suffix))

    		cache.dirMap[lookupKey] = fileName
    		if strings.EqualFold(suffix, ".wav") {
    			cache.wavMap[lookupKey] = fileName
    		}
    	}
    	return cache
    }

    // FindWAV resolves baseName (case-insensitive, without extension) to the full
    // path of the matching WAV file, keeping the on-disk spelling of the name.
    // It returns the empty string when there is no such WAV file.
    func (dc *DirCache) FindWAV(baseName string) string {
    	fileName, found := dc.wavMap[strings.ToLower(baseName)]
    	if !found {
    		return ""
    	}
    	return filepath.Join(dc.dir, fileName)
    }

    // FindFile resolves baseName (case-insensitive, without extension) to the
    // full path of a file of any type, keeping the on-disk spelling of the name.
    // It returns the empty string when nothing matches.
    func (dc *DirCache) FindFile(baseName string) string {
    	fileName, found := dc.dirMap[strings.ToLower(baseName)]
    	if !found {
    		return ""
    	}
    	return filepath.Join(dc.dir, fileName)
    }
    // findWAVFile scans dir for a WAV file whose name without extension equals
    // baseName (e.g., "20230610_150000"). The extension is matched
    // case-insensitively (".wav", ".WAV", ...); the base name itself is compared
    // exactly. Directories are ignored.
    // Returns the full path using the on-disk spelling, or the empty string if
    // the directory cannot be read or no file matches.
    //
    // Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
    func findWAVFile(dir, baseName string) string {
    	listing, err := os.ReadDir(dir)
    	if err != nil {
    		return ""
    	}
    	for _, item := range listing {
    		if item.IsDir() {
    			continue
    		}
    		fileName := item.Name()
    		suffix := filepath.Ext(fileName)
    		if !strings.EqualFold(suffix, ".wav") {
    			continue
    		}
    		if strings.TrimSuffix(fileName, suffix) == baseName {
    			return filepath.Join(dir, fileName)
    		}
    	}
    	return ""
    }
    // writeDotFiles writes (or merges into) one AviaNZ .data file per audio file
    // that has calls. WAV files are looked up in the directory containing the
    // CSV. Fewer than 10 files are handled sequentially to avoid goroutine
    // overhead; larger batches go through the parallel worker pool.
    // It returns the number of files written, the number skipped, and an error.
    func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
    	baseDir := filepath.Dir(csvPath)

    	// Bucket the calls by the bare filename of the recording they belong to.
    	grouped := make(map[string][]ClusteredCall)
    	for _, c := range calls {
    		name := filepath.Base(c.File)
    		grouped[name] = append(grouped[name], c)
    	}

    	fileCount := len(grouped)
    	if progress != nil {
    		progress(0, fileCount, "Processing WAV files")
    	}

    	if fileCount >= 10 {
    		return writeDotFilesParallel(baseDir, filter, grouped, progress)
    	}
    	return writeDotFilesSequential(baseDir, filter, grouped, progress)
    }
    // dotDataJob represents a single file to process: the recording's filename
    // (as grouped by writeDotFiles) and the calls that belong to it.
    type dotDataJob struct {
    filename string
    fileCalls []ClusteredCall
    }
    // dotDataResult represents the result of processing a single file.
    // written is true only when the .data file was written; a result with
    // written == false and err == nil means the file was skipped.
    type dotDataResult struct {
    filename string
    written bool
    err error
    }
    // writeDotFilesSequential handles the files one after another (used for small
    // batches). A file is skipped when its WAV cannot be found in csvDir or its
    // header cannot be read. A failed .data write stops processing at once and
    // returns the counts accumulated so far together with the error.
    func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
    	var written, skipped, done int
    	total := len(callsByFile)

    	// advance marks one more file as handled and notifies the progress callback.
    	advance := func() {
    		done++
    		if progress != nil {
    			progress(done, total, "")
    		}
    	}

    	for filename, fileCalls := range callsByFile {
    		// Locate the WAV on disk to get its real spelling.
    		stem := strings.TrimSuffix(filename, filepath.Ext(filename))
    		wavPath := findWAVFile(csvDir, stem)
    		if wavPath == "" {
    			skipped++
    			advance()
    			continue
    		}

    		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    		if err != nil {
    			skipped++
    			advance()
    			continue
    		}

    		dataPath := wavPath + ".data"
    		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)
    		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
    			return written, skipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
    		}
    		written++
    		advance()
    	}
    	return written, skipped, nil
    }
    // writeDotFilesParallel fans the files out to DOT_DATA_WORKERS workers and
    // collects their results. Both channels are buffered to the number of files,
    // so queuing jobs and posting results never blocks. All files are attempted
    // even if one fails; the first error seen is returned with the written and
    // skipped counts (a failed file counts as skipped).
    func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
    	total := len(callsByFile)
    	jobs := make(chan dotDataJob, total)
    	results := make(chan dotDataResult, total)

    	// Launch the worker pool.
    	var wg sync.WaitGroup
    	for range DOT_DATA_WORKERS {
    		wg.Add(1)
    		go dotDataWorker(csvDir, filter, jobs, results, &wg)
    	}

    	// Queue every file, then signal that no more jobs are coming.
    	for name, group := range callsByFile {
    		jobs <- dotDataJob{filename: name, fileCalls: group}
    	}
    	close(jobs)

    	// Close results once all workers are done so the loop below terminates.
    	go func() {
    		wg.Wait()
    		close(results)
    	}()

    	var (
    		written, skipped int
    		firstErr         error
    		processed        atomic.Int32
    	)
    	for res := range results {
    		if firstErr == nil && res.err != nil {
    			firstErr = res.err
    		}
    		if res.written {
    			written++
    		} else {
    			skipped++
    		}
    		if progress == nil {
    			continue
    		}
    		progress(int(processed.Add(1)), total, "")
    	}
    	return written, skipped, firstErr
    }
    // dotDataWorker drains the jobs channel, posting exactly one result per job,
    // and signals wg when the channel is closed and empty. A missing WAV or an
    // unreadable WAV header is reported as a skip (written=false, err=nil); only
    // a failed .data write is reported as an error.
    func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
    	defer wg.Done()

    	for job := range jobs {
    		skippedResult := dotDataResult{filename: job.filename}

    		// Locate the WAV on disk to get its real spelling.
    		stem := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
    		wavPath := findWAVFile(csvDir, stem)
    		if wavPath == "" {
    			results <- skippedResult
    			continue
    		}

    		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    		if err != nil {
    			results <- skippedResult
    			continue
    		}

    		dataPath := wavPath + ".data"
    		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)
    		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
    			results <- dotDataResult{
    				filename: job.filename,
    				err:      fmt.Errorf("failed to write %s: %w", dataPath, err),
    			}
    			continue
    		}
    		results <- dotDataResult{filename: job.filename, written: true}
    	}
    }
    // buildAviaNZMetaAndSegments produces the metadata record and the segment
    // list for one recording's .data file. The metadata is marked as produced by
    // operator "Auto" with reviewer "None". Each call becomes one segment
    // [start, end, freq_low, freq_high, labels] carrying a single label with the
    // call's ebird code, DEFAULT_CERTAINTY and the given filter.
    func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
    	noReviewer := "None"
    	meta := AviaNZMeta{
    		Operator: "Auto",
    		Reviewer: &noReviewer,
    		Duration: duration,
    	}

    	var segments []AviaNZSegment
    	for i := range calls {
    		c := &calls[i]
    		segments = append(segments, AviaNZSegment{
    			c.StartTime,
    			c.EndTime,
    			0,          // freq_low
    			sampleRate, // freq_high (full band)
    			[]AviaNZLabel{{
    				Species:   c.EbirdCode,
    				Certainty: DEFAULT_CERTAINTY,
    				Filter:    filter,
    			}},
    		})
    	}
    	return meta, segments
    }
    // writeAviaNZDataFile writes data as compact JSON (followed by a newline) to
    // a new file at path, truncating any file already there; it does not check
    // for existing files.
    //
    // It returns an error if the file cannot be created, the data cannot be
    // encoded, or the file cannot be closed. The close error is reported because
    // buffered data may only fail to reach disk at close time, and ignoring it
    // could leave a truncated .data file behind without any indication.
    func writeAviaNZDataFile(path string, data []any) (err error) {
    	file, err := os.Create(path)
    	if err != nil {
    		return fmt.Errorf("failed to create file: %w", err)
    	}
    	defer func() {
    		// Keep the first error; only surface the close error on otherwise
    		// successful writes.
    		if closeErr := file.Close(); closeErr != nil && err == nil {
    			err = fmt.Errorf("failed to close file: %w", closeErr)
    		}
    	}()

    	// json.Encoder emits compact output by default, so no indent setup is needed.
    	if encErr := json.NewEncoder(file).Encode(data); encErr != nil {
    		return fmt.Errorf("failed to encode JSON: %w", encErr)
    	}
    	return nil
    }
    // writeDotDataFileSafe writes newSegments to the .data file at path without
    // clobbering existing work:
    //   - no file at path (Stat fails): a fresh file is written from meta and newSegments
    //   - existing file cannot be parsed: error, file left untouched
    //   - existing file already has a label from filter: error, file left untouched
    //   - otherwise: newSegments are appended, all segments are re-sorted by
    //     start time and the file is rewritten
    func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
    	if _, statErr := os.Stat(path); statErr != nil {
    		// Nothing to protect: write a brand-new file.
    		return writeAviaNZDataFile(path, buildDataFileFromSegments(meta, newSegments))
    	}

    	existing, err := utils.ParseDataFile(path)
    	if err != nil {
    		return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
    	}

    	// Refuse to add a second set of results for the same filter.
    	for _, have := range existing.Segments {
    		if have.HasFilterLabel(filter) {
    			return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
    		}
    	}

    	// Different filter: merging is safe.
    	for i := range newSegments {
    		existing.Segments = append(existing.Segments, convertAviaNZSegment(newSegments[i], filter))
    	}
    	sort.Slice(existing.Segments, func(a, b int) bool {
    		return existing.Segments[a].StartTime < existing.Segments[b].StartTime
    	})
    	return existing.Write(path)
    }
    // convertAviaNZSegment turns a positional AviaNZSegment
    // [start, end, freq_low, freq_high, labels] into a *utils.Segment.
    // Every label receives the given filter, replacing whatever filter the source
    // label carried. Frequencies may be stored as int or float64; any other type
    // yields 0. Start, end and labels must have their expected dynamic types,
    // otherwise the type assertion panics.
    func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
    	srcLabels := seg[4].([]AviaNZLabel)
    	converted := make([]*utils.Label, len(srcLabels))
    	for i := range srcLabels {
    		converted[i] = &utils.Label{
    			Species:   srcLabels[i].Species,
    			Certainty: srcLabels[i].Certainty,
    			Filter:    filter,
    		}
    	}

    	// asFloat accepts the two numeric representations a frequency can have.
    	asFloat := func(raw any) float64 {
    		switch n := raw.(type) {
    		case int:
    			return float64(n)
    		case float64:
    			return n
    		}
    		return 0
    	}
    	low := asFloat(seg[2])
    	high := asFloat(seg[3])

    	return &utils.Segment{
    		StartTime: seg[0].(float64),
    		EndTime:   seg[1].(float64),
    		FreqLow:   low,
    		FreqHigh:  high,
    		Labels:    converted,
    	}
    }
    // buildDataFileFromSegments assembles the top-level array of a .data file:
    // the metadata record first, followed by every segment in order.
    func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
    	doc := make([]any, 1, 1+len(segments))
    	doc[0] = meta
    	for i := range segments {
    		doc = append(doc, segments[i])
    	}
    	return doc
    }
    // ParseFilterFromFilename derives the filter name from a predictions CSV
    // path. After dropping the directory and a trailing ".csv", the name must
    // consist of exactly three underscore-separated parts; the middle part is the
    // filter:
    //
    //	"predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"
    //
    // Any other shape yields the empty string.
    func ParseFilterFromFilename(csvPath string) string {
    	stem := strings.TrimSuffix(filepath.Base(csvPath), ".csv")

    	// First underscore: separates the prefix from the rest.
    	_, rest, ok := strings.Cut(stem, "_")
    	if !ok {
    		return ""
    	}
    	// Second underscore: separates the filter from the final part, which must
    	// not contain another underscore (that would make four or more parts).
    	middle, last, ok := strings.Cut(rest, "_")
    	if !ok || strings.Contains(last, "_") {
    		return ""
    	}
    	return middle
    }
    // clusterStartTimes splits startTimes (expected in ascending order) into runs
    // of consecutive values. A value joins the current run when it lies at most
    // gapThreshold after its predecessor; otherwise it opens a new run.
    // Empty input yields nil. The returned runs are freshly allocated and do not
    // share storage with startTimes.
    func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
    	if len(startTimes) == 0 {
    		return nil
    	}

    	// The last element of runs is always the run currently being extended.
    	runs := [][]float64{{startTimes[0]}}
    	previous := startTimes[0]
    	for _, current := range startTimes[1:] {
    		if current-previous <= gapThreshold {
    			tail := len(runs) - 1
    			runs[tail] = append(runs[tail], current)
    		} else {
    			runs = append(runs, []float64{current})
    		}
    		previous = current
    	}
    	return runs
    }
    return cols, nil
    }
    ignoredColumns := map[string]bool{"NotKiwi": true, "0.0": true}
  • file deletion: calls_from_birda_raven_test.go (----------)
    [6.248737][6.515903:515957](),[6.515957][6.504774:504774]()
    package tools
    import (
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // ============================================
    // BirdNET Tests
    // ============================================
    // TestCallsFromBirda_NewDataFile verifies that a BirdNET results CSV produces
    // a new .data file next to the WAV, labelled with the "BirdNET" filter and a
    // certainty derived from the confidence column (0.85 -> 85).
    func TestCallsFromBirda_NewDataFile(t *testing.T) {
    	tmpDir := t.TempDir()

    	// Create a minimal WAV file
    	wavPath := filepath.Join(tmpDir, "test.WAV")
    	createMinimalWAV(t, wavPath, 16000, 60.0)

    	// Create BirdNET results file
    	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
    	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
    	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
    		t.Fatal(err)
    	}

    	input := CallsFromBirdaInput{
    		File: birdaPath,
    	}
    	output, err := CallsFromBirda(input)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if output.DataFilesWritten != 1 {
    		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
    	}
    	if output.Filter != "BirdNET" {
    		t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
    	}
    	if output.TotalCalls != 1 {
    		t.Errorf("expected 1 call, got %d", output.TotalCalls)
    	}

    	// Verify .data file was created
    	dataPath := wavPath + ".data"
    	df, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse .data file: %v", err)
    	}
    	// Fatal rather than Error: the label checks below index the first segment
    	// and its first label, and would panic on an empty slice.
    	if len(df.Segments) != 1 {
    		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
    	}
    	if len(df.Segments[0].Labels) == 0 {
    		t.Fatal("expected at least one label on the segment, got none")
    	}
    	if df.Segments[0].Labels[0].Filter != "BirdNET" {
    		t.Errorf("expected filter 'BirdNET', got '%s'", df.Segments[0].Labels[0].Filter)
    	}
    	if df.Segments[0].Labels[0].Certainty != 85 {
    		t.Errorf("expected certainty 85, got %d", df.Segments[0].Labels[0].Certainty)
    	}
    }
    // TestCallsFromBirda_ExistingSameFilter checks that CallsFromBirda refuses to
    // proceed when the WAV's .data file already holds a label from the "BirdNET"
    // filter: both the returned error and the output's Error must be set.
    func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
    	workDir := t.TempDir()

    	// Recording plus a pre-existing .data file that already has BirdNET labels.
    	wavPath := filepath.Join(workDir, "test.WAV")
    	createMinimalWAV(t, wavPath, 16000, 60.0)
    	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`
    	if err := os.WriteFile(wavPath+".data", []byte(existingData), 0644); err != nil {
    		t.Fatal(err)
    	}

    	// New BirdNET results for the same recording.
    	resultsPath := filepath.Join(workDir, "test.BirdNET.results.csv")
    	results := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n"
    	if err := os.WriteFile(resultsPath, []byte(results), 0644); err != nil {
    		t.Fatal(err)
    	}

    	output, err := CallsFromBirda(CallsFromBirdaInput{File: resultsPath})
    	if err == nil {
    		t.Error("expected error for same filter, got nil")
    	}
    	if output.Error == nil {
    		t.Error("expected error message in output")
    	}
    }
    // TestCallsFromBirda_ExistingDifferentFilter seeds a .data file whose only
    // label uses the "Manual" filter and expects the BirdNET import to succeed,
    // leaving two segments in the .data file.
    func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)

        dataFile := wav + ".data"
        csvPath := filepath.Join(dir, "test.BirdNET.results.csv")

        // Written in order: the pre-existing .data file, then the results CSV.
        fixtures := []struct{ path, body string }{
            {dataFile, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`},
            {csvPath, "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"},
        }
        for _, f := range fixtures {
            if err := os.WriteFile(f.path, []byte(f.body), 0644); err != nil {
                t.Fatal(err)
            }
        }

        result, runErr := CallsFromBirda(CallsFromBirdaInput{File: csvPath})
        if runErr != nil {
            t.Fatalf("unexpected error: %v", runErr)
        }
        if result.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", result.DataFilesWritten)
        }

        merged, parseErr := utils.ParseDataFile(dataFile)
        if parseErr != nil {
            t.Fatalf("failed to parse .data file: %v", parseErr)
        }
        if got := len(merged.Segments); got != 2 {
            t.Errorf("expected 2 segments after merge, got %d", got)
        }
    }
    // TestCallsFromBirda_DeleteOption runs the import with Delete set and expects
    // one file to be reported deleted and the results CSV to be gone from disk.
    func TestCallsFromBirda_DeleteOption(t *testing.T) {
        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)

        csvPath := filepath.Join(dir, "test.BirdNET.results.csv")
        csvBody := []byte("\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n")
        if err := os.WriteFile(csvPath, csvBody, 0644); err != nil {
            t.Fatal(err)
        }

        result, runErr := CallsFromBirda(CallsFromBirdaInput{File: csvPath, Delete: true})
        if runErr != nil {
            t.Fatalf("unexpected error: %v", runErr)
        }
        if result.FilesDeleted != 1 {
            t.Errorf("expected 1 file deleted, got %d", result.FilesDeleted)
        }

        // Anything other than a not-exist error means the CSV is still there.
        _, statErr := os.Stat(csvPath)
        if !os.IsNotExist(statErr) {
            t.Error("expected BirdNET file to be deleted")
        }
    }
    func TestCallsFromBirda_FolderMode(t *testing.T) {
    tmpDir := t.TempDir()
    for i := range 2 {
    wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
    createMinimalWAV(t, wavPath, 16000, 60.0)
    birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
    birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
    if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
    t.Fatal(err)
    }
    }
    input := CallsFromBirdaInput{Folder: tmpDir}
    output, err := CallsFromBirda(input)
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if output.FilesProcessed != 2 {
    t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
    }
    if output.DataFilesWritten != 2 {
    t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
    }
    }
    // ============================================
    // Raven Tests
    // ============================================
    // TestCallsFromRaven_NewDataFile runs CallsFromRaven on a single Raven
    // selection table beside a WAV with no existing .data file, then checks the
    // reported counts and that the first written segment carries the table's
    // low/high frequency bounds.
    func TestCallsFromRaven_NewDataFile(t *testing.T) {
        tmpDir := t.TempDir()
        wavPath := filepath.Join(tmpDir, "test.WAV")
        createMinimalWAV(t, wavPath, 16000, 60.0)

        ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
        ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
        if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
            t.Fatal(err)
        }

        input := CallsFromRavenInput{File: ravenPath}
        output, err := CallsFromRaven(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
        }
        if output.Filter != "Raven" {
            t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
        }

        dataPath := wavPath + ".data"
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }

        // Guard before indexing: without it an empty .data file would panic with
        // index-out-of-range instead of failing the test cleanly.
        if len(df.Segments) == 0 {
            t.Fatalf("expected at least 1 segment, got 0")
        }
        if df.Segments[0].FreqLow != 1000 {
            t.Errorf("expected freq_low 1000, got %f", df.Segments[0].FreqLow)
        }
        if df.Segments[0].FreqHigh != 5000 {
            t.Errorf("expected freq_high 5000, got %f", df.Segments[0].FreqHigh)
        }
    }
    // TestCallsFromRaven_ExistingSameFilter seeds a .data file that already holds
    // a "Raven"-filtered label and expects CallsFromRaven to fail, both through
    // its returned error and through output.Error.
    func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
        const tableHeader = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n"

        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)

        tablePath := filepath.Join(dir, "test.Table.1.selections.txt")

        // Written in order: the pre-existing .data file, then the selection table.
        fixtures := []struct{ path, body string }{
            {wav + ".data", `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`},
            {tablePath, tableHeader + "1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n"},
        }
        for _, f := range fixtures {
            if err := os.WriteFile(f.path, []byte(f.body), 0644); err != nil {
                t.Fatal(err)
            }
        }

        result, runErr := CallsFromRaven(CallsFromRavenInput{File: tablePath})
        if runErr == nil {
            t.Error("expected error for same filter, got nil")
        }
        if result.Error == nil {
            t.Error("expected error message in output")
        }
    }
    // TestCallsFromRaven_ExistingDifferentFilter seeds a .data file whose only
    // label uses the "BirdNET" filter and expects the Raven import to succeed,
    // leaving two segments in the .data file.
    func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
        const tableHeader = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n"

        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)

        dataFile := wav + ".data"
        tablePath := filepath.Join(dir, "test.Table.1.selections.txt")

        // Written in order: the pre-existing .data file, then the selection table.
        fixtures := []struct{ path, body string }{
            {dataFile, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`},
            {tablePath, tableHeader + "1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n"},
        }
        for _, f := range fixtures {
            if err := os.WriteFile(f.path, []byte(f.body), 0644); err != nil {
                t.Fatal(err)
            }
        }

        result, runErr := CallsFromRaven(CallsFromRavenInput{File: tablePath})
        if runErr != nil {
            t.Fatalf("unexpected error: %v", runErr)
        }
        if result.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", result.DataFilesWritten)
        }

        merged, parseErr := utils.ParseDataFile(dataFile)
        if parseErr != nil {
            t.Fatalf("failed to parse .data file: %v", parseErr)
        }
        if got := len(merged.Segments); got != 2 {
            t.Errorf("expected 2 segments after merge, got %d", got)
        }
    }
    // TestCallsFromRaven_DeleteOption runs the import with Delete set and expects
    // one file to be reported deleted and the selection table to be gone from
    // disk.
    func TestCallsFromRaven_DeleteOption(t *testing.T) {
        const tableHeader = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n"

        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)

        tablePath := filepath.Join(dir, "test.Table.1.selections.txt")
        tableBody := []byte(tableHeader + "1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n")
        if err := os.WriteFile(tablePath, tableBody, 0644); err != nil {
            t.Fatal(err)
        }

        result, runErr := CallsFromRaven(CallsFromRavenInput{File: tablePath, Delete: true})
        if runErr != nil {
            t.Fatalf("unexpected error: %v", runErr)
        }
        if result.FilesDeleted != 1 {
            t.Errorf("expected 1 file deleted, got %d", result.FilesDeleted)
        }

        // Anything other than a not-exist error means the table is still there.
        _, statErr := os.Stat(tablePath)
        if !os.IsNotExist(statErr) {
            t.Error("expected Raven file to be deleted")
        }
    }
    // TestCallsFromRaven_MultipleSelections imports a selection table with three
    // rows (Kiwi, Morepork, Tui) and expects three calls with a per-species count
    // of one each.
    func TestCallsFromRaven_MultipleSelections(t *testing.T) {
        const (
            tableHeader = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n"
            tableRows   = "1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n" +
                "2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n" +
                "3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
        )

        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)

        tablePath := filepath.Join(dir, "test.Table.1.selections.txt")
        if err := os.WriteFile(tablePath, []byte(tableHeader+tableRows), 0644); err != nil {
            t.Fatal(err)
        }

        result, runErr := CallsFromRaven(CallsFromRavenInput{File: tablePath})
        if runErr != nil {
            t.Fatalf("unexpected error: %v", runErr)
        }
        if result.TotalCalls != 3 {
            t.Errorf("expected 3 calls, got %d", result.TotalCalls)
        }

        // Report at most once, on the first species whose count is not 1.
        for _, species := range []string{"Kiwi", "Morepork", "Tui"} {
            if result.SpeciesCount[species] != 1 {
                t.Errorf("unexpected species count: %v", result.SpeciesCount)
                break
            }
        }
    }
  • file deletion: calls_from_birda.go (----------)
    [6.248737][6.529042:529085](),[6.529085][6.515959:515959]()
    package tools
    import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsFromBirdaInput defines the input for the calls-from-birda tool
    type CallsFromBirdaInput struct {
    // Folder is a directory to process; presumably it is scanned for
    // *.BirdNET.results.csv files (see birdaSource.FindFiles) — confirm in
    // callsFromSource.
    Folder string `json:"folder"`
    // File is the path of a single BirdNET results file to process.
    File string `json:"file"`
    // Delete asks for the results file to be removed; presumably only after it
    // has been processed successfully — confirm in callsFromSource.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
    }
    // CallsFromBirdaOutput defines the output for the calls-from-birda tool
    //
    // CallsFromBirda fills every field by copying the same-named field from the
    // common output returned by callsFromSource.
    type CallsFromBirdaOutput struct {
    Calls []ClusteredCall `json:"calls"`
    TotalCalls int `json:"total_calls"`
    // SpeciesCount is keyed by species name — presumably the count of calls
    // per species; confirm in callsFromSource.
    SpeciesCount map[string]int `json:"species_count"`
    DataFilesWritten int `json:"data_files_written"`
    DataFilesSkipped int `json:"data_files_skipped"`
    FilesProcessed int `json:"files_processed"`
    FilesDeleted int `json:"files_deleted"`
    Filter string `json:"filter"`
    // Error is omitted from JSON when nil.
    Error *string `json:"error,omitempty"`
    }
    // birdaSource implements CallSource for BirdNET results files
    type birdaSource struct{}

    // Name returns "BirdNET".
    func (birdaSource) Name() string {
        return "BirdNET"
    }

    // FindFiles lists the entries directly inside folder whose names end in
    // ".BirdNET.results.csv", returned as folder-joined paths. Subdirectories are
    // not descended into. The result is nil when nothing matches; if the folder
    // cannot be read the os.ReadDir error is returned unchanged.
    func (birdaSource) FindFiles(folder string) ([]string, error) {
        const suffix = ".BirdNET.results.csv"

        entries, err := os.ReadDir(folder)
        if err != nil {
            return nil, err
        }

        var matches []string
        for _, e := range entries {
            if !strings.HasSuffix(e.Name(), suffix) {
                continue
            }
            matches = append(matches, filepath.Join(folder, e.Name()))
        }
        return matches, nil
    }
    commonOutput, err := callsFromSource(src, commonInput)
    // Convert to Birda-specific output type
    var output CallsFromBirdaOutput
    output.Calls = commonOutput.Calls
    output.TotalCalls = commonOutput.TotalCalls
    output.SpeciesCount = commonOutput.SpeciesCount
    output.DataFilesWritten = commonOutput.DataFilesWritten
    output.DataFilesSkipped = commonOutput.DataFilesSkipped
    output.FilesProcessed = commonOutput.FilesProcessed
    output.FilesDeleted = commonOutput.FilesDeleted
    output.Filter = commonOutput.Filter
    output.Error = commonOutput.Error
    return output, err
    }
    // BirdNETDetection represents a single BirdNET detection
    type BirdNETDetection struct {
    // StartTime and EndTime are parsed from the "Start (s)" and "End (s)"
    // CSV columns.
    StartTime float64
    EndTime float64
    // NOTE(review): no column index is tracked for the scientific name in
    // birdaColumnIndices, so this field looks unpopulated by the CSV reader —
    // confirm.
    ScientificName string
    // CommonName comes from the "Common name" column.
    CommonName string
    // Confidence comes from the "Confidence" column; buildBirdNETSegments
    // treats it as a 0.0-1.0 value.
    Confidence float64
    // WAVPath comes from the optional "File" column and is empty when that
    // column is absent.
    WAVPath string
    }
    // birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
    //
    // parseBirdaCSVHeader initialises every index to -1, meaning "column not
    // found", and rejects headers where any of startIdx, endIdx, commonNameIdx
    // or confidenceIdx is still -1.
    type birdaColumnIndices struct {
    startIdx int
    endIdx int
    commonNameIdx int
    confidenceIdx int
    // fileIdx is optional: it may remain -1 without the header being rejected.
    fileIdx int
    }
    // parseBirdaCSVHeader reads the CSV header row and returns column indices.
    func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
    header, err := reader.Read()
    if err != nil {
    return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
    }
    idx := birdaColumnIndices{startIdx: -1, endIdx: -1, commonNameIdx: -1, confidenceIdx: -1, fileIdx: -1}
    for i, col := range header {
    col = strings.TrimPrefix(col, "\ufeff")
    switch col {
    case "Start (s)":
    idx.startIdx = i
    case "End (s)":
    idx.endIdx = i
    case "Common name":
    idx.commonNameIdx = i
    case "Confidence":
    idx.confidenceIdx = i
    case "File":
    idx.fileIdx = i
    }
    }
    if idx.startIdx == -1 || idx.endIdx == -1 || idx.commonNameIdx == -1 || idx.confidenceIdx == -1 {
    return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
    }
    // readBirdaDetections reads all detection records from a BirdNET CSV.
    func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
    var detections []BirdNETDetection
    for {
    record, err := reader.Read()
    if err == io.EOF {
    break
    }
    if err != nil {
    return nil, fmt.Errorf("failed to read record: %w", err)
    }
    var det BirdNETDetection
    startTime, perr := strconv.ParseFloat(record[idx.startIdx], 64)
    if perr != nil {
    return nil, fmt.Errorf("failed to parse start time %q: %w", record[idx.startIdx], perr)
    }
    det.StartTime = startTime
    endTime, perr := strconv.ParseFloat(record[idx.endIdx], 64)
    if perr != nil {
    return nil, fmt.Errorf("failed to parse end time %q: %w", record[idx.endIdx], perr)
    }
    det.EndTime = endTime
    det.CommonName = record[idx.commonNameIdx]
    confidence, perr := strconv.ParseFloat(record[idx.confidenceIdx], 64)
    if perr != nil {
    return nil, fmt.Errorf("failed to parse confidence %q: %w", record[idx.confidenceIdx], perr)
    }
    det.Confidence = confidence
    if idx.fileIdx >= 0 && idx.fileIdx < len(record) {
    det.WAVPath = record[idx.fileIdx]
    }
    detections = append(detections, det)
    }
    // resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
    func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
    if firstWAVPath != "" {
    if _, err := os.Stat(firstWAVPath); err == nil {
    return firstWAVPath
    }
    }
    dir := filepath.Dir(birdaFile)
    base := filepath.Base(birdaFile)
    baseName := strings.TrimSuffix(base, ".BirdNET.results.csv")
    if cache != nil {
    return cache.FindWAV(baseName)
    }
    // processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup
    //
    // It parses the CSV header and detections, reads the WAV header for sample
    // rate and duration, writes the segments to "<wavPath>.data" with filter
    // "BirdNET", and returns one ClusteredCall per detection.
    //
    // The two bool results are (true, false) after a successful write and
    // (false, true) when the file is passed over without error: no detections,
    // no WAV path, or an unreadable WAV header. Presumably they mean
    // written/skipped — confirm against the caller. Open, parse and write
    // failures return (false, false) with a non-nil error.
    //
    // NOTE(review): wavPath's declaration is not visible inside this span as
    // rendered; confirm it is resolved (see resolveBirdaWAVPath) before the
    // empty-path check below.
    func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    file, err := os.Open(birdaFile)
    if err != nil {
    return nil, false, false, fmt.Errorf("failed to open file: %w", err)
    }
    // Read-only handle: the Close error is deliberately discarded.
    defer func() { _ = file.Close() }()
    reader := csv.NewReader(file)
    idx, err := parseBirdaCSVHeader(reader)
    if err != nil {
    return nil, false, false, err
    }
    detections, err := readBirdaDetections(reader, idx)
    if err != nil {
    return nil, false, false, err
    }
    // A results file with a header but no rows is skipped, not an error.
    if len(detections) == 0 {
    return nil, false, true, nil
    }
    // No matching WAV: skip rather than fail.
    if wavPath == "" {
    return nil, false, true, nil
    }
    sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    if err != nil {
    // The WAV header error is dropped and the file is reported as skipped.
    return nil, false, true, nil
    }
    dataPath := wavPath + ".data"
    segments := buildBirdNETSegments(detections, sampleRate)
    meta := AviaNZMeta{Operator: "BirdNET", Duration: duration}
    reviewer := "None"
    meta.Reviewer = &reviewer
    if err := writeDotDataFileSafe(dataPath, segments, "BirdNET", meta); err != nil {
    return nil, false, false, err
    }
    var calls []ClusteredCall
    for _, det := range detections {
    calls = append(calls, ClusteredCall{
    File: wavPath,
    StartTime: det.StartTime,
    EndTime: det.EndTime,
    // The EbirdCode field is filled with the detection's common name.
    EbirdCode: det.CommonName,
    Segments: 1,
    })
    }
    return calls, true, false, nil
    }
    // buildBirdNETSegments converts BirdNET detections to AviaNZ segments.
    //
    // Each detection becomes one segment spanning StartTime..EndTime with
    // freq_low 0 and freq_high set to sampleRate. The segment carries a single
    // label whose species is the detection's common name, whose filter is
    // "BirdNET", and whose certainty is the confidence scaled to 0-100.
    // The result is nil when detections is empty.
    func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
        var segments []AviaNZSegment
        for _, det := range detections {
            // Convert confidence (0.0-1.0) to certainty (0-100), rounding to the
            // nearest integer. A bare int() conversion truncates toward zero, so
            // a confidence of 0.29 (0.29*100 == 28.999999999999996 in float64)
            // would be stored as 28. Adding 0.5 before truncating rounds
            // half-up for the non-negative range; out-of-range values are
            // clamped to 0..100 as before.
            certainty := min(max(int(det.Confidence*100+0.5), 0), 100)

            labels := []AviaNZLabel{
                {
                    Species:   det.CommonName,
                    Certainty: certainty,
                    Filter:    "BirdNET",
                },
            }
            segment := AviaNZSegment{
                det.StartTime,
                det.EndTime,
                0,          // freq_low
                sampleRate, // freq_high (full band)
                labels,
            }
            segments = append(segments, segment)
        }
        return segments
    }
    wavPath := resolveBirdaWAVPath(birdaFile, detections[0].WAVPath, cache)
    return findWAVFile(dir, baseName)
    }
    return detections, nil
    }
    return idx, nil
    }
    // CallsFromBirda processes BirdNET results files and writes .data files
    //
    // NOTE(review): only the opening statements of the function are visible in
    // this span as rendered; the remainder of the body must be read elsewhere.
    func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
    src := birdaSource{}
    // Reads as a direct conversion to the shared input type — TODO confirm
    // CallsFromSourceInput is a type with the same fields, not a function.
    commonInput := CallsFromSourceInput(input)
    }
    func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    return processBirdaFileCached(birdaFile, cache)
  • file deletion: calls_detect_anomalies_test.go (----------)
    [6.248737][6.532511:532565](),[6.532565][6.529087:529087]()
    package tools
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // TestDetectAnomalies_LabelMismatch writes one .data file containing a single
    // segment labelled by two models with different calltypes, and expects exactly
    // one anomaly of type "label_mismatch" and no certainty mismatches.
    func TestDetectAnomalies_LabelMismatch(t *testing.T) {
        dir := t.TempDir()

        // Same time range, different calltypes across two models
        data := `[{"Operator":"test"},` +
            `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
            `{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
        if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
            t.Fatal(err)
        }

        out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
        if err != nil {
            t.Fatal(err)
        }
        if out.LabelMismatches != 1 {
            t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
        }
        if out.CertaintyMismatches != 0 {
            t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
        }

        // Guard before indexing: if no anomaly was produced, fail cleanly rather
        // than panic with index-out-of-range on out.Anomalies[0].
        if len(out.Anomalies) == 0 {
            t.Fatalf("expected at least 1 anomaly, got 0")
        }
        if out.Anomalies[0].Type != "label_mismatch" {
            t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
        }
    }
    func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
    dir := t.TempDir()
    // Same time range, same labels, different certainty
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
    `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.CertaintyMismatches != 1 {
    t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
    }
    if out.LabelMismatches != 0 {
    t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
    }
    }
    func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
    dir := t.TempDir()
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
    `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
    t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
    }
    }
    func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
    dir := t.TempDir()
    // model-a has a segment, model-b has no segment in this file
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
    t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
    }
    if out.FilesWithAllModels != 0 {
    t.Errorf("file missing a model should not count as FilesWithAllModels")
    }
    }
    func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
    dir := t.TempDir()
    _, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
    if err == nil {
    t.Error("expected error with only 1 model")
    }
    }
  • file deletion: calls_detect_anomalies.go (----------)
    [6.248737][6.539374:539423](),[6.539423][6.532567:532567]()
    package tools
    import (
    "fmt"
    "os"
    "path/filepath"
    "skraak/utils"
    )
    // DetectAnomaliesInput holds the parameters for DetectAnomalies.
    type DetectAnomaliesInput struct {
    // Folder is the directory whose .data files are examined.
    Folder string
    Models []string // at least 2 filter names
    Species []string // optional scope; empty = all species
    }
    // DetectAnomaliesOutput is the result of DetectAnomalies.
    type DetectAnomaliesOutput struct {
    // Folder is the cleaned (filepath.Clean) input folder.
    Folder string `json:"folder"`
    Models []string `json:"models"`
    // FilesExamined counts .data files that parsed successfully.
    FilesExamined int `json:"files_examined"`
    // FilesWithAllModels counts examined files for which per-file detection
    // returned a non-nil result.
    FilesWithAllModels int `json:"files_with_all_models"`
    // AnomaliesTotal is len(Anomalies).
    AnomaliesTotal int `json:"anomalies_total"`
    LabelMismatches int `json:"label_mismatches"`
    CertaintyMismatches int `json:"certainty_mismatches"`
    Anomalies []Anomaly `json:"anomalies,omitempty"`
    // Error carries the failure message and is omitted from JSON when empty.
    Error string `json:"error,omitempty"`
    }
    // Anomaly describes one disagreement between models within a single file.
    type Anomaly struct {
    // File is the path of the .data file the anomaly was found in.
    File string `json:"file"`
    Type string `json:"type"` // "label_mismatch" | "certainty_mismatch"
    // Segments holds one entry per compared segment (see buildAnomalySegs).
    Segments []AnomalySegment `json:"segments"`
    }
    // AnomalySegment is one model's view of a segment involved in an Anomaly:
    // the model (filter) name, the segment's start and end times, and the
    // species, calltype and certainty from that model's label.
    type AnomalySegment struct {
    Model string `json:"model"`
    Start float64 `json:"start"`
    End float64 `json:"end"`
    Species string `json:"species"`
    // CallType is omitted from JSON when empty.
    CallType string `json:"calltype,omitempty"`
    Certainty int `json:"certainty"`
    }
    // DetectAnomalies compares corresponding segments across multiple ML model filters
    // within each .data file. Segments are matched by time overlap (same logic as propagate).
    // Lonely segments (no overlap in one or more models) are silently skipped.
    // Anomalies are flagged when overlapping segments disagree on species+calltype,
    // or when labels match but certainty values differ.
    // validateAnomalyInput validates the input parameters for DetectAnomalies.
    //
    // Checks run in this order: at least two model names, no repeated model
    // name, and Folder must exist and be a directory. The first failing check
    // determines the returned error; nil means the input is acceptable.
    func validateAnomalyInput(input DetectAnomaliesInput) error {
        if len(input.Models) < 2 {
            return fmt.Errorf("at least 2 --model values required")
        }

        // A name seen twice, at any two positions, is a duplicate.
        seen := make(map[string]struct{}, len(input.Models))
        for _, name := range input.Models {
            if _, dup := seen[name]; dup {
                return fmt.Errorf("duplicate --model values are not allowed")
            }
            seen[name] = struct{}{}
        }

        info, err := os.Stat(input.Folder)
        switch {
        case err != nil:
            return fmt.Errorf("folder not found: %s", input.Folder)
        case !info.IsDir():
            return fmt.Errorf("not a directory: %s", input.Folder)
        }
        return nil
    }
    // DetectAnomalies walks the .data files found under input.Folder, runs
    // detectAnomaliesInFile on each one that parses, and tallies the anomalies
    // by type.
    //
    // Files that fail to parse are silently skipped and do not count toward
    // FilesExamined. A failure to list the folder's .data files is returned as
    // an error and also recorded in output.Error.
    //
    // NOTE(review): no call to validateAnomalyInput is visible inside this span
    // as rendered; confirm where input validation happens.
    func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
    folder := filepath.Clean(input.Folder)
    output := DetectAnomaliesOutput{
    Folder: folder,
    Models: input.Models,
    }
    files, err := utils.FindDataFiles(folder)
    if err != nil {
    output.Error = fmt.Sprintf("list .data files: %v", err)
    return output, fmt.Errorf("%s", output.Error)
    }
    // Set of species keys to restrict to; empty means no restriction (see inScope).
    scopeSet := make(map[string]bool, len(input.Species))
    for _, s := range input.Species {
    scopeSet[s] = true
    }
    for _, path := range files {
    df, err := utils.ParseDataFile(path)
    if err != nil {
    // Unparseable file: skip it without recording the error.
    continue
    }
    output.FilesExamined++
    anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
    if anomalies == nil {
    // file didn't have all models present
    continue
    }
    output.FilesWithAllModels++
    for _, a := range anomalies {
    // Any type other than "label_mismatch" is counted as a certainty mismatch.
    if a.Type == "label_mismatch" {
    output.LabelMismatches++
    } else {
    output.CertaintyMismatches++
    }
    }
    output.Anomalies = append(output.Anomalies, anomalies...)
    }
    output.AnomaliesTotal = len(output.Anomalies)
    return output, nil
    }
    // labeledSeg pairs a segment with the specific label matching the model filter.
    //
    // Both fields are pointers into the parsed data file's own segments and
    // labels (see collectModelSegments); nothing is copied.
    type labeledSeg struct {
    seg *utils.Segment
    label *utils.Label
    }
    // detectAnomaliesInFile returns nil if the file doesn't contain all required models.
    func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
    modelSegs := collectModelSegments(df, models)
    // Skip file if any model is entirely absent.
    for _, model := range models {
    if len(modelSegs[model]) == 0 {
    return nil
    }
    }
    var anomalies []Anomaly
    for _, anchor := range modelSegs[models[0]] {
    if !inScope(anchor, scope) {
    continue
    }
    if matches := findOverlappingMatches(anchor, models, modelSegs); matches == nil {
    continue
    } else {
    group := buildComparisonGroup(anchor, models, matches)
    if a := checkGroupAnomaly(group, path, models); a != nil {
    anomalies = append(anomalies, *a)
    }
    }
    // collectModelSegments groups labeled segments by model filter name.
    //
    // Every label whose Filter equals one of models contributes one labeledSeg
    // to that model's list, in file order. Models with no matching label have no
    // entry in the returned map.
    func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
        wanted := make(map[string]struct{}, len(models))
        for _, name := range models {
            wanted[name] = struct{}{}
        }

        grouped := make(map[string][]labeledSeg, len(models))
        for _, segment := range df.Segments {
            for _, label := range segment.Labels {
                if _, ok := wanted[label.Filter]; !ok {
                    continue
                }
                grouped[label.Filter] = append(grouped[label.Filter], labeledSeg{seg: segment, label: label})
            }
        }
        return grouped
    }
    // inScope returns true if the anchor's label is within the species scope filter.
    //
    // An empty scope admits everything. Otherwise the label is in scope when the
    // set contains its species, or, for a label with a calltype, the combined
    // key "species+calltype".
    func inScope(anchor labeledSeg, scope map[string]bool) bool {
        if len(scope) == 0 {
            return true
        }

        species := anchor.label.Species
        if scope[species] {
            return true
        }
        if callType := anchor.label.CallType; callType != "" {
            return scope[species+"+"+callType]
        }
        return false
    }
    // findOverlappingMatches returns matches[model] = overlapping segments from that model,
    // or nil if any model has no overlap (lonely anchor).
    func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
    matches := make(map[string][]labeledSeg, len(models)-1)
    for _, model := range models[1:] {
    for _, candidate := range modelSegs[model] {
    if overlaps(anchor.seg, candidate.seg) {
    matches[model] = append(matches[model], candidate)
    }
    }
    // buildComparisonGroup assembles anchor + first match per other model.
    //
    // The result is ordered like models: the anchor first (for models[0]), then
    // matches[m][0] for each later model m. Every later model must have at least
    // one match.
    func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
        group := make([]labeledSeg, 0, len(models))
        group = append(group, anchor)
        for _, other := range models[1:] {
            first := matches[other][0]
            group = append(group, first)
        }
        return group
    }
    // checkGroupAnomaly checks a comparison group for label or certainty mismatches.
    //
    // group[0] is the reference. If any other member differs from it in species
    // or calltype, a "label_mismatch" anomaly is returned; a label mismatch takes
    // precedence over any certainty difference. Otherwise, if any member's
    // certainty differs, a "certainty_mismatch" anomaly is returned. Full
    // agreement yields nil.
    func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
        ref := group[0].label
        wantSpecies, wantCallType, wantCertainty := ref.Species, ref.CallType, ref.Certainty

        certaintyDiffers := false
        for _, member := range group[1:] {
            if member.label.Species != wantSpecies || member.label.CallType != wantCallType {
                return &Anomaly{File: path, Type: "label_mismatch", Segments: buildAnomalySegs(group, models)}
            }
            if member.label.Certainty != wantCertainty {
                // Keep scanning: a later member may still have a label mismatch.
                certaintyDiffers = true
            }
        }

        if !certaintyDiffers {
            return nil
        }
        return &Anomaly{File: path, Type: "certainty_mismatch", Segments: buildAnomalySegs(group, models)}
    }
    // buildAnomalySegs turns a comparison group into report entries. Entry i takes
    // its model name from models[i] and its times and label fields from group[i],
    // so group must not be longer than models.
    func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
        entries := make([]AnomalySegment, 0, len(group))
        for i := range group {
            member := group[i]
            entries = append(entries, AnomalySegment{
                Model:     models[i],
                Start:     member.seg.StartTime,
                End:       member.seg.EndTime,
                Species:   member.label.Species,
                CallType:  member.label.CallType,
                Certainty: member.label.Certainty,
            })
        }
        return entries
    }
    // overlaps returns true if two segments share any time overlap.
    //
    // The comparison is strict, so segments that merely touch (one ends exactly
    // where the other starts) do not overlap.
    func overlaps(a, b *utils.Segment) bool {
        aStartsBeforeBEnds := a.StartTime < b.EndTime
        bStartsBeforeAEnds := b.StartTime < a.EndTime
        return aStartsBeforeBEnds && bStartsBeforeAEnds
    }
    if len(matches[model]) == 0 {
    return nil
    }
    }
    return matches
    }
    }
    return anomalies
    }
    if err := validateAnomalyInput(input); err != nil {
    output.Error = err.Error()
    return output, err
    }
  • file deletion: calls_clip_labels_test.go (----------)
    [6.248737][6.549826:549875](),[6.549875][6.539425:539425]()
    package tools
    import (
    "encoding/csv"
    "os"
    "path/filepath"
    "strings"
    "testing"
    "skraak/utils"
    )
    // --- test helpers (test file only) ---
    // writeDataFile writes df to dir/name via df.Write and fails the test
    // immediately if the write returns an error.
    func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
        t.Helper()

        target := filepath.Join(dir, name)
        err := df.Write(target)
        if err == nil {
            return
        }
        t.Fatalf("write .data file %s: %v", name, err)
    }
    // writeMapping writes the given JSON text to dir/mapping.json (mode 0644) and
    // fails the test immediately if the write returns an error.
    func writeMapping(t *testing.T, dir, json string) {
        t.Helper()

        target := filepath.Join(dir, "mapping.json")
        err := os.WriteFile(target, []byte(json), 0644)
        if err == nil {
            return
        }
        t.Fatalf("write mapping.json: %v", err)
    }
    // parseCSV reads the output CSV, returning header and rows.
    //
    // The first record is returned as the header and all remaining records as
    // rows. Any failure to open or read the file fails the test immediately.
    func parseCSV(t *testing.T, path string) ([]string, [][]string) {
        t.Helper()

        file, openErr := os.Open(path)
        if openErr != nil {
            t.Fatalf("open CSV %s: %v", path, openErr)
        }
        defer file.Close()

        reader := csv.NewReader(file)

        header, headerErr := reader.Read()
        if headerErr != nil {
            t.Fatalf("read header: %v", headerErr)
        }

        rows, rowsErr := reader.ReadAll()
        if rowsErr != nil {
            t.Fatalf("read rows: %v", rowsErr)
        }
        return header, rows
    }
    // clipLabels calls CallsClipLabels with standard test parameters.
    //
    // The defaults read mapping.json from dir and write clip_labels.csv into
    // dir, using 5-unit clips with no overlap, a minimum label overlap of 0.25
    // and FinalClip "full". Each function in extra may adjust the input before
    // the call. Any error from CallsClipLabels fails the test immediately.
    func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
        t.Helper()

        var input CallsClipLabelsInput
        input.Folder = dir
        input.MappingPath = filepath.Join(dir, "mapping.json")
        input.OutputPath = filepath.Join(dir, "clip_labels.csv")
        input.ClipDuration = 5
        input.ClipOverlap = 0
        input.MinLabelOverlap = 0.25
        input.FinalClip = "full"

        // Apply caller overrides in the order given.
        for i := range extra {
            extra[i](&input)
        }

        result, err := CallsClipLabels(input)
        if err != nil {
            t.Fatalf("CallsClipLabels: %v", err)
        }
        return result
    }
    // --- tests ---
    func TestClipLabels_RealClassTrue(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 20},
    Segments: []*utils.Segment{
    {
    StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    // Header: file, start_time, end_time, Kiwi
    if len(header) != 4 || header[3] != "Kiwi" {
    t.Fatalf("header = %v, want [..., Kiwi]", header)
    }
    // Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
    // Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
    // Clip 10-15, 15-20 → Kiwi=False
    kiwiCol := 3
    for i, row := range rows {
    switch row[1] {
    case "0.0", "5.0":
    if row[kiwiCol] != "True" {
    t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
    }
    case "10.0", "15.0":
    if row[kiwiCol] != "False" {
    t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
    }
    }
    }
    if out.PerClassTrueCount["Kiwi"] != 2 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
    }
    }
    func TestClipLabels_GapClipsAllFalse(t *testing.T) {
    dir := t.TempDir()
    // 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 15},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    if out.ClipsAllFalseGap != 2 {
    t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
    if out.RowsWritten != 3 {
    t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
    }
    }
    // TestClipLabels_NegativeOverridesPositive checks that a clip overlapped by
    // both a real class and a __NEGATIVE__ segment is written all-False, while
    // a clip overlapped only by the real class stays True.
    func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
    	dir := t.TempDir()
    	// Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
    	// Clip 5-10 overlaps only Kiwi (3s) → True
    	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    		Meta: &utils.DataMeta{Duration: 10},
    		Segments: []*utils.Segment{
    			{
    				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
    				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    			},
    			{
    				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
    				Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
    			},
    		},
    	})
    	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
    	out := clipLabels(t, dir)
    	if out.ClipsNegative != 1 {
    		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
    	}
    	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    	// Fail with a readable message instead of an index-out-of-range panic
    	// when the exporter wrote fewer rows (or columns) than the checks need.
    	if len(rows) < 2 {
    		t.Fatalf("got %d rows, want at least 2", len(rows))
    	}
    	for i, row := range rows[:2] {
    		if len(row) < 4 {
    			t.Fatalf("row %d has %d columns, want at least 4", i, len(row))
    		}
    	}
    	// Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
    	if rows[0][3] != "False" {
    		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
    	}
    	// Clip 5-10: only Kiwi overlaps (3s) → True
    	if rows[1][3] != "True" {
    		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
    	}
    }
    func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
    dir := t.TempDir()
    // Don't Know segment 0-5, Kiwi segment 6-10
    // Clip 0-5 overlaps __IGNORE__ → excluded
    // Clip 5-10 overlaps Kiwi → emitted with True
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 15},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
    },
    {
    StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
    out := clipLabels(t, dir)
    if out.ClipsIgnored != 1 {
    t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
    }
    if out.SegmentsIgnored != 1 {
    t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
    }
    // Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
    if out.RowsWritten != 2 {
    t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
    }
    }
    func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
    dir := t.TempDir()
    // Same time range, two filters. Only "wanted" should contribute.
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 10},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{
    {Species: "Kiwi", Certainty: 100, Filter: "wanted"},
    {Species: "Not", Certainty: 100, Filter: "unwanted"},
    },
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
    out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
    // Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
    // Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
    if out.ClipsNegative != 0 {
    t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
    }
    func TestClipLabels_MappingCoverageError(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 10},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    input := CallsClipLabelsInput{
    Folder: dir,
    MappingPath: filepath.Join(dir, "mapping.json"),
    OutputPath: filepath.Join(dir, "clip_labels.csv"),
    ClipDuration: 5,
    ClipOverlap: 0,
    MinLabelOverlap: 0.25,
    FinalClip: "full",
    }
    _, err := CallsClipLabels(input)
    if err == nil {
    t.Fatal("expected error for missing species in mapping")
    }
    if !strings.Contains(err.Error(), "Mystery") {
    t.Errorf("error should mention missing species, got: %v", err)
    }
    }
    // TestClipLabels_AppendMode checks that re-running the exporter against an
    // output CSV that already contains the same clips is rejected with a
    // duplicate error.
    func TestClipLabels_AppendMode(t *testing.T) {
    	dir := t.TempDir()
    	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    	// A single 5s recording, fully covered by one Kiwi segment.
    	kiwi := &utils.Segment{
    		StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    		Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    	}
    	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
    		Meta:     &utils.DataMeta{Duration: 5},
    		Segments: []*utils.Segment{kiwi},
    	})
    	first := clipLabels(t, dir)
    	if first.RowsWritten != 1 {
    		t.Fatalf("first run: RowsWritten = %d, want 1", first.RowsWritten)
    	}
    	// Second run: same folder and same output file. The CSV now exists, so
    	// the rows generated again collide with the rows already in it.
    	var again CallsClipLabelsInput
    	again.Folder = dir
    	again.MappingPath = filepath.Join(dir, "mapping.json")
    	again.OutputPath = filepath.Join(dir, "clip_labels.csv")
    	again.ClipDuration = 5
    	again.ClipOverlap = 0
    	again.MinLabelOverlap = 0.25
    	again.FinalClip = "full"
    	_, err := CallsClipLabels(again)
    	if err == nil {
    		t.Fatal("expected duplicate error on second run with same folder")
    	}
    	if msg := err.Error(); !strings.Contains(msg, "duplicate") {
    		t.Errorf("error should mention duplicate, got: %v", err)
    	}
    }
    // TestClipLabels_MultipleFiles checks that every .data file in the folder
    // is parsed and that rows for each recording end up in the one output CSV.
    func TestClipLabels_MultipleFiles(t *testing.T) {
    	dir := t.TempDir()
    	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    	// Both recordings carry the same Kiwi segment at 0-5; they differ only
    	// in duration (10s vs 5s).
    	newKiwi := func() *utils.Segment {
    		return &utils.Segment{
    			StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    			Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    		}
    	}
    	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
    		Meta:     &utils.DataMeta{Duration: 10},
    		Segments: []*utils.Segment{newKiwi()},
    	})
    	writeDataFile(t, dir, "b.wav.data", &utils.DataFile{
    		Meta:     &utils.DataMeta{Duration: 5},
    		Segments: []*utils.Segment{newKiwi()},
    	})
    	out := clipLabels(t, dir)
    	if got := out.DataFilesParsed; got != 2 {
    		t.Errorf("DataFilesParsed = %d, want 2", got)
    	}
    	// a yields clips 0-5 and 5-10, b yields clip 0-5: three rows in total.
    	if got := out.RowsWritten; got != 3 {
    		t.Errorf("RowsWritten = %d, want 3", got)
    	}
    	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    	perFile := make(map[string]int)
    	for i := range rows {
    		perFile[rows[i][0]]++
    	}
    	if n := len(perFile); n != 2 {
    		t.Errorf("expected 2 distinct files in CSV, got %d", n)
    	}
    }
  • file deletion: calls_clip_labels.go (----------)
    [6.248737][6.563754:563798](),[6.563798][6.549877:549877]()
    package tools
    import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "slices"
    "sort"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsClipLabelsInput configures the clip-labels exporter.
    type CallsClipLabelsInput struct {
    // Folder is the directory scanned for .data files.
    Folder string `json:"folder"`
    // MappingPath is the mapping file loaded with utils.LoadMappingFile.
    MappingPath string `json:"mapping"`
    // Filter, when non-empty, restricts processing to labels whose Filter
    // field equals it; an empty value accepts every label.
    Filter string `json:"filter,omitempty"`
    // OutputPath is the CSV to write. A non-empty existing file with a
    // matching header is appended to; otherwise the file is created/truncated.
    OutputPath string `json:"output"`
    // ClipDuration is the clip window length; must be > 0.
    // NOTE(review): presumably seconds, like MinLabelOverlap — confirm.
    ClipDuration float64 `json:"clip_duration"`
    // ClipOverlap is the overlap between consecutive clip windows; must lie
    // in [0, ClipDuration).
    ClipOverlap float64 `json:"clip_overlap"`
    // MinLabelOverlap is the minimum overlap (seconds) between a segment and
    // a clip window for the segment to affect that clip; must be > 0.
    MinLabelOverlap float64 `json:"min_label_overlap"`
    // FinalClip is parsed by utils.ParseFinalClipMode (the tests use "full").
    FinalClip string `json:"final_clip"`
    }
    // CallsClipLabelsOutput summarises a run.
    type CallsClipLabelsOutput struct {
    // Folder, OutputPath and Filter echo the corresponding input fields.
    Folder string `json:"folder"`
    OutputPath string `json:"output"`
    Filter string `json:"filter,omitempty"`
    // Classes lists the real (non-sentinel) classes from the mapping; each
    // one is a True/False column in the CSV.
    Classes []string `json:"classes"`
    // DataFilesParsed is the number of .data files parsed from Folder.
    DataFilesParsed int `json:"data_files_parsed"`
    ClipsNegative int `json:"clips_negative"` // emitted, all-False because of __NEGATIVE__
    ClipsIgnored int `json:"clips_ignored"` // excluded from output because of __IGNORE__ overlap
    SegmentsIgnored int `json:"segments_ignored"` // segments whose species maps to __IGNORE__
    ClipsAllFalseGap int `json:"clips_all_false_gap"` // emitted, all-False because no overlap
    // PerClassTrueCount counts, per class name, the clips whose column is True.
    PerClassTrueCount map[string]int `json:"per_class_true_count"`
    // AppendedToFile is true when rows were appended to an existing,
    // non-empty output CSV rather than written to a fresh file.
    AppendedToFile bool `json:"appended_to_file"`
    // ExistingRowsFound is the number of distinct (file, start, end) keys
    // read from the existing output CSV.
    ExistingRowsFound int `json:"existing_rows_found"`
    // RowsWritten is the number of rows written by this run.
    RowsWritten int `json:"rows_written"`
    }
    // resolvedSeg is a segment that has been classified by the mapping and is
    // ready for overlap-checking against clip windows.
    type resolvedSeg struct {
    // start and end are copied from the segment's StartTime and EndTime.
    start, end float64
    // kind is the mapping category returned by mapping.Classify for the
    // label's species (real class, __NEGATIVE__ or __IGNORE__).
    kind utils.MappingKind
    classIdx int // valid only when kind == utils.MappingReal
    }
    // clipDisposition describes the outcome for a single clip window.
    // classifyClip picks the value with the priority
    // ignored > negative > labelled > gap.
    type clipDisposition int
    const (
    dispoLabelled clipDisposition = iota // at least one class column is True
    dispoNegative // __NEGATIVE__ hit, all class columns False
    dispoGap // no segment overlaps, all class columns False
    dispoIgnored // __IGNORE__ hit, clip excluded from output
    )
    // clipLabelsRow is one row of the output CSV.
    type clipLabelsRow struct {
    // file is the value written to the "file" column.
    file string
    // start and end are the clip window bounds; they are rendered with
    // formatTime when the row is written.
    start float64
    end float64
    // flags holds one entry per class column; true is written as "True",
    // false as "False".
    flags []bool
    }
    // rowKey is used for duplicate detection.
    // start and end are kept as strings: for rows already in the CSV they are
    // the raw field text, for new rows they are the formatTime rendering, so
    // two keys match only when the textual forms are identical.
    type rowKey struct {
    file string
    start string
    end string
    }
    // CallsClipLabels reads .data files from a single folder and writes a CSV in
    // OpenSoundScape's clip_labels format: one row per clip per file, with one
    // True/False column per class in the mapping.
    //
    // Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
    // column is True when any annotation of that class overlaps the window by
    // ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
    // get no column and contribute no labels.
    // parsedClipFile holds a parsed .data file for clip-labels processing.
    type parsedClipFile struct {
    // path is the .data file path as returned by the folder scan.
    path string
    // df is the parsed content of that file.
    df *utils.DataFile
    }
    // validateClipLabelsInput validates the input parameters and returns the parsed finalClipMode.
    func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
    finalClipMode, err := utils.ParseFinalClipMode(input.FinalClip)
    if err != nil {
    return 0, err
    }
    if input.ClipDuration <= 0 {
    return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
    }
    if input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration {
    return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
    }
    if input.MinLabelOverlap <= 0 {
    return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
    }
    // parseClipLabelsDataFiles finds and parses .data files, collecting species seen.
    func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
    dataPaths, err := utils.FindDataFiles(folder)
    if err != nil {
    return nil, fmt.Errorf("scan folder %s: %w", folder, err)
    }
    if len(dataPaths) == 0 {
    return nil, fmt.Errorf("no .data files found in %s", folder)
    }
    speciesSeen := map[string]bool{}
    for _, p := range dataPaths {
    df, err := utils.ParseDataFile(p)
    if err != nil {
    return nil, fmt.Errorf("parse %s: %w", p, err)
    }
    if df.Meta == nil || df.Meta.Duration <= 0 {
    return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", p)
    }
    for _, seg := range df.Segments {
    for _, lbl := range seg.Labels {
    if filter != "" && lbl.Filter != filter {
    continue
    }
    speciesSeen[lbl.Species] = true
    }
    }
    parsed = append(parsed, parsedClipFile{path: p, df: df})
    }
    if missing := mapping.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
    return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
    }
    return parsed, nil
    }
    // dedupClipLabelsRows checks for duplicate rows within new rows and against existing CSV rows.
    func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
    dedup := make(map[rowKey]bool, len(existing)+len(rows))
    for k := range existing {
    dedup[k] = true
    }
    // CallsClipLabels exports clip labels for every .data file in input.Folder
    // to the CSV at input.OutputPath: one row per clip window per file, with
    // one True/False column per real class in the mapping.
    //
    // Steps, in order: validate the numeric/mode inputs, load the mapping,
    // parse the .data files, read any existing output CSV (which decides
    // append mode and seeds duplicate detection), generate rows per file,
    // reject duplicate (file, start, end) keys, then write.
    //
    // On error the partially populated summary is returned alongside it; the
    // CSV is only written after every earlier step has succeeded.
    func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
    out := CallsClipLabelsOutput{
    Folder: input.Folder,
    OutputPath: input.OutputPath,
    PerClassTrueCount: map[string]int{},
    }
    finalClipMode, err := validateClipLabelsInput(input)
    if err != nil {
    return out, err
    }
    mapping, err := utils.LoadMappingFile(input.MappingPath)
    if err != nil {
    return out, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
    }
    // Only real (non-sentinel) classes get a CSV column.
    classes := mapping.Classes()
    if len(classes) == 0 {
    return out, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
    }
    out.Classes = classes
    out.Filter = input.Filter
    // classIdx maps a class name to its column position within classes.
    classIdx := map[string]int{}
    for i, c := range classes {
    classIdx[c] = i
    }
    parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
    if err != nil {
    return out, err
    }
    out.DataFilesParsed = len(parsed)
    // The literal prefix slice is freshly allocated, so this append does not
    // alias or modify classes.
    expectedHeader := append([]string{"file", "start_time", "end_time"}, classes...)
    existing, appendMode, err := loadExistingRows(input.OutputPath, expectedHeader)
    if err != nil {
    return out, err
    }
    out.AppendedToFile = appendMode
    out.ExistingRowsFound = len(existing)
    // cwd and the absolute folder are passed down so each file's CSV path can
    // be expressed relative to the working directory.
    cwd, err := os.Getwd()
    if err != nil {
    return out, fmt.Errorf("getwd: %w", err)
    }
    folderAbs, err := filepath.Abs(input.Folder)
    if err != nil {
    return out, fmt.Errorf("abs %s: %w", input.Folder, err)
    }
    rows := make([]clipLabelsRow, 0, 1024)
    for _, pf := range parsed {
    // The per-file step also updates the counters in out.
    fileRows, err := processClipLabelsFile(pf.path, pf.df, mapping, classIdx, classes, input, finalClipMode, cwd, folderAbs, &out)
    if err != nil {
    return out, err
    }
    rows = append(rows, fileRows...)
    }
    // Duplicates are rejected before anything is written, so a failed run
    // leaves the existing CSV untouched.
    if err := dedupClipLabelsRows(rows, existing); err != nil {
    return out, err
    }
    if err := writeRows(input.OutputPath, expectedHeader, rows, appendMode); err != nil {
    return out, err
    }
    out.RowsWritten = len(rows)
    // NOTE(review): out.Classes is the same slice as classes, so this sorts
    // the slice returned by mapping.Classes() in place, after the header was
    // written in the pre-sort order. Confirm Classes() already returns sorted
    // names, otherwise the reported order can differ from the CSV columns.
    sort.Strings(out.Classes)
    return out, nil
    }
    // processClipLabelsFile generates clip-labels rows for a single .data file.
    func processClipLabelsFile(
    path string,
    df *utils.DataFile,
    mapping utils.MappingFile,
    classIdx map[string]int,
    classes []string,
    input CallsClipLabelsInput,
    finalClipMode utils.FinalClipMode,
    cwd, folderAbs string,
    out *CallsClipLabelsOutput,
    ) ([]clipLabelsRow, error) {
    windows, err := utils.GenerateClipTimes(
    df.Meta.Duration,
    input.ClipDuration,
    input.ClipOverlap,
    finalClipMode,
    10,
    )
    if err != nil {
    return nil, fmt.Errorf("generate clip windows for %s: %w", path, err)
    }
    if len(windows) == 0 {
    return nil, nil
    }
    // resolveSegments maps segments to their classification and filters out mismatches.
    func resolveSegments(
    segments []*utils.Segment,
    filter string,
    minLabelOverlap float64,
    mapping utils.MappingFile,
    classIdx map[string]int,
    out *CallsClipLabelsOutput,
    ) []resolvedSeg {
    segs := make([]resolvedSeg, 0, len(segments))
    for _, seg := range segments {
    if seg.EndTime-seg.StartTime < minLabelOverlap {
    continue
    }
    for _, lbl := range seg.Labels {
    if filter != "" && lbl.Filter != filter {
    continue
    }
    canon, kind, ok := mapping.Classify(lbl.Species)
    if !ok {
    continue
    }
    switch kind {
    case utils.MappingIgn:
    out.SegmentsIgnored++
    segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
    case utils.MappingNeg:
    segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
    case utils.MappingReal:
    idx, present := classIdx[canon]
    if !present {
    continue
    }
    segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind, classIdx: idx})
    }
    }
    }
    // computeWavRelPath computes the relative path from cwd to the WAV file corresponding to a .data file.
    func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
    wavName := strings.TrimSuffix(filepath.Base(dataPath), ".data")
    wavAbs := filepath.Join(folderAbs, wavName)
    rel, err := filepath.Rel(cwd, wavAbs)
    if err != nil {
    rel = wavAbs
    }
    // Ensure relative paths start with ./ to match OPSO / pandas convention.
    if rel != "" && !filepath.IsAbs(rel) && !strings.HasPrefix(rel, "."+string(filepath.Separator)) {
    rel = "." + string(filepath.Separator) + rel
    }
    // labelClipWindows turns the clip windows of one recording into CSV rows.
    // Each window is classified against segs: ignored windows are counted and
    // skipped, every other window yields a row for file rel whose flags are
    // all false except the classes hit by a labelled window. The counters in
    // out are updated as a side effect. The result is nil when no row is
    // produced.
    func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
    	var result []clipLabelsRow
    	nClasses := len(classes)
    	for _, win := range windows {
    		outcome, hits := classifyClip(win, segs, minLabelOverlap, nClasses)
    		if outcome == dispoIgnored {
    			out.ClipsIgnored++
    			continue
    		}
    		flags := make([]bool, nClasses)
    		if outcome == dispoNegative {
    			out.ClipsNegative++
    		} else if outcome == dispoGap {
    			out.ClipsAllFalseGap++
    		} else if outcome == dispoLabelled {
    			for idx, hit := range hits {
    				if !hit {
    					continue
    				}
    				flags[idx] = true
    				out.PerClassTrueCount[classes[idx]]++
    			}
    		}
    		result = append(result, clipLabelsRow{file: rel, start: win.Start, end: win.End, flags: flags})
    	}
    	return result
    }
    // classifyClip decides what happens to one clip window. Every segment that
    // overlaps the window by at least minLabelOverlap is taken into account;
    // the result follows the priority __IGNORE__ > __NEGATIVE__ > class labels,
    // falling back to "gap" when nothing relevant overlaps. The returned slice
    // has one entry per class (nil for an ignored clip).
    func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
    	var sawIgnore, sawNegative bool
    	hits := make([]bool, nClasses)
    	for i := range segs {
    		seg := segs[i]
    		if overlapSeconds(seg.start, seg.end, w.Start, w.End) < minLabelOverlap {
    			continue
    		}
    		switch seg.kind {
    		case utils.MappingIgn:
    			sawIgnore = true
    		case utils.MappingNeg:
    			sawNegative = true
    		case utils.MappingReal:
    			hits[seg.classIdx] = true
    		}
    	}
    	switch {
    	case sawIgnore:
    		return dispoIgnored, nil
    	case sawNegative:
    		return dispoNegative, hits
    	case slices.Contains(hits, true):
    		return dispoLabelled, hits
    	default:
    		return dispoGap, hits
    	}
    }
    // loadExistingRows inspects a previously written output CSV.
    //
    // A missing or zero-length file yields (nil, false, nil): nothing to
    // deduplicate against and no append. Otherwise the header must equal
    // expectedHeader exactly, and the (file, start, end) fields of every row
    // are collected as keys; the second result is then true (append mode).
    // Rows with fewer than three fields are reported as malformed.
    func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
    	info, statErr := os.Stat(outputPath)
    	switch {
    	case statErr != nil && os.IsNotExist(statErr):
    		return nil, false, nil
    	case statErr != nil:
    		return nil, false, fmt.Errorf("stat %s: %w", outputPath, statErr)
    	case info.Size() == 0:
    		return nil, false, nil
    	}
    	file, openErr := os.Open(outputPath)
    	if openErr != nil {
    		return nil, false, fmt.Errorf("open existing %s: %w", outputPath, openErr)
    	}
    	defer func() { _ = file.Close() }()
    	reader := csv.NewReader(file)
    	// Accept rows of any width; only the first three fields are used.
    	reader.FieldsPerRecord = -1
    	gotHeader, headErr := reader.Read()
    	if headErr != nil {
    		return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, headErr)
    	}
    	if !slices.Equal(gotHeader, expectedHeader) {
    		return nil, false, fmt.Errorf("column-set mismatch in existing %s\n existing: %s\n new: %s",
    			outputPath, strings.Join(gotHeader, ","), strings.Join(expectedHeader, ","))
    	}
    	seen := map[rowKey]bool{}
    	for {
    		record, readErr := reader.Read()
    		if readErr == io.EOF {
    			return seen, true, nil
    		}
    		if readErr != nil {
    			return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, readErr)
    		}
    		if len(record) < 3 {
    			return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, record)
    		}
    		seen[rowKey{file: record[0], start: record[1], end: record[2]}] = true
    	}
    }
    // overlapSeconds reports how long the intervals [aStart, aEnd) and
    // [bStart, bEnd) overlap. Intervals that are disjoint or merely touch
    // overlap by 0.
    func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
    	switch from, to := max(aStart, bStart), min(aEnd, bEnd); {
    	case to <= from:
    		return 0
    	default:
    		return to - from
    	}
    }
    // formatTime renders v the way pandas' to_csv prints floats by default:
    // the shortest decimal text that round-trips, with ".0" appended when that
    // text has no decimal point.
    // e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
    func formatTime(v float64) string {
    	text := strconv.FormatFloat(v, 'f', -1, 64)
    	if strings.IndexByte(text, '.') >= 0 {
    		return text
    	}
    	return text + ".0"
    }
    // writeRows writes the clip-labels rows to the CSV file at path.
    //
    // In append mode the file is opened for appending and no header is
    // written; otherwise the file is created or truncated and header is
    // written first. Times are rendered with formatTime and flags as
    // "True"/"False". An error from opening, writing, flushing or closing the
    // file is returned.
    func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) (err error) {
    	openFlags := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
    	if appendMode {
    		openFlags = os.O_APPEND | os.O_WRONLY
    	}
    	f, err := os.OpenFile(path, openFlags, 0644)
    	if err != nil {
    		return fmt.Errorf("open %s for write: %w", path, err)
    	}
    	// A failed Close on a file we just wrote can mean lost data, so report
    	// it unless an earlier error is already being returned.
    	defer func() {
    		if cerr := f.Close(); cerr != nil && err == nil {
    			err = fmt.Errorf("close %s: %w", path, cerr)
    		}
    	}()
    	w := csv.NewWriter(f)
    	if !appendMode {
    		if err := w.Write(header); err != nil {
    			return fmt.Errorf("write header: %w", err)
    		}
    	}
    	if len(rows) == 0 {
    		w.Flush()
    		return w.Error()
    	}
    	// One record buffer is reused for every row; all rows are expected to
    	// carry the same number of flags as the first.
    	rec := make([]string, 3+len(rows[0].flags))
    	for _, r := range rows {
    		rec[0] = r.file
    		rec[1] = formatTime(r.start)
    		rec[2] = formatTime(r.end)
    		for i, b := range r.flags {
    			if b {
    				rec[3+i] = "True"
    			} else {
    				rec[3+i] = "False"
    			}
    		}
    		if err := w.Write(rec); err != nil {
    			return fmt.Errorf("write row: %w", err)
    		}
    	}
    	w.Flush()
    	return w.Error()
    }
    return rel, nil
    }
    return segs
    }
    return labelClipWindows(windows, segs, rel, classes, input.MinLabelOverlap, out), nil
    }
    }
    segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)
    rel, err := computeWavRelPath(path, cwd, folderAbs)
    if err != nil {
    return nil, err
    for _, r := range rows {
    k := rowKey{file: r.file, start: formatTime(r.start), end: formatTime(r.end)}
    if dedup[k] {
    return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", k.file, k.start, k.end)
    }
    dedup[k] = true
    }
    return nil
    }
    parsed := make([]parsedClipFile, 0, len(dataPaths))
    return finalClipMode, nil
    }
  • file deletion: calls_clip_bench_test.go (----------)
    [6.248737][6.574044:574092](),[6.574092][6.563800:563800]()
    package tools
    import (
    "encoding/binary"
    "math"
    "os"
    "testing"
    "skraak/utils"
    )
    const benchWAV = "../audio/20211028_211500.WAV"
    // ==================== WAV I/O ====================
    // BenchmarkReadWAV measures reading the whole benchWAV file with
    // utils.ReadWAVSamples; a read error aborts the benchmark.
    func BenchmarkReadWAV(b *testing.B) {
    	b.ReportAllocs()
    	for remaining := b.N; remaining > 0; remaining-- {
    		if _, _, err := utils.ReadWAVSamples(benchWAV); err != nil {
    			b.Fatal(err)
    		}
    	}
    }
    func BenchmarkConvertToFloat64_16bit(b *testing.B) {
    // Simulate 16-bit mono WAV data (same size as test file: 14.32M samples)
    numSamples := 14320000
    data := make([]byte, numSamples*2)
    for i := range numSamples {
    binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
    }
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
    _ = convertToFloat64Bench(data, 16, 1)
    }
    }
    // convertToFloat64Bench is a benchmark-only copy of utils' unexported
    // convertToFloat64. For every frame (bitsPerSample/8 * channels bytes) it
    // decodes the first two bytes as a little-endian signed 16-bit sample and
    // scales it by 1/32768, so only the first channel of 16-bit data is
    // handled meaningfully.
    func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
    	frameSize := (bitsPerSample / 8) * channels
    	out := make([]float64, len(data)/frameSize)
    	for i := range out {
    		start := i * frameSize
    		raw := binary.LittleEndian.Uint16(data[start : start+2])
    		out[i] = float64(int16(raw)) / 32768.0
    	}
    	return out
    }
    // BenchmarkWriteWAV measures writing the 872-895 segment of benchWAV to a
    // temporary WAV file. Each iteration creates, writes and removes its own
    // temp file. Failing to read the input or to create a temp file aborts
    // the benchmark instead of timing a no-op or panicking on a nil file.
    func BenchmarkWriteWAV(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	b.Logf("segment samples=%d", len(segSamples))
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		f, tmpErr := os.CreateTemp("", "bench_*.wav")
    		if tmpErr != nil {
    			b.Fatal(tmpErr)
    		}
    		utils.WriteWAVFile(f.Name(), segSamples, sr)
    		f.Close()
    		os.Remove(f.Name())
    	}
    }
    // ==================== Resample ====================
    // BenchmarkResampleRate_48k measures utils.ResampleRate on the whole
    // benchWAV sample buffer for a 48000 → 16000 conversion. A read error
    // aborts the benchmark so it never times an empty input.
    func BenchmarkResampleRate_48k(b *testing.B) {
    	samples, _, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	b.Logf("resampling %d samples 48000->16000", len(samples))
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		utils.ResampleRate(samples, 48000, 16000)
    	}
    }
    // BenchmarkResampleRate_250k measures utils.ResampleRate on the benchWAV
    // sample buffer treated as 250000 Hz input resampled to 16000 Hz. A read
    // error aborts the benchmark so it never times an empty input.
    func BenchmarkResampleRate_250k(b *testing.B) {
    	samples, _, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	b.Logf("resampling %d samples 250000->16000", len(samples))
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		utils.ResampleRate(samples, 250000, 16000)
    	}
    }
    // ==================== Spectrogram pipeline ====================
    // BenchmarkExtractSegment measures cutting the 872-895 segment out of the
    // full benchWAV sample buffer. A read error aborts the benchmark with the
    // underlying cause rather than the later "empty segment" failure.
    func BenchmarkExtractSegment(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	b.Logf("full file: %d samples, sr=%d", len(samples), sr)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    		if len(seg) == 0 {
    			b.Fatal("empty segment")
    		}
    	}
    }
    // BenchmarkPowerSpectrumFFT_512 measures one 512-point frame: Hann
    // windowing of the first 512 samples of the 872-895 segment followed by
    // utils.PowerSpectrumFFT. The frame, power and scratch buffers are
    // allocated once outside the timed loop. The benchmark aborts when the
    // input cannot be read or the segment is shorter than one frame, instead
    // of panicking on an out-of-range index.
    func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
    	n := 512
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	if len(segSamples) < n {
    		b.Fatalf("segment has %d samples, need at least %d", len(segSamples), n)
    	}
    	frameData := make([]float64, n)
    	power := make([]float64, n/2+1)
    	scratch := make([]complex128, n)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		// Simulate the windowing step (Hann) + FFT
    		for j := range n {
    			frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
    		}
    		utils.PowerSpectrumFFT(frameData, power, scratch)
    	}
    }
    // BenchmarkSpectrogram_23s measures utils.GenerateSpectrogram on the
    // 872-895 segment of benchWAV with the default config for 16000 Hz.
    // A read error aborts the benchmark.
    func BenchmarkSpectrogram_23s(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		spect := utils.GenerateSpectrogram(segSamples, cfg)
    		if spect == nil {
    			b.Fatal("nil spectrogram")
    		}
    	}
    }
    // BenchmarkSpectrogram_60s measures utils.GenerateSpectrogram on the 0-60
    // segment of benchWAV with the default config for 16000 Hz. A read error
    // aborts the benchmark.
    func BenchmarkSpectrogram_60s(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	b.Logf("60s segment samples=%d", len(segSamples))
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		spect := utils.GenerateSpectrogram(segSamples, cfg)
    		if spect == nil {
    			b.Fatal("nil spectrogram")
    		}
    	}
    }
    // ==================== Image creation & resize ====================
    // BenchmarkCreateGrayscaleImage measures utils.CreateGrayscaleImage on a
    // spectrogram of the 872-895 segment; the spectrogram is built once
    // outside the timed loop. A read error aborts the benchmark.
    func BenchmarkCreateGrayscaleImage(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		img := utils.CreateGrayscaleImage(spect)
    		if img == nil {
    			b.Fatal("nil image")
    		}
    	}
    }
    // BenchmarkCreateRGBImage measures colour-mapping a spectrogram of the
    // 872-895 segment with utils.ApplyL4Colormap and building an RGB image
    // from the result; both steps are inside the timed loop. A read error
    // aborts the benchmark.
    func BenchmarkCreateRGBImage(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		colorData := utils.ApplyL4Colormap(spect)
    		img := utils.CreateRGBImage(colorData)
    		if img == nil {
    			b.Fatal("nil image")
    		}
    	}
    }
    // BenchmarkApplyL4Colormap measures utils.ApplyL4Colormap on a spectrogram
    // of the 872-895 segment; the spectrogram is built once outside the timed
    // loop. A read error aborts the benchmark.
    func BenchmarkApplyL4Colormap(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		colorData := utils.ApplyL4Colormap(spect)
    		if colorData == nil {
    			b.Fatal("nil colormap")
    		}
    	}
    }
    // BenchmarkResizeGray224 measures utils.ResizeImage scaling the grayscale
    // spectrogram image of the 872-895 segment to 224x224; the image is built
    // once outside the timed loop. A read error aborts the benchmark.
    func BenchmarkResizeGray224(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	img := utils.CreateGrayscaleImage(spect)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		resized := utils.ResizeImage(img, 224, 224)
    		if resized == nil {
    			b.Fatal("nil resize")
    		}
    	}
    }
    // BenchmarkResizeGray448 measures utils.ResizeImage scaling the grayscale
    // spectrogram image of the 872-895 segment to 448x448; the image is built
    // once outside the timed loop. A read error aborts the benchmark.
    func BenchmarkResizeGray448(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	img := utils.CreateGrayscaleImage(spect)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		resized := utils.ResizeImage(img, 448, 448)
    		if resized == nil {
    			b.Fatal("nil resize")
    		}
    	}
    }
    // ==================== PNG write ====================
    // BenchmarkWritePNG_224 measures utils.WritePNG on the 224x224 grayscale
    // spectrogram image of the 872-895 segment. Each iteration creates, writes
    // and removes its own temp file. Failing to read the input or to create a
    // temp file aborts the benchmark instead of writing to a nil file.
    func BenchmarkWritePNG_224(b *testing.B) {
    	samples, sr, err := utils.ReadWAVSamples(benchWAV)
    	if err != nil {
    		b.Fatal(err)
    	}
    	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    	cfg := utils.DefaultSpectrogramConfig(16000)
    	spect := utils.GenerateSpectrogram(segSamples, cfg)
    	img := utils.CreateGrayscaleImage(spect)
    	resized := utils.ResizeImage(img, 224, 224)
    	b.ResetTimer()
    	b.ReportAllocs()
    	for i := 0; i < b.N; i++ {
    		f, tmpErr := os.CreateTemp("", "bench_*.png")
    		if tmpErr != nil {
    			b.Fatal(tmpErr)
    		}
    		utils.WritePNG(resized, f)
    		f.Close()
    		os.Remove(f.Name())
    	}
    }
    // ==================== Full pipeline ====================
    // BenchmarkFullPipelineGray224 times the whole grayscale clip pipeline per
    // iteration: segment extraction, resampling to 16000 when the source rate is
    // higher, spectrogram, grayscale image, resize to 224x224, PNG write and WAV
    // write. Only the initial WAV read is excluded from the timed region.
    func BenchmarkFullPipelineGray224(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            // Without the fixture the loop would silently time empty input.
            b.Fatalf("reading %v: %v", benchWAV, err)
        }

        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
            outputSR := sr
            if sr > 16000 {
                segSamples = utils.ResampleRate(segSamples, sr, 16000)
                outputSR = 16000
            }
            cfg := utils.DefaultSpectrogramConfig(outputSR)
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            img := utils.CreateGrayscaleImage(spect)
            resized := utils.ResizeImage(img, 224, 224)

            f, err := os.CreateTemp("", "bench_*.png")
            if err != nil {
                // f is nil on failure; continuing would panic on f.Close.
                b.Fatalf("creating temp file: %v", err)
            }
            if err := utils.WritePNG(resized, f); err != nil {
                _ = f.Close()
                _ = os.Remove(f.Name())
                b.Fatalf("writing PNG: %v", err)
            }
            if err := f.Close(); err != nil {
                _ = os.Remove(f.Name())
                b.Fatalf("closing temp file: %v", err)
            }
            _ = os.Remove(f.Name()) // best-effort cleanup

            // The WAV is written to the same (now removed) temp path, as before.
            if err := utils.WriteWAVFile(f.Name(), segSamples, outputSR); err != nil {
                b.Fatalf("writing WAV: %v", err)
            }
            _ = os.Remove(f.Name()) // best-effort cleanup
        }
    }
    // BenchmarkFullPipelineColor448 times the whole colour clip pipeline per
    // iteration: segment extraction, resampling to 16000 when the source rate is
    // higher, spectrogram, L4 colormap, RGB image, resize to 448x448, PNG write
    // and WAV write. Only the initial WAV read is excluded from the timed region.
    func BenchmarkFullPipelineColor448(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            // Without the fixture the loop would silently time empty input.
            b.Fatalf("reading %v: %v", benchWAV, err)
        }

        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
            outputSR := sr
            if sr > 16000 {
                segSamples = utils.ResampleRate(segSamples, sr, 16000)
                outputSR = 16000
            }
            cfg := utils.DefaultSpectrogramConfig(outputSR)
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            colorData := utils.ApplyL4Colormap(spect)
            img := utils.CreateRGBImage(colorData)
            resized := utils.ResizeImage(img, 448, 448)

            f, err := os.CreateTemp("", "bench_*.png")
            if err != nil {
                // f is nil on failure; continuing would panic on f.Close.
                b.Fatalf("creating temp file: %v", err)
            }
            if err := utils.WritePNG(resized, f); err != nil {
                _ = f.Close()
                _ = os.Remove(f.Name())
                b.Fatalf("writing PNG: %v", err)
            }
            if err := f.Close(); err != nil {
                _ = os.Remove(f.Name())
                b.Fatalf("closing temp file: %v", err)
            }
            _ = os.Remove(f.Name()) // best-effort cleanup

            // The WAV is written to the same (now removed) temp path, as before.
            if err := utils.WriteWAVFile(f.Name(), segSamples, outputSR); err != nil {
                b.Fatalf("writing WAV: %v", err)
            }
            _ = os.Remove(f.Name()) // best-effort cleanup
        }
    }
    // ==================== Data dimension report ====================
    // TestPipelineDimensions logs the data sizes at each pipeline stage for the
    // 872..895 segment of benchWAV: raw samples, predicted and actual spectrogram
    // dimensions, the grayscale image, and the 224 and 448 resized images. It
    // only fails when the fixture cannot be read or the spectrogram is empty.
    func TestPipelineDimensions(t *testing.T) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            t.Fatalf("reading %v: %v", benchWAV, err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
            len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))

        cfg := utils.DefaultSpectrogramConfig(16000)
        numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
        numBins := cfg.WindowSize/2 + 1
        t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
            numBins, numFrames, numBins*numFrames)

        spect := utils.GenerateSpectrogram(segSamples, cfg)
        if len(spect) == 0 {
            // Guard the spect[0] access below.
            t.Fatal("empty spectrogram")
        }
        t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))

        img := utils.CreateGrayscaleImage(spect)
        bounds := img.Bounds()
        t.Logf("Grayscale image: %dx%d pixels, %d bytes",
            bounds.Dx(), bounds.Dy(), bounds.Dx()*bounds.Dy())

        resized := utils.ResizeImage(img, 224, 224)
        t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())
        resized448 := utils.ResizeImage(img, 448, 448)
        t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
    }
  • file deletion: calls_clip.go (----------)
    [6.248737][6.584765:584802](),[6.584802][6.574094:574094]()
    package tools
    import (
    "fmt"
    "image"
    "math"
    "os"
    "path/filepath"
    "runtime"
    "strings"
    "sync"
    "skraak/utils"
    )
    // CallsClipInput defines the input for the clip tool.
    //
    // Exactly which .data files are processed is decided by File/Folder; the
    // remaining fields select segments and control the generated clips.
    type CallsClipInput struct {
    // File is a single .data file to process; takes precedence over Folder.
    File string `json:"file"`
    // Folder is searched for .data files when File is empty.
    Folder string `json:"folder"`
    // Output is the destination folder for clips (required; created if missing).
    Output string `json:"output"`
    // Prefix is the leading component of every generated clip filename (required).
    Prefix string `json:"prefix"`
    // Filter is passed to the segment matcher together with Species and Certainty.
    Filter string `json:"filter"`
    // Species is split into a species name and call type before matching.
    Species string `json:"species"`
    // Certainty is passed to the segment matcher.
    Certainty int `json:"certainty"`
    // Size is the requested square image size; it is clamped to a valid range.
    Size int `json:"size"`
    // Color selects an L4-colormapped RGB image instead of grayscale.
    Color bool `json:"color"`
    // Night enables the day/night check and skips recordings that are not solar night.
    Night bool `json:"night"`
    // Day enables the day/night check and skips recordings that are not diurnally active.
    Day bool `json:"day"`
    // Location, when non-empty, is parsed into latitude, longitude and timezone
    // for the day/night check.
    Location string `json:"location,omitempty"`
    }
    // CallsClipOutput defines the output for the clip tool.
    type CallsClipOutput struct {
    // FilesProcessed counts .data files that produced clips or reported no errors.
    FilesProcessed int `json:"files_processed"`
    // SegmentsClipped is incremented by the number of clip paths each file returns.
    SegmentsClipped int `json:"segments_clipped"`
    // NightSkipped accumulates time-filter skips when the Night input flag is set.
    NightSkipped int `json:"night_skipped,omitempty"`
    // DaySkipped accumulates time-filter skips when the Night input flag is not set.
    DaySkipped int `json:"day_skipped,omitempty"`
    // OutputFiles lists the paths of generated clip files.
    OutputFiles []string `json:"output_files"`
    // Errors collects validation, per-file and per-segment error messages.
    Errors []string `json:"errors,omitempty"`
    }
    // CallsClip processes .data files and generates audio/image clips for
    // matching segments.
    //
    // It validates the required inputs, resolves the list of .data files, creates
    // the output folder, and then processes the files sequentially (two or fewer)
    // or with a worker pool (more than two). Per-file and per-segment failures
    // are collected in the returned output's Errors; a non-nil error is returned
    // only for validation, location parsing, file discovery, or output-folder
    // creation failures.
    func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
        var output CallsClipOutput

        // Validate required flags
        if err := validateClipInput(&output, input); err != nil {
            return output, err
        }

        // Parse species+calltype
        speciesName, callType := utils.ParseSpeciesCallType(input.Species)

        // Parse location into lat/lng/timezone; these stay at their zero values
        // when no location is supplied.
        var lat, lng float64
        var timezone string
        if input.Location != "" {
            var err error
            lat, lng, timezone, err = utils.ParseLocation(input.Location)
            if err != nil {
                output.Errors = append(output.Errors, err.Error())
                return output, err
            }
        }

        // Get list of .data files
        filePaths, err := resolveClipFiles(&output, input)
        if err != nil {
            return output, err
        }

        // Create output folder if it doesn't exist
        if err := os.MkdirAll(input.Output, 0755); err != nil {
            output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", err))
            return output, err
        }

        // Clamp image size to valid range
        imgSize := utils.ClampImageSize(input.Size)

        // Process .data files (parallel for larger batches)
        if len(filePaths) <= 2 {
            processFilesSequential(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
        } else {
            processFilesParallel(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
        }
        return output, nil
    }
    // validateClipInput checks that the mandatory clip flags are present: a
    // source (--file or --folder), --output and --prefix, in that order. The
    // first missing flag is recorded in output.Errors and returned as an error.
    func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
        switch {
        case input.File == "" && input.Folder == "":
            output.Errors = append(output.Errors, "either --file or --folder is required")
            return fmt.Errorf("missing required flag: --file or --folder")
        case input.Output == "":
            output.Errors = append(output.Errors, "--output is required")
            return fmt.Errorf("missing required flag: --output")
        case input.Prefix == "":
            output.Errors = append(output.Errors, "--prefix is required")
            return fmt.Errorf("missing required flag: --prefix")
        }
        return nil
    }
    // resolveClipFiles produces the .data paths to process. A non-empty
    // input.File wins and is returned alone; otherwise input.Folder is searched.
    // A search failure or an empty result is recorded in output.Errors and
    // returned as an error.
    func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
        if input.File != "" {
            return []string{input.File}, nil
        }

        found, err := utils.FindDataFiles(input.Folder)
        switch {
        case err != nil:
            output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
            return nil, err
        case len(found) == 0:
            output.Errors = append(output.Errors, "no .data files found")
            return nil, fmt.Errorf("no .data files found")
        }
        return found, nil
    }
    // processFilesSequential runs processFile over every path in order on the
    // calling goroutine and folds each result into output.
    func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
        for i := range filePaths {
            produced, nSkipped, failures := processFile(
                filePaths[i], input.Output, input.Prefix, input.Filter,
                speciesName, callType, input.Certainty, imgSize,
                input.Color, input.Night, input.Day, lat, lng, timezone,
            )
            accumulateFileResult(output, produced, nSkipped, failures, input.Night)
        }
    }
    // processFilesParallel processes .data files using worker goroutines.
    //
    // It starts min(NumCPU, 8, len(filePaths)) workers that pull paths from a
    // buffered jobs channel and push one result per file. Results are merged
    // into output on the calling goroutine, so output itself is never touched
    // concurrently. The order in which files are accumulated is not fixed.
    func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
        type fileResult struct {
            clips   []string
            skipped int
            errs    []string
        }

        workers := min(runtime.NumCPU(), 8, len(filePaths))
        jobs := make(chan string, len(filePaths))
        results := make(chan fileResult, len(filePaths))

        var wg sync.WaitGroup
        for range workers {
            wg.Go(func() {
                for dataPath := range jobs {
                    clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
                    results <- fileResult{clips: clips, skipped: skipped, errs: errs}
                }
            })
        }

        for _, dataPath := range filePaths {
            jobs <- dataPath
        }
        // Closing jobs lets the workers' range loops end; without it wg.Wait
        // never returns, results is never closed, and the loop below blocks
        // forever.
        close(jobs)

        go func() {
            wg.Wait()
            close(results)
        }()

        for r := range results {
            accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
        }
    }
    // accumulateFileResult folds one file's outcome into output: clip paths and
    // error messages are appended, the clip count is added to SegmentsClipped,
    // and skipped is added to NightSkipped when night is set, DaySkipped
    // otherwise. The file counts as processed unless it yielded errors and no
    // clips.
    func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
        skipCounter := &output.DaySkipped
        if night {
            skipCounter = &output.NightSkipped
        }
        *skipCounter += skipped

        output.SegmentsClipped += len(clips)
        output.OutputFiles = append(output.OutputFiles, clips...)
        output.Errors = append(output.Errors, errs...)

        if len(clips) == 0 && len(errs) > 0 {
            return
        }
        output.FilesProcessed++
    }
    // processFile handles one .data file end to end and returns the generated
    // clip paths, the number of time-filter skips (0 or 1), and any error
    // messages.
    //
    // The WAV path is the .data path with its ".data" suffix removed. Files with
    // no matching segments return early with nothing. When night or day is set,
    // the day/night check runs before the audio is read; a skipped file returns a
    // skip count of 1, and a failed check returns nothing (the warning is printed
    // by checkDayNightFilter).
    func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
        parsed, err := utils.ParseDataFile(dataPath)
        if err != nil {
            return nil, 0, []string{fmt.Sprintf("%s: failed to parse: %v", dataPath, err)}
        }

        // Derive the WAV path and its bare name (no directory, no extension).
        wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
        stem := filepath.Base(wavPath)
        stem = strings.TrimSuffix(stem, filepath.Ext(stem))

        wanted := filterSegments(parsed.Segments, filter, speciesName, callType, certainty)
        if len(wanted) == 0 {
            return nil, 0, nil
        }

        // Day/night filter first: it is cheaper than reading the full audio.
        if night || day {
            skip, checkErr := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
            if skip {
                return nil, 1, nil
            }
            if checkErr != nil {
                return nil, 0, nil
            }
        }

        // Read the audio once and share it across all matching segments.
        samples, sampleRate, err := utils.ReadWAVSamples(wavPath)
        if err != nil {
            return nil, 0, []string{fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err)}
        }

        made, failed := processSegments(wanted, dataPath, samples, sampleRate, outputDir, prefix, stem, imgSize, color)
        return made, 0, failed
    }
    // filterSegments keeps, in their original order, the segments for which
    // SegmentMatchesFilters accepts the given filter, species, call type and
    // certainty. The result is nil when nothing matches.
    func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
        var kept []*utils.Segment
        for i := range segments {
            candidate := segments[i]
            if !candidate.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
                continue
            }
            kept = append(kept, candidate)
        }
        return kept
    }
    // checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
    // recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
    func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
    result, err := IsNight(IsNightInput{
    FilePath: wavPath,
    Lat: lat,
    Lng: lng,
    Timezone: timezone,
    })
    if err != nil {
    fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
    return false, err
    }
    if night && !result.SolarNight {
    fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
    return true, nil
    }
    if day && !result.DiurnalActive {
    fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
    return true, nil
    }
    return false, nil
    }
    // processSegments produces clips for every given segment and returns the
    // clip paths plus one message per failed segment. More than two segments are
    // delegated to processSegmentsParallel; otherwise they are handled in order
    // on the calling goroutine.
    func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
        if len(segments) > 2 {
            return processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
        }

        var made, failed []string
        for _, s := range segments {
            paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
            if err == nil {
                made = append(made, paths...)
                continue
            }
            failed = append(failed, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err))
        }
        return made, failed
    }
    // processSegmentsParallel produces clips for the segments with a pool of
    // min(NumCPU, len(segments)) workers. It returns the clip paths and one
    // message per failed segment; the order of both follows worker completion,
    // not the input order.
    func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
        type outcome struct {
            paths   []string
            failure string
        }

        poolSize := min(runtime.NumCPU(), len(segments))

        // Both channels are buffered to the segment count, so the queue can be
        // filled and closed up front and no worker ever blocks on sending.
        queue := make(chan *utils.Segment, len(segments))
        for _, s := range segments {
            queue <- s
        }
        close(queue)
        done := make(chan outcome, len(segments))

        var pool sync.WaitGroup
        for range poolSize {
            pool.Go(func() {
                for s := range queue {
                    paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
                    if err == nil {
                        done <- outcome{paths: paths}
                        continue
                    }
                    done <- outcome{failure: fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err)}
                }
            })
        }

        // Close the outcome channel once every worker has drained the queue.
        go func() {
            pool.Wait()
            close(done)
        }()

        var made, failed []string
        for o := range done {
            if o.failure == "" {
                made = append(made, o.paths...)
                continue
            }
            failed = append(failed, o.failure)
        }
        return made, failed
    }
    // generateClip generates PNG and WAV files for a segment.
    //
    // Both files are named "<prefix>_<basename>_<start>_<end>" inside outputDir,
    // where start is floor(startTime) and end is ceil(endTime). Audio above
    // utils.DefaultMaxSampleRate is resampled down to that rate before the
    // spectrogram and the WAV are produced. The image is an L4-colormapped RGB
    // image when color is set, grayscale otherwise, resized to imgSize x imgSize.
    //
    // The PNG is created with O_EXCL, so an existing file is reported as an
    // error rather than overwritten. On success the returned slice holds the PNG
    // path followed by the WAV path; on any failure it is nil.
    func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
        var files []string

        // Calculate integer times for filename
        startInt := int(math.Floor(startTime))
        endInt := int(math.Ceil(endTime))

        // Build base filename
        baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
        wavPath := filepath.Join(outputDir, baseName+".wav")
        pngPath := filepath.Join(outputDir, baseName+".png")

        // Extract segment samples
        segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
        if len(segSamples) == 0 {
            return nil, fmt.Errorf("no samples in segment")
        }

        // Determine output sample rate (downsample if above the maximum)
        outputSampleRate := sampleRate
        if sampleRate > utils.DefaultMaxSampleRate {
            segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
            outputSampleRate = utils.DefaultMaxSampleRate
        }

        config := utils.DefaultSpectrogramConfig(outputSampleRate)
        spectrogram := utils.GenerateSpectrogram(segSamples, config)
        if spectrogram == nil {
            return nil, fmt.Errorf("failed to generate spectrogram")
        }

        // Create image (grayscale or color)
        var img image.Image
        if color {
            colorData := utils.ApplyL4Colormap(spectrogram)
            img = utils.CreateRGBImage(colorData)
        } else {
            img = utils.CreateGrayscaleImage(spectrogram)
        }
        if img == nil {
            return nil, fmt.Errorf("failed to create image")
        }
        resized := utils.ResizeImage(img, imgSize, imgSize)

        // Write PNG (O_EXCL fails atomically if file exists)
        pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
        if err != nil {
            if os.IsExist(err) {
                return nil, fmt.Errorf("file already exists: %s", pngPath)
            }
            return nil, fmt.Errorf("failed to create PNG: %w", err)
        }
        if err := utils.WritePNG(resized, pngFile); err != nil {
            _ = pngFile.Close()
            return nil, fmt.Errorf("failed to write PNG: %w", err)
        }
        if err := pngFile.Close(); err != nil {
            return nil, fmt.Errorf("failed to close PNG: %w", err)
        }
        // Report the PNG too, so callers see every file this call created.
        files = append(files, pngPath)

        // Write WAV
        if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
            return nil, fmt.Errorf("failed to write WAV: %w", err)
        }
        files = append(files, wavPath)
        return files, nil
    }
    files = append(files, pngPath)
    close(jobs)
    // Parse location into lat/lng/timezone
    var lat, lng float64
    var timezone string
    if input.Location != "" {
    var err error
    lat, lng, timezone, err = utils.ParseLocation(input.Location)
    if err != nil {
    output.Errors = append(output.Errors, err.Error())
    return output, err
    }
    }
  • file deletion: calls_classify_test.go (----------)
    [6.248737][6.590621:590667](),[6.590667][6.584804:584804]()
    package tools
    import (
    "testing"
    "skraak/utils"
    )
    // NewClassifyState builds a ClassifyState over dataFiles, caching for each
    // file the segments that pass the config's filters and the total number of
    // cached segments. Filtering is active when Filter or Species is non-empty
    // or Certainty is non-negative; otherwise each file's own segment slice is
    // cached as-is (shared, not copied).
    func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {
        filtering := config.Filter != "" || config.Species != "" || config.Certainty >= 0

        perFile := make([][]*utils.Segment, len(dataFiles))
        count := 0
        for idx, file := range dataFiles {
            if filtering {
                for _, s := range file.Segments {
                    if s.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
                        perFile[idx] = append(perFile[idx], s)
                    }
                }
            } else {
                perFile[idx] = file.Segments
            }
            count += len(perFile[idx])
        }

        return &ClassifyState{
            Config:       config,
            DataFiles:    dataFiles,
            filteredSegs: perFile,
            totalSegs:    count,
        }
    }
    func TestParseKeyBuffer(t *testing.T) {
    bindings := []KeyBinding{
    {Key: "k", Species: "Kiwi"},
    {Key: "d", Species: "Kiwi", CallType: "Duet"},
    {Key: "n", Species: "Don't Know"},
    {Key: "p", Species: "Morepork"},
    }
    state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)
    tests := []struct {
    key string
    want *BindingResult
    wantNil bool
    }{
    {"k", &BindingResult{Species: "Kiwi"}, false},
    {"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},
    {"n", &BindingResult{Species: "Don't Know"}, false},
    {"p", &BindingResult{Species: "Morepork"}, false},
    {"x", nil, true}, // unknown key
    }
    for _, tt := range tests {
    got := state.ParseKeyBuffer(tt.key)
    if tt.wantNil {
    if got != nil {
    t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)
    }
    } else {
    if got == nil {
    t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)
    continue
    }
    if got.Species != tt.want.Species {
    t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)
    }
    if got.CallType != tt.want.CallType {
    t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)
    }
    }
    }
    }
    // TestApplyBinding applies three bindings in turn to a single labelled
    // segment and checks the resulting label: a species-only binding sets
    // certainty 100 and clears the call type, a species+calltype binding sets
    // the call type, and "Don't Know" sets certainty 0. It also checks that the
    // reviewer from the config is written to the file metadata.
    func TestApplyBinding(t *testing.T) {
        bindings := []KeyBinding{
            {Key: "k", Species: "Kiwi"},
            {Key: "n", Species: "Don't Know"},
            {Key: "d", Species: "Kiwi", CallType: "Duet"},
        }
        df := &utils.DataFile{
            Meta: &utils.DataMeta{},
            Segments: []*utils.Segment{
                {
                    StartTime: 10.0,
                    EndTime:   20.0,
                    Labels: []*utils.Label{
                        {Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},
                    },
                },
            },
        }
        state := NewClassifyState(ClassifyConfig{
            Filter:    "test-filter",
            Reviewer:  "David",
            Bindings:  bindings,
            Certainty: -1,
        }, []*utils.DataFile{df})

        // Re-read the label after every ApplyBinding rather than caching the
        // pointer, in case the label object is replaced.
        label := func() *utils.Label { return df.Segments[0].Labels[0] }

        // Apply "k" = Kiwi (no calltype, should remove existing calltype)
        state.ApplyBinding(&BindingResult{Species: "Kiwi"})

        // Fatal, not Error: the checks below index Labels[0] and would panic
        // on an empty slice.
        if n := len(df.Segments[0].Labels); n != 1 {
            t.Fatalf("expected 1 label, got %d", n)
        }
        if label().Species != "Kiwi" {
            t.Errorf("expected Species=Kiwi, got %s", label().Species)
        }
        if label().Certainty != 100 {
            t.Errorf("expected Certainty=100, got %d", label().Certainty)
        }
        if label().CallType != "" {
            t.Errorf("expected CallType='', got %s (should be removed)", label().CallType)
        }
        if df.Meta.Reviewer != "David" {
            t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)
        }

        // Apply "d" = Kiwi/Duet (should set calltype)
        state.ApplyBinding(&BindingResult{Species: "Kiwi", CallType: "Duet"})
        if len(df.Segments[0].Labels) == 0 {
            t.Fatal("expected a label after applying Kiwi/Duet, got none")
        }
        if label().CallType != "Duet" {
            t.Errorf("expected CallType=Duet, got %s", label().CallType)
        }

        // Apply "n" = Don't Know (certainty should be 0)
        state.ApplyBinding(&BindingResult{Species: "Don't Know"})
        if len(df.Segments[0].Labels) == 0 {
            t.Fatal("expected a label after applying Don't Know, got none")
        }
        if label().Species != "Don't Know" {
            t.Errorf("expected Species=Don't Know, got %s", label().Species)
        }
        if label().Certainty != 0 {
            t.Errorf("expected Certainty=0 for Don't Know, got %d", label().Certainty)
        }
    }
    func TestApplyBindingCallTypeRemoval(t *testing.T) {
    bindings := []KeyBinding{
    {Key: "k", Species: "Kiwi"}, // no calltype
    }
    df := &utils.DataFile{
    Meta: &utils.DataMeta{},
    Segments: []*utils.Segment{
    {
    StartTime: 10.0,
    EndTime: 20.0,
    Labels: []*utils.Label{
    {Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},
    },
    },
    },
    }
    state := NewClassifyState(ClassifyConfig{
    Filter: "test-filter",
    Reviewer: "David",
    Bindings: bindings,
    Certainty: -1,
    }, []*utils.DataFile{df})
    // Apply "k" = Kiwi (should remove Male calltype)
    result := &BindingResult{Species: "Kiwi"}
    state.ApplyBinding(result)
    if df.Segments[0].Labels[0].CallType != "" {
    t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
    }
    }
    // TestConfirmLabelDontKnow verifies that confirming a "Don't Know" label
    // with certainty 0 does nothing: ConfirmLabel reports false, the label keeps
    // its species and certainty, and the state is not marked dirty.
    func TestConfirmLabelDontKnow(t *testing.T) {
        segment := &utils.Segment{
            StartTime: 10.0,
            EndTime:   20.0,
            Labels: []*utils.Label{
                {Species: "Don't Know", Certainty: 0, Filter: "test-filter"},
            },
        }
        df := &utils.DataFile{
            Meta:     &utils.DataMeta{},
            Segments: []*utils.Segment{segment},
        }
        cfg := ClassifyConfig{
            Filter:    "test-filter",
            Reviewer:  "David",
            Certainty: -1,
        }
        state := NewClassifyState(cfg, []*utils.DataFile{df})

        // ConfirmLabel on Don't Know should be a no-op
        if confirmed := state.ConfirmLabel(); confirmed {
            t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")
        }

        after := df.Segments[0].Labels[0]
        if after.Species != "Don't Know" {
            t.Errorf("Species should remain Don't Know, got %s", after.Species)
        }
        if after.Certainty != 0 {
            t.Errorf("Certainty should remain 0, got %d", after.Certainty)
        }
        if state.Dirty {
            t.Error("State should not be dirty after confirming Don't Know")
        }
    }
  • file deletion: calls_classify_load_test.go (----------)
    [6.248737][6.597282:597333](),[6.597333][6.590669:590669]()
    package tools
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // writeDataFileContent creates a .data file in dir with the given raw content.
    // It writes name under dir with mode 0644 and fails the test immediately if
    // the write does not succeed.
    func writeDataFileContent(t *testing.T, dir, name, content string) {
        t.Helper()
        if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil {
            t.Fatal(err)
        }
    }
    // mustLoadDataFiles is a test helper that calls LoadDataFiles and fatals on error.
    func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
    t.Helper()
    state, err := LoadDataFiles(config)
    if err != nil {
    t.Fatal(err)
    }
    // assertFileSegCounts reports a test error when the number of loaded files
    // or the total segment count differs from the expected values. label
    // prefixes each message to identify the scenario.
    func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
        t.Helper()
        if gotFiles := len(state.DataFiles); gotFiles != wantFiles {
            t.Errorf("%s: expected %d files, got %d", label, wantFiles, gotFiles)
        }
        if gotSegs := state.TotalSegments(); gotSegs != wantSegs {
            t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, gotSegs)
        }
    }
    // Raw .data fixtures used by the load tests. Each is a JSON array holding a
    // metadata object followed by a single segment whose only label is the named
    // species with certainty 90.
    // NOTE(review): the four leading numbers of a segment are presumably start
    // time, end time and a frequency range — confirm against the .data parser.
    const (
    kiwiSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`
    tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
    )
    func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
    tempDir := t.TempDir()
    writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
    writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
    writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)
    t.Run("no_filter", func(t *testing.T) {
    state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: -1})
    assertFileSegCounts(t, state, 3, 3, "No filter")
    })
    t.Run("species_kiwi", func(t *testing.T) {
    state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
    assertFileSegCounts(t, state, 2, 2, "Species=Kiwi")
    })
    t.Run("species_tomtit", func(t *testing.T) {
    state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Tomtit", Certainty: -1})
    assertFileSegCounts(t, state, 1, 1, "Species=Tomtit")
    })
    t.Run("species_nonexistent", func(t *testing.T) {
    state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "NonExistent", Certainty: -1})
    assertFileSegCounts(t, state, 0, 0, "Species=NonExistent")
    })
    }
    // TestLoadDataFilesWithMixedSegments loads one file containing two Kiwi
    // segments and one Tomtit segment with a Species=Kiwi filter. It expects the
    // file to be kept, TotalSegments to count only the two Kiwi segments, and
    // the DataFile itself to still hold all three segments.
    func TestLoadDataFilesWithMixedSegments(t *testing.T) {
        tempDir := t.TempDir()
        file := `[
    {"Operator": "test"},
    [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
    [10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],
    [20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]
    ]`
        writeDataFileContent(t, tempDir, "mixed.data", file)
        state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

        // Fatal, not Error: DataFiles[0] is indexed below and would panic if
        // no file was loaded.
        if len(state.DataFiles) != 1 {
            t.Fatalf("Expected 1 file, got %d", len(state.DataFiles))
        }
        // TotalSegments uses the cached filtered segments.
        if got := state.TotalSegments(); got != 2 {
            t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
        }
        // The DataFile should still have all 3 segments internally;
        // only the cached filtered view is restricted to the Kiwi ones.
        if got := len(state.DataFiles[0].Segments); got != 3 {
            t.Errorf("DataFile should have 3 segments internally, got %d", got)
        }
    }
    // TestFilteringDoesNotModifyOriginalSegments checks that loading with a
    // species filter leaves the DataFile's own segment list untouched: both the
    // Kiwi and the Tomtit segment must still be present, in file order.
    func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
        tempDir := t.TempDir()
        file := `[
    {"Operator": "test"},
    [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
    [10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]
    ]`
        writeDataFileContent(t, tempDir, "test.data", file)
        state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

        // Guard the DataFiles[0] access below against an empty result.
        if len(state.DataFiles) == 0 {
            t.Fatal("expected the file to be loaded, got no data files")
        }

        // Original segments should be untouched
        originalSegments := state.DataFiles[0].Segments
        if len(originalSegments) != 2 {
            t.Errorf("Original should have 2 segments, got %d", len(originalSegments))
        }

        // Verify all original segments are preserved
        var species []string
        for _, seg := range originalSegments {
            if len(seg.Labels) > 0 {
                species = append(species, seg.Labels[0].Species)
            }
        }
        if len(species) != 2 || species[0] != "Kiwi" || species[1] != "Tomtit" {
            t.Errorf("Original segments should have both species, got %v", species)
        }
    }
    // TestLoadDataFilesCertaintyPruning loads two single-segment Kiwi files with
    // certainty 70 and 100 under Certainty=100 and expects only one file and one
    // segment to remain, with CurrentSegment returning a segment.
    func TestLoadDataFilesCertaintyPruning(t *testing.T) {
        tempDir := t.TempDir()
        const (
            lowCertainty  = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
            fullCertainty = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`
        )
        writeDataFileContent(t, tempDir, "file1.data", lowCertainty)
        writeDataFileContent(t, tempDir, "file2.data", fullCertainty)

        state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: 100})
        assertFileSegCounts(t, state, 1, 1, "Certainty=100")

        // CurrentSegment should work (not nil) because file1 was pruned
        if current := state.CurrentSegment(); current == nil {
            t.Error("CurrentSegment should not be nil after pruning")
        }
    }
    return state
    }
    }
  • file deletion: calls_classify_filter_test.go (----------)
    [6.248737][6.605661:605714](),[6.605714][6.597335:597335]()
    package tools
    import (
    "math/rand"
    "testing"
    "skraak/utils"
    )
    // TestTotalSegmentsRespectsFilters checks that TotalSegments counts only the
    // segments matching the configured species and/or filter name, across
    // several in-memory data files.
    func TestTotalSegmentsRespectsFilters(t *testing.T) {
        // seg builds a single-label segment.
        seg := func(start, end float64, species, filter, callType string) *utils.Segment {
            return &utils.Segment{
                StartTime: start,
                EndTime:   end,
                Labels: []*utils.Label{
                    {Species: species, Filter: filter, CallType: callType},
                },
            }
        }

        // Test data files with different species and filters.
        df1 := &utils.DataFile{
            FilePath: "/test/file1.data",
            Segments: []*utils.Segment{
                seg(0, 10, "Kiwi", "model-1.0", ""),
                seg(10, 20, "Tomtit", "model-1.0", ""),
            },
        }
        df2 := &utils.DataFile{
            FilePath: "/test/file2.data",
            Segments: []*utils.Segment{
                seg(0, 10, "Kiwi", "model-1.0", ""),
            },
        }
        df3 := &utils.DataFile{
            FilePath: "/test/file3.data",
            Segments: []*utils.Segment{
                seg(0, 10, "Kiwi", "model-1.0", "Duet"),
                seg(10, 20, "Kiwi", "model-2.0", "Male"),
            },
        }
        both := []*utils.DataFile{df1, df2}

        cases := []struct {
            cfg    ClassifyConfig
            files  []*utils.DataFile
            want   int
            format string
        }{
            // No filters: all segments are counted.
            {ClassifyConfig{Certainty: -1}, both, 3, "No filters: expected 3 segments, got %d"},
            // Species only.
            {ClassifyConfig{Species: "Kiwi", Certainty: -1}, both, 2, "Species=Kiwi: expected 2 segments, got %d"},
            {ClassifyConfig{Species: "Tomtit", Certainty: -1}, both, 1, "Species=Tomtit: expected 1 segment, got %d"},
            // Filter name only: every segment carries model-1.0.
            {ClassifyConfig{Filter: "model-1.0", Certainty: -1}, both, 3, "Filter=model-1.0: expected 3 segments, got %d"},
            // Species that never occurs.
            {ClassifyConfig{Species: "NonExistent", Certainty: -1}, both, 0, "Species=NonExistent: expected 0 segments, got %d"},
            // Combined filter + species.
            {ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3}, 1, "Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d"},
        }

        for _, tc := range cases {
            state := NewClassifyState(tc.cfg, tc.files)
            if got := state.TotalSegments(); got != tc.want {
                t.Errorf(tc.format, got)
            }
        }
    }
    // TestCurrentSegmentNumberWithFilters checks that CurrentSegmentNumber counts
    // only the segments that pass the species filter when the cursor sits in a
    // later file.
    func TestCurrentSegmentNumberWithFilters(t *testing.T) {
        // labelled builds a 10-second segment carrying one model-1.0 label.
        labelled := func(start float64, species string) *utils.Segment {
            return &utils.Segment{
                StartTime: start,
                EndTime:   start + 10,
                Labels:    []*utils.Label{{Species: species, Filter: "model-1.0"}},
            }
        }

        first := &utils.DataFile{
            FilePath: "/test/file1.data",
            Segments: []*utils.Segment{labelled(0, "Kiwi"), labelled(10, "Tomtit")},
        }
        second := &utils.DataFile{
            FilePath: "/test/file2.data",
            Segments: []*utils.Segment{labelled(0, "Kiwi")},
        }

        // Position the cursor on the first segment of the second file. With the
        // Kiwi filter that is the second matching segment overall: one Kiwi in
        // the first file plus this one.
        cfg := ClassifyConfig{Species: "Kiwi", Certainty: -1}
        state := NewClassifyState(cfg, []*utils.DataFile{first, second})
        state.FileIdx, state.SegmentIdx = 1, 0

        got := state.CurrentSegmentNumber()
        if got != 2 {
            t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)
        }
    }
    // TestCertaintyFiltering checks that the Certainty setting restricts the
    // segment count to labels with exactly that certainty, alone and combined
    // with a species filter.
    func TestCertaintyFiltering(t *testing.T) {
        // labelled builds a 10-second segment with one model-1.0 label.
        labelled := func(start float64, species string, certainty int) *utils.Segment {
            return &utils.Segment{
                StartTime: start,
                EndTime:   start + 10,
                Labels: []*utils.Label{
                    {Species: species, Filter: "model-1.0", Certainty: certainty},
                },
            }
        }
        df := &utils.DataFile{
            FilePath: "/test/file1.data",
            Segments: []*utils.Segment{
                labelled(0, "Kiwi", 70),
                labelled(10, "Kiwi", 100),
                labelled(20, "Tomtit", 70),
            },
        }
        // countFor builds a fresh state for cfg and reports its segment total.
        countFor := func(cfg ClassifyConfig) int {
            state := NewClassifyState(cfg, []*utils.DataFile{df})
            return state.TotalSegments()
        }

        // Certainty 70 matches the first Kiwi and the Tomtit.
        if got := countFor(ClassifyConfig{Certainty: 70}); got != 2 {
            t.Errorf("Certainty=70: expected 2 segments, got %d", got)
        }
        // Certainty 100 matches only the second Kiwi.
        if got := countFor(ClassifyConfig{Certainty: 100}); got != 1 {
            t.Errorf("Certainty=100: expected 1 segment, got %d", got)
        }
        // Certainty 0 is a real filter value and matches nothing here.
        if got := countFor(ClassifyConfig{Certainty: 0}); got != 0 {
            t.Errorf("Certainty=0: expected 0 segments, got %d", got)
        }
        // Species and certainty combine: only the 70% Kiwi remains.
        if got := countFor(ClassifyConfig{Species: "Kiwi", Certainty: 70}); got != 1 {
            t.Errorf("Species=Kiwi + Certainty=70: expected 1 segment, got %d", got)
        }
    }
    // TestSampling checks the number of segments applySampling keeps for several
    // percentages and that the surviving files stay in their original order.
    func TestSampling(t *testing.T) {
        // newFile builds a data file with n consecutive one-second segments.
        newFile := func(path string, n int) *utils.DataFile {
            segs := make([]*utils.Segment, 0, n)
            for i := 0; i < n; i++ {
                segs = append(segs, &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)})
            }
            return &utils.DataFile{FilePath: path, Segments: segs}
        }
        fileA := newFile("/test/f1.data", 6)
        fileB := newFile("/test/f2.data", 4)
        files := []*utils.DataFile{fileA, fileB}
        perFile := [][]*utils.Segment{fileA.Segments, fileB.Segments}

        // sampleAt runs applySampling with a fixed seed and returns the kept
        // files together with the total number of sampled segments.
        sampleAt := func(pct int) ([]*utils.DataFile, int) {
            keptFiles, sampled := applySampling(files, perFile, pct, rand.New(rand.NewSource(42)))
            total := 0
            for i := range sampled {
                total += len(sampled[i])
            }
            return keptFiles, total
        }

        // 50% of 10 → 5
        half, got := sampleAt(50)
        if got != 5 {
            t.Errorf("sample 50%%: expected 5, got %d", got)
        }
        // Files must be in original chronological order
        for i := 1; i < len(half); i++ {
            if half[i].FilePath < half[i-1].FilePath {
                t.Errorf("sample 50%%: files out of order at index %d", i)
            }
        }

        // 10% of 10 → 1
        if _, got := sampleAt(10); got != 1 {
            t.Errorf("sample 10%%: expected 1, got %d", got)
        }
        // 1% of 10 → clamp to 1
        if _, got := sampleAt(1); got != 1 {
            t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)
        }
        // 99% of 10 → 9
        if _, got := sampleAt(99); got != 9 {
            t.Errorf("sample 99%%: expected 9, got %d", got)
        }
    }
    // TestCertaintyPruning covers the case where the first file has no segment at
    // the requested certainty while a later file does.
    func TestCertaintyPruning(t *testing.T) {
        // kiwiFile builds a data file with one Kiwi segment labelled by
        // model-1.0 at the given certainty.
        kiwiFile := func(path string, certainty int) *utils.DataFile {
            return &utils.DataFile{
                FilePath: path,
                Segments: []*utils.Segment{{
                    StartTime: 0,
                    EndTime:   10,
                    Labels: []*utils.Label{
                        {Species: "Kiwi", Filter: "model-1.0", Certainty: certainty},
                    },
                }},
            }
        }
        noMatch := kiwiFile("/test/file1.data", 70)
        match := kiwiFile("/test/file2.data", 100)

        // Old bug: without pruning, the first file has no certainty=100 segment,
        // so CurrentSegment() returned nil even though TotalSegments() > 0.
        state := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{noMatch, match})

        // Only the second file contributes a segment at certainty 100.
        got := state.TotalSegments()
        if got != 1 {
            t.Errorf("Certainty=100: expected 1 segment, got %d", got)
        }
        // CurrentSegment is not asserted here: the pruning that makes it work is
        // assumed to happen in LoadDataFiles, and this state is built manually.
    }
    }
    // TestCallTypeNoneFiltering checks how the CallType setting narrows a species
    // filter: utils.CallTypeNone selects only labels without a calltype, an empty
    // CallType selects every label of the species, and a named calltype selects
    // only labels carrying that calltype.
    func TestCallTypeNoneFiltering(t *testing.T) {
    // Create test data: Kiwi with calltype, Kiwi without, Tomtit without
    df := &utils.DataFile{
    FilePath: "/test/file1.data",
    Segments: []*utils.Segment{
    {
    StartTime: 0,
    EndTime: 10,
    Labels: []*utils.Label{
    {Species: "Kiwi", Filter: "model-1.0", CallType: "Male"},
    },
    },
    {
    StartTime: 10,
    EndTime: 20,
    Labels: []*utils.Label{
    {Species: "Kiwi", Filter: "model-1.0"}, // no calltype
    },
    },
    {
    StartTime: 20,
    EndTime: 30,
    Labels: []*utils.Label{
    {Species: "Tomtit", Filter: "model-1.0"}, // no calltype, wrong species
    },
    },
    },
    }
    // Test 1: --species Kiwi+_ should match only Kiwi with no calltype (1 segment)
    state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, []*utils.DataFile{df})
    if got := state1.TotalSegments(); got != 1 {
    t.Errorf("Species=Kiwi+_: expected 1 segment, got %d", got)
    }
    // Test 2: --species Kiwi should still match all Kiwi (2 segments)
    state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df})
    if got := state2.TotalSegments(); got != 2 {
    t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
    }
    // Test 3: --species Kiwi+Male should still work as before (1 segment)
    state3 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1}, []*utils.DataFile{df})
    if got := state3.TotalSegments(); got != 1 {
    t.Errorf("Species=Kiwi+Male: expected 1 segment, got %d", got)
    }
  • file deletion: calls_classify.go (----------)
    [6.248737][6.622528:622569](),[6.622569][6.605716:605716]()
    package tools
    import (
    "fmt"
    "math/rand"
    "os"
    "path/filepath"
    "slices"
    "sort"
    "strings"
    "time"
    "skraak/utils"
    )
    // KeyBinding maps a single key press to the species (and optional calltype)
    // that ParseKeyBuffer reports for that key.
    type KeyBinding struct {
    Key string // single char: "k", "n", "p"; compared for exact equality with the pressed key
    Species string // "Kiwi", "Don't Know", "Morepork"
    CallType string // "Duet", "Female", "Male" (optional)
    }
    // ClassifyConfig holds the configuration for classification: which .data
    // files to load, which segments to scope the review to, and how keys map to
    // labels.
    type ClassifyConfig struct {
    Folder string // searched for .data files when File is empty
    File string // when non-empty, the only .data file used (Folder is ignored)
    Filter string // label filter name; matched against Label.Filter and stamped on labels created during review ("" = no filter)
    Species string // scope to this species (optional)
    CallType string // scope to this calltype within species (optional)
    Certainty int // scope to this certainty value, -1 = no filter (optional)
    Sample int // random sample percentage 1-99, -1 = no sampling, 100 = no-op
    Goto string // goto this file on startup (optional, basename match)
    Reviewer string // written to the data file's Meta.Reviewer whenever a label is edited
    Color bool
    ImageSize int // spectrogram display size in pixels (0 = default)
    Sixel bool
    ITerm bool
    Bindings []KeyBinding // primary key bindings, searched in order by ParseKeyBuffer
    // SecondaryBindings maps a primary binding key to per-species calltype
    // keys. Invoked via Shift+primary-key: the species is labeled without
    // advancing, and the next key is interpreted as a calltype.
    SecondaryBindings map[string]map[string]string
    Night bool // keep only files whose IsNight result reports SolarNight
    Day bool // keep only files whose IsNight result reports DiurnalActive
    Lat float64 // passed to IsNight for the Night/Day check
    Lng float64 // passed to IsNight for the Night/Day check
    Timezone string // passed to IsNight for the Night/Day check
    }
    // ClassifyState holds the current state for TUI: the loaded files, the
    // filtered segments being reviewed, and the cursor position within them.
    type ClassifyState struct {
    Config ClassifyConfig
    DataFiles []*utils.DataFile // files under review; indexed by FileIdx
    filteredSegs [][]*utils.Segment // cached at load time, parallel to DataFiles
    totalSegs int // pre-computed total segment count
    FileIdx int // index of the current file in DataFiles / filteredSegs
    SegmentIdx int // index of the current segment within filteredSegs[FileIdx]
    Dirty bool // set when a label is edited; cleared by a successful Save
    Player *utils.AudioPlayer
    PlaybackSpeed float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
    TimeFilteredCount int // files skipped by --night or --day filter
    }
    // BindingResult represents parsed key result: the species and calltype that
    // ApplyBinding writes onto the current segment's label.
    type BindingResult struct {
    Species string // "Don't Know" is stored with certainty 0, anything else with 100
    CallType string // empty string = remove calltype
    }
    // LoadDataFiles loads all .data files for classification
    // findDataFilePaths returns the .data paths to classify: the single
    // configured file when config.File is set, otherwise whatever
    // utils.FindDataFiles reports for config.Folder.
    func findDataFilePaths(config ClassifyConfig) ([]string, error) {
        if single := config.File; single != "" {
            return []string{single}, nil
        }
        found, err := utils.FindDataFiles(config.Folder)
        if err == nil {
            return found, nil
        }
        return nil, fmt.Errorf("find data files: %w", err)
    }
    // filterDataFileSegments applies segment and day/night filters to a single data file.
    // Returns the filtered segments and whether the file should be kept.
    // If the file is filtered out (no matching segments, or time-of-day), returns nil, false.
    func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
    segs := filterSegmentsByLabel(df.Segments, config)
    if segs == nil {
    return nil, false, 0
    }
    timeFiltered := 0
    if config.Night || config.Day {
    keep, tf := filterByTimeOfDay(df.FilePath, config)
    if !keep {
    return nil, false, tf
    }
    }
    if len(filePaths) == 0 {
    return nil, fmt.Errorf("no .data files found")
    }
    var dataFiles []*utils.DataFile
    for _, path := range filePaths {
    df, err := utils.ParseDataFile(path)
    if err != nil {
    continue
    }
    dataFiles = append(dataFiles, df)
    }
    if len(dataFiles) == 0 {
    return nil, fmt.Errorf("no valid .data files")
    }
    sort.Slice(dataFiles, func(i, j int) bool {
    return dataFiles[i].FilePath < dataFiles[j].FilePath
    })
    return dataFiles, nil
    }
    // filterDataFiles applies segment filters to each data file, returning kept files and their segments.
    func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
    var kept []*utils.DataFile
    var cachedSegs [][]*utils.Segment
    var timeFiltered int
    for _, df := range dataFiles {
    segs, keep, tf := filterDataFileSegments(df, config)
    timeFiltered += tf
    if !keep {
    continue
    }
    kept = append(kept, df)
    cachedSegs = append(cachedSegs, segs)
    }
    total := 0
    for _, segs := range filteredSegs {
    total += len(segs)
    }
    state := &ClassifyState{
    Config: config,
    DataFiles: dataFiles,
    filteredSegs: filteredSegs,
    totalSegs: total,
    TimeFilteredCount: timeFiltered,
    }
    if config.Goto == "" {
    return state, nil
    }
    for i, df := range state.DataFiles {
    base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]
    if base == config.Goto {
    state.FileIdx = i
    return state, nil
    }
    }
    return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
    }
    // applySampling randomly keeps sample percent of the filtered segments.
    //
    // kept and cachedSegs are parallel: cachedSegs[i] holds the segments of
    // kept[i]. The number of segments retained is len(all)*sample/100 (integer
    // division), raised to 1 so a small percentage still yields a segment, and
    // capped at the number of segments available. Files left without a sampled
    // segment are dropped. Both results keep the input order of files and of
    // segments within each file; the input slices are only read.
    //
    // When there are no segments at all it returns nil, nil rather than slicing
    // past the end of an empty list.
    func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
        // segRef addresses one segment as (file index, index within that file).
        type segRef struct{ fileIdx, segIdx int }

        total := 0
        for _, segs := range cachedSegs {
            total += len(segs)
        }
        if total == 0 {
            // Nothing to sample from; the minimum of one below would otherwise
            // select from an empty list and panic.
            return nil, nil
        }

        flat := make([]segRef, 0, total)
        for fi, segs := range cachedSegs {
            for si := range segs {
                flat = append(flat, segRef{fi, si})
            }
        }

        // At least one segment, never more than exist (guards sample > 100).
        targetCount := min(max(len(flat)*sample/100, 1), len(flat))
        rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
        selected := flat[:targetCount]

        // Restore chronological order before rebuilding
        sort.Slice(selected, func(i, j int) bool {
            if selected[i].fileIdx != selected[j].fileIdx {
                return selected[i].fileIdx < selected[j].fileIdx
            }
            return selected[i].segIdx < selected[j].segIdx
        })

        newCached := make([][]*utils.Segment, len(cachedSegs))
        for _, ref := range selected {
            newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
        }

        // Drop files that ended up with no sampled segment so the two results
        // stay parallel.
        var newKept []*utils.DataFile
        var finalCached [][]*utils.Segment
        for i, segs := range newCached {
            if len(segs) > 0 {
                newKept = append(newKept, kept[i])
                finalCached = append(finalCached, segs)
            }
        }
        return newKept, finalCached
    }
    // FilteredSegs returns the cached filtered segments parallel to DataFiles.
    // The internal slice is returned directly, not a copy.
    func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
    return s.filteredSegs
    }
    // CurrentFile returns the data file at FileIdx, or nil when FileIdx is past
    // the end of DataFiles.
    func (s *ClassifyState) CurrentFile() *utils.DataFile {
        if idx := s.FileIdx; idx < len(s.DataFiles) {
            return s.DataFiles[idx]
        }
        return nil
    }
    // CurrentSegment returns the filtered segment under the cursor, or nil when
    // either FileIdx or SegmentIdx is past the end of the cached segments.
    func (s *ClassifyState) CurrentSegment() *utils.Segment {
        if s.FileIdx < len(s.filteredSegs) {
            if inFile := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(inFile) {
                return inFile[s.SegmentIdx]
            }
        }
        return nil
    }
    // TotalSegments returns total segments to review. The value is the
    // pre-computed totalSegs field; it is not recounted on each call.
    func (s *ClassifyState) TotalSegments() int {
    return s.totalSegs
    }
    // CurrentSegmentNumber returns the 1-based position of the current segment
    // across all files: the filtered segments of every earlier file plus the
    // position within the current file.
    func (s *ClassifyState) CurrentSegmentNumber() int {
        position := s.SegmentIdx + 1
        for fileIdx := s.FileIdx - 1; fileIdx >= 0; fileIdx-- {
            position += len(s.filteredSegs[fileIdx])
        }
        return position
    }
    // NextSegment advances the cursor by one segment, stepping into the first
    // segment of the next file when the current file is exhausted. It reports
    // false, leaving the cursor untouched, when there is nowhere to go.
    func (s *ClassifyState) NextSegment() bool {
        if s.FileIdx >= len(s.filteredSegs) {
            return false
        }
        switch {
        case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
            // Still inside the current file.
            s.SegmentIdx++
        case s.FileIdx+1 < len(s.DataFiles):
            // Move to next file
            s.FileIdx++
            s.SegmentIdx = 0
        default:
            return false
        }
        return true
    }
    // PrevSegment steps the cursor back by one segment, landing on the last
    // segment of the previous file when already at the start of the current one.
    // It reports false when the cursor is at the very first segment.
    func (s *ClassifyState) PrevSegment() bool {
        if s.SegmentIdx > 0 {
            s.SegmentIdx--
            return true
        }
        if s.FileIdx <= 0 {
            return false
        }
        // Move to previous file; an empty file leaves the index at 0.
        s.FileIdx--
        s.SegmentIdx = max(len(s.filteredSegs[s.FileIdx])-1, 0)
        return true
    }
    // ParseKeyBuffer looks key up in the configured bindings and returns the
    // species/calltype of the first binding whose Key equals it, or nil when no
    // binding matches.
    func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
        bindings := s.Config.Bindings
        for i := range bindings {
            if bindings[i].Key != key {
                continue
            }
            return &BindingResult{
                Species:  bindings[i].Species,
                CallType: bindings[i].CallType,
            }
        }
        return nil
    }
    // SetComment stores comment on the current segment's first label matching the
    // configured filter and returns the comment it replaced (for undo). When the
    // segment has no matching label, a "Don't Know" label with certainty 0 is
    // appended to carry the comment and "" is returned. It also stamps the
    // reviewer on the file and marks the state dirty. With no current segment or
    // file it does nothing and returns "".
    func (s *ClassifyState) SetComment(comment string) string {
        seg, df := s.CurrentSegment(), s.CurrentFile()
        if seg == nil || df == nil {
            return ""
        }

        // Set reviewer
        df.Meta.Reviewer = s.Config.Reviewer

        matching := seg.GetFilterLabels(s.Config.Filter)
        if len(matching) > 0 {
            // Set comment on first matching label
            previous := matching[0].Comment
            matching[0].Comment = comment
            s.Dirty = true
            return previous
        }

        // No matching labels, add new one with comment
        seg.Labels = append(seg.Labels, &utils.Label{
            Species:   "Don't Know",
            Certainty: 0,
            Filter:    s.Config.Filter,
            Comment:   comment,
        })
        s.Dirty = true
        return ""
    }
    // GetCurrentComment returns the comment of the current segment's first label
    // matching the configured filter, or "" when there is no current segment or
    // no matching label.
    func (s *ClassifyState) GetCurrentComment() string {
        if seg := s.CurrentSegment(); seg != nil {
            if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
                return matching[0].Comment
            }
        }
        return ""
    }
    // ApplyBinding writes result onto the current segment. The first label
    // matching the configured filter is overwritten with the new species,
    // calltype and certainty, and any further matching labels are removed; if
    // none matches, a new label is appended. "Don't Know" is stored with
    // certainty 0 and every other species with 100. Labels are then re-sorted by
    // species, the reviewer is stamped on the file, and the state is marked
    // dirty. With no current segment or file it does nothing.
    func (s *ClassifyState) ApplyBinding(result *BindingResult) {
        seg, df := s.CurrentSegment(), s.CurrentFile()
        if seg == nil || df == nil {
            return
        }

        // Set reviewer
        df.Meta.Reviewer = s.Config.Reviewer

        matching := seg.GetFilterLabels(s.Config.Filter)

        // Determine certainty: 0 for Don't Know, 100 for others
        certainty := 0
        if result.Species != "Don't Know" {
            certainty = 100
        }

        switch len(matching) {
        case 0:
            // No matching labels, add new one
            seg.Labels = append(seg.Labels, &utils.Label{
                Species:   result.Species,
                Certainty: certainty,
                Filter:    s.Config.Filter,
                CallType:  result.CallType,
            })
        default:
            // Edit first matching label, remove rest
            primary := matching[0]
            primary.Species = result.Species
            primary.Certainty = certainty
            primary.CallType = result.CallType // always set (empty = remove)

            if extras := matching[1:]; len(extras) > 0 {
                var retained []*utils.Label
                for _, lbl := range seg.Labels {
                    if slices.Contains(extras, lbl) {
                        continue
                    }
                    retained = append(retained, lbl)
                }
                seg.Labels = retained
            }
        }

        // Re-sort labels
        sort.Slice(seg.Labels, func(a, b int) bool {
            return seg.Labels[a].Species < seg.Labels[b].Species
        })
        s.Dirty = true
    }
    // ApplyCallTypeOnly sets callType on the current segment's first label that
    // matches the configured filter, stamping the reviewer and marking the state
    // dirty. It is the second half of the Shift+primary flow: the species was
    // labeled by the first key and this call handles the calltype key.
    // It is a no-op when there is no current segment, file, or matching label.
    func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
        seg, df := s.CurrentSegment(), s.CurrentFile()
        if seg == nil || df == nil {
            return
        }
        matching := seg.GetFilterLabels(s.Config.Filter)
        if len(matching) == 0 {
            return
        }
        target := matching[0]
        df.Meta.Reviewer = s.Config.Reviewer
        target.CallType = callType
        s.Dirty = true
    }
    // HasSecondary reports whether primaryKey has at least one secondary
    // (calltype) binding configured.
    func (s *ClassifyState) HasSecondary(primaryKey string) bool {
        secondary, found := s.Config.SecondaryBindings[primaryKey]
        return found && len(secondary) > 0
    }
    // ConfirmLabel raises the certainty of the current segment's first
    // filter-matching label to 100 and reports whether anything changed.
    // It returns false without modifying anything when there is no current
    // segment or file, no matching label, the label is already at 100, or the
    // label has certainty 0 (a Don't Know: confirming it is a no-op and the
    // caller should just advance to the next segment).
    func (s *ClassifyState) ConfirmLabel() bool {
        seg := s.CurrentSegment()
        if seg == nil {
            return false
        }
        matching := seg.GetFilterLabels(s.Config.Filter)
        if len(matching) == 0 {
            return false
        }
        target := matching[0]
        if c := target.Certainty; c == 0 || c == 100 {
            return false
        }
        df := s.CurrentFile()
        if df == nil {
            return false
        }
        df.Meta.Reviewer = s.Config.Reviewer
        target.Certainty = 100
        s.Dirty = true
        return true
    }
    // Save writes the current file back to its own path when the state is dirty
    // and clears the dirty flag on success. It returns nil without writing when
    // there is no current file or nothing has changed.
    func (s *ClassifyState) Save() error {
        df := s.CurrentFile()
        if df == nil || !s.Dirty {
            return nil
        }
        if err := df.Write(df.FilePath); err != nil {
            return err
        }
        s.Dirty = false
        return nil
    }
    // getFilterLabel returns the first label of seg whose Filter equals the
    // configured filter. With no filter configured it returns the segment's first
    // label. It returns nil when nothing qualifies.
    func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
        wanted := s.Config.Filter
        for _, candidate := range seg.Labels {
            // An empty filter accepts the very first label.
            if wanted == "" || candidate.Filter == wanted {
                return candidate
            }
        }
        return nil
    }
    // getOrCreateFilterLabel returns the label getFilterLabel finds for seg. When
    // there is none it appends a "Don't Know" label with certainty 0 for the
    // configured filter, marks the state dirty, and returns the new label.
    func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
        if existing := s.getFilterLabel(seg); existing != nil {
            return existing
        }
        created := &utils.Label{
            Species:   "Don't Know",
            Certainty: 0,
            Filter:    s.Config.Filter,
        }
        seg.Labels = append(seg.Labels, created)
        s.Dirty = true
        return created
    }
    // HasBookmark reports whether the current segment's filter label is
    // bookmarked. It is the exported form of hasFilterBookmark and is false when
    // there is no current segment or no filter label.
    func (s *ClassifyState) HasBookmark() bool {
        return s.hasFilterBookmark()
    }
    // ToggleBookmark flips the bookmark flag on the current segment's filter
    // label, creating that label first if the segment has none. It stamps the
    // reviewer on the file and marks the state dirty. With no current segment or
    // file it does nothing.
    func (s *ClassifyState) ToggleBookmark() {
        seg, df := s.CurrentSegment(), s.CurrentFile()
        if seg == nil || df == nil {
            return
        }
        // Set reviewer
        df.Meta.Reviewer = s.Config.Reviewer
        target := s.getOrCreateFilterLabel(seg)
        target.Bookmark = !target.Bookmark
        s.Dirty = true
    }
    // NextBookmark moves the cursor forward to the next bookmarked segment,
    // wrapping to the first segment after the last one. It returns true with the
    // cursor on the bookmark, or false once the search has come full circle to
    // the starting position without finding one.
    func (s *ClassifyState) NextBookmark() bool {
        originFile, originSeg := s.FileIdx, s.SegmentIdx
        for firstStep := true; ; firstStep = false {
            // Advance to next segment, wrapping to the start of the folder.
            if moved := s.NextSegment(); !moved {
                s.FileIdx, s.SegmentIdx = 0, 0
            }
            // Landing on the origin after the first step means a full circle.
            backAtOrigin := s.FileIdx == originFile && s.SegmentIdx == originSeg
            if backAtOrigin && !firstStep {
                return false
            }
            if s.hasFilterBookmark() {
                return true
            }
        }
    }
    // PrevBookmark moves the cursor backward to the previous bookmarked segment,
    // wrapping to the last segment of the last file before the first one. It
    // returns true with the cursor on the bookmark, or false once the search has
    // come full circle to the starting position without finding one.
    //
    // With no data files loaded it returns false immediately and leaves the
    // cursor untouched; wrapping would otherwise set FileIdx to -1 and index the
    // segment cache out of range.
    func (s *ClassifyState) PrevBookmark() bool {
        if len(s.DataFiles) == 0 {
            return false
        }
        startFile := s.FileIdx
        startSeg := s.SegmentIdx
        first := true
        for {
            // Move to previous segment
            if !s.PrevSegment() {
                // Wrap to end of folder
                s.FileIdx = len(s.DataFiles) - 1
                segs := s.filteredSegs[s.FileIdx]
                s.SegmentIdx = max(len(segs)-1, 0)
            }
            // Check if we've looped back to start
            if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
                return false // full circle, no bookmark found
            }
            first = false
            // Check if current segment has bookmark
            if s.hasFilterBookmark() {
                return true
            }
        }
    }
    // hasFilterBookmark reports whether the current segment's filter label exists
    // and is bookmarked. It is false when there is no current segment.
    func (s *ClassifyState) hasFilterBookmark() bool {
        seg := s.CurrentSegment()
        if seg == nil {
            return false
        }
        if label := s.getFilterLabel(seg); label != nil {
            return label.Bookmark
        }
        return false
    }
    // FormatLabels renders labels as a comma-separated display string. Each label
    // appears as `Species[/CallType] (Certainty%) [[Filter]] ["Comment"]`, with
    // the optional parts omitted when empty. When filter is non-empty, labels
    // belonging to a different filter are left out.
    func FormatLabels(labels []*utils.Label, filter string) string {
        rendered := make([]string, 0, len(labels))
        for _, label := range labels {
            if filter == "" || label.Filter == filter {
                var sb strings.Builder
                sb.WriteString(label.Species)
                if label.CallType != "" {
                    sb.WriteString("/")
                    sb.WriteString(label.CallType)
                }
                fmt.Fprintf(&sb, " (%d%%)", label.Certainty)
                if label.Filter != "" {
                    sb.WriteString(" [")
                    sb.WriteString(label.Filter)
                    sb.WriteString("]")
                }
                if label.Comment != "" {
                    fmt.Fprintf(&sb, " \"%s\"", label.Comment)
                }
                rendered = append(rendered, sb.String())
            }
        }
        return strings.Join(rendered, ", ")
    }
    // buildClassifyState constructs the ClassifyState, handling --goto file positioning.
    func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
    return kept, cachedSegs, timeFiltered
    }
    // LoadDataFiles builds the ClassifyState for a classification session.
    // As listed here it resolves the .data paths, parses and sorts the data
    // files, applies the segment filters, optionally samples the result, and
    // hands everything to buildClassifyState. Errors from path resolution or
    // parsing are returned unchanged.
    // NOTE(review): filePaths is assigned but never read in this listing, and
    // parseAndSortDataFiles is given config rather than the paths — the path
    // lookup may belong to parseAndSortDataFiles; confirm against the real file.
    func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
    filePaths, err := findDataFilePaths(config)
    if err != nil {
    return nil, err
    }
    dataFiles, err := parseAndSortDataFiles(config)
    if err != nil {
    return nil, err
    }
    kept, cachedSegs, timeFiltered := filterDataFiles(dataFiles, config)
    // Sampling only applies to percentages strictly between 0 and 100. The RNG
    // is seeded from the wall clock, so each run draws a different sample.
    if config.Sample > 0 && config.Sample < 100 {
    rng := rand.New(rand.NewSource(time.Now().UnixNano()))
    kept, cachedSegs = applySampling(kept, cachedSegs, config.Sample, rng)
    }
    return buildClassifyState(config, kept, cachedSegs, timeFiltered)
    }
    // parseAndSortDataFiles finds, parses, and sorts .data files from the config.
    func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
    // filterByTimeOfDay applies the --night/--day filter to one .data file.
    // The path with its ".data" suffix removed is passed to IsNight together with
    // the configured location and timezone. It returns (true, 0) when the file
    // should be kept and (false, 1) when it is filtered out; a file whose IsNight
    // lookup fails is reported on stderr and also filtered out.
    func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
        wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
        night, err := IsNight(IsNightInput{
            FilePath: wavPath,
            Lat:      config.Lat,
            Lng:      config.Lng,
            Timezone: config.Timezone,
        })
        switch {
        case err != nil:
            fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
            return false, 1
        case config.Night && !night.SolarNight:
            return false, 1
        case config.Day && !night.DiurnalActive:
            return false, 1
        }
        return true, 0
    }
    return segs, true, timeFiltered
    }
    }
    // filterSegmentsByLabel applies label/species/certainty filters, returning matching segments.
    // With no filter active (empty Filter and Species, negative Certainty) the
    // input slice is returned as-is, even when it is empty.
    // Otherwise returns nil if no segments match (caller should skip the file).
    func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
    // CallType on its own does not activate filtering; it is only passed along
    // to SegmentMatchesFilters once one of the other criteria is set.
    hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0
    if !hasFilter {
    return segments
    }
    var segs []*utils.Segment
    for _, seg := range segments {
    if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
    segs = append(segs, seg)
    }
    }
    return segs // nil if empty, caller treats as "skip"
  • file deletion: bulk_file_import.go (----------)
    [6.248737][6.638264:638307](),[6.638307][6.622571:622571]()
    package tools
    import (
    "context"
    "database/sql"
    "encoding/csv"
    "fmt"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // BulkFileImportInput defines the input parameters for the bulk_file_import tool
    type BulkFileImportInput struct {
    // DatasetID is the dataset to import into; checked with utils.ValidateShortID.
    DatasetID string `json:"dataset_id"`
    // CSVPath is the CSV file listing the locations to import; it must be accessible.
    CSVPath string `json:"csv_path"`
    // LogFilePath is the progress log; it is created if missing and appended to.
    LogFilePath string `json:"log_file_path"`
    }
    // BulkFileImportOutput defines the output structure for the bulk_file_import tool
    type BulkFileImportOutput struct {
    TotalLocations int `json:"total_locations"` // number of locations loaded from the CSV
    ClustersCreated int `json:"clusters_created"` // presumably clusters newly created by the import — TODO confirm
    ClustersExisting int `json:"clusters_existing"` // presumably clusters that already existed — TODO confirm
    TotalFilesScanned int `json:"total_files_scanned"` // copied from the file import stats (TotalFiles)
    FilesImported int `json:"files_imported"` // copied from the file import stats (ImportedFiles)
    FilesDuplicate int `json:"files_duplicate"` // copied from the file import stats (DuplicateFiles)
    FilesError int `json:"files_error"` // copied from the file import stats (ErrorFiles)
    ProcessingTime string `json:"processing_time"` // elapsed time since the import started, as a duration string
    Errors []string `json:"errors,omitempty"` // error messages collected during the import; omitted from JSON when empty
    }
    // bulkLocationData holds CSV row data for a location.
    // NOTE(review): the field meanings below are inferred from their names; the
    // CSV column mapping is not visible here — confirm against bulkReadCSV.
    type bulkLocationData struct {
    LocationName string
    LocationID string
    DirectoryPath string // presumably the directory scanned for this location's files
    DateRange string
    SampleRate int
    FileCount int
    }
    // bulkImportStats tracks import statistics for a single cluster.
    // The same shape is also used for the totals that BulkFileImport copies into
    // its output.
    type bulkImportStats struct {
    TotalFiles int // reported as "Total files scanned"
    ImportedFiles int // reported as "Files imported"
    DuplicateFiles int // reported as "Duplicates skipped"
    ErrorFiles int // reported as "Errors"
    }
    // progressLogger handles writing to both log file and internal buffer
    type progressLogger struct {
    file *os.File // destination log file; synced after every line
    buffer *strings.Builder // in-memory copy of every logged line
    }
    // Log formats the message, prefixes it with a "[YYYY-MM-DD HH:MM:SS]"
    // timestamp, and appends the line to both the log file and the in-memory
    // buffer. The file is synced after each line. Write and sync failures are
    // reported on stderr and do not stop the import.
    func (l *progressLogger) Log(format string, args ...any) {
        stamp := time.Now().Format("2006-01-02 15:04:05")
        entry := fmt.Sprintf("[%s] %s\n", stamp, fmt.Sprintf(format, args...))

        // File first; failures here are warnings only.
        _, writeErr := l.file.WriteString(entry)
        if writeErr != nil {
            fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", writeErr)
        }
        syncErr := l.file.Sync()
        if syncErr != nil {
            fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", syncErr)
        }

        // Keep a copy in memory for potential error reporting.
        l.buffer.WriteString(entry)
    }
    // BulkFileImport imports WAV files across multiple locations using CSV specification
    func BulkFileImport(
    ctx context.Context,
    input BulkFileImportInput,
    ) (BulkFileImportOutput, error) {
    startTime := time.Now()
    var output BulkFileImportOutput
    // Open log file
    logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
    if err != nil {
    return output, fmt.Errorf("failed to open log file: %w", err)
    }
    defer func() { _ = logFile.Close() }()
    logger := &progressLogger{
    file: logFile,
    buffer: &strings.Builder{},
    }
    logger.Log("Starting bulk file import for dataset %s", input.DatasetID)
    // Phase 0: Validate input
    logger.Log("Validating input parameters...")
    if err := bulkValidateInput(input); err != nil {
    logger.Log("ERROR: Validation failed: %v", err)
    output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
    return output, fmt.Errorf("validation failed: %w", err)
    }
    logger.Log("Validation complete")
    // Phase 1: Read CSV
    logger.Log("Reading CSV file: %s", input.CSVPath)
    locations, err := bulkReadCSV(input.CSVPath)
    if err != nil {
    logger.Log("ERROR: Failed to read CSV: %v", err)
    output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
    return output, fmt.Errorf("failed to read CSV: %w", err)
    }
    logger.Log("Loaded %d locations from CSV", len(locations))
    output.TotalLocations = len(locations)
    // Phase 1.5: Validate all location_ids belong to the dataset
    logger.Log("Validating location_ids belong to dataset...")
    output.failOutput([]string{err.Error()}, startTime)
    return output, err
    }
    logger.Log("Location validation complete")
    // Phase 2: Create/Validate Clusters
    logger.Log("=== Phase 1: Creating/Validating Clusters ===")
    if err != nil {
    logger.Log("ERROR: Failed to open database: %v", err)
    output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
    return output, fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()
    clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
    if err != nil {
    output.failOutput(output.Errors, startTime)
    return output, err
    }
    logger.Log("=== Phase 2: Importing Files ===")
    fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
    output.TotalFilesScanned = fileStats.TotalFiles
    output.FilesImported = fileStats.ImportedFiles
    output.FilesDuplicate = fileStats.DuplicateFiles
    output.FilesError = fileStats.ErrorFiles
    output.Errors = append(output.Errors, errs...)
    if len(errs) > 0 {
    output.ProcessingTime = time.Since(startTime).String()
    return output, fmt.Errorf("failed to import files: %s", errs[0])
    }
    logger.Log("=== Import Complete ===")
    logger.Log("Total files scanned: %d", fileStats.TotalFiles)
    logger.Log("Files imported: %d", fileStats.ImportedFiles)
    logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
    logger.Log("Errors: %d", fileStats.ErrorFiles)
    logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))
    output.ProcessingTime = time.Since(startTime).String()
    return output, nil
    }
    // bulkValidateInput validates input parameters
    func bulkValidateInput(input BulkFileImportInput) error {
    // Validate ID format first (fast fail before DB queries)
    if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
    return err
    }
    // Verify CSV file exists
    if _, err := os.Stat(input.CSVPath); err != nil {
    return fmt.Errorf("CSV file not accessible: %w", err)
    }
    // Verify log file path is writable
    logDir := filepath.Dir(input.LogFilePath)
    if _, err := os.Stat(logDir); err != nil {
    return fmt.Errorf("log file directory not accessible: %w", err)
    }
    // Open database for validation queries
    if err != nil {
    return fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()
    // Verify dataset exists and is structured
    if err := db.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
    return err
    }
    return nil
    }
    // bulkValidateLocationsBelongToDataset checks every distinct location_id in
    // locations against datasetID using db.ValidateLocationBelongsToDataset.
    //
    // Each distinct ID is validated exactly once, in the order it first appears in
    // locations, so the returned messages come back in a stable, CSV-aligned order
    // (ranging over a map would shuffle them between runs). The result holds one
    // message per failing location and is nil when every location passes.
    func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
    	var failures []string
    	seen := make(map[string]struct{}, len(locations))
    	for _, loc := range locations {
    		if _, dup := seen[loc.LocationID]; dup {
    			continue
    		}
    		seen[loc.LocationID] = struct{}{}
    		if err := db.ValidateLocationBelongsToDataset(dbConn, loc.LocationID, datasetID); err != nil {
    			failures = append(failures, err.Error())
    		}
    	}
    	return failures
    }
    var clusterID string
    if err == sql.ErrNoRows {
    clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
    if err != nil {
    logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
    return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
    }
    logger.Log(" Created cluster: %s", clusterID)
    created++
    } else if err != nil {
    logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
    return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
    } else {
    clusterID = existingClusterID
    logger.Log(" Using existing cluster: %s", clusterID)
    existing++
    }
    compositeKey := loc.LocationID + "|" + loc.DateRange
    clusterIDMap[compositeKey] = clusterID
    }
    return clusterIDMap, created, existing, nil
    }
    // bulkImportAllFiles walks locations in order and imports the files of each one
    // into the cluster recorded for it in clusterIDMap (keyed by
    // "LocationID|DateRange").
    //
    // Locations without a map entry, or whose directory does not exist, are
    // skipped. The first location whose import fails aborts the run: the totals
    // gathered so far are returned together with a one-element message slice.
    // When every location succeeds the message slice is nil.
    func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
    	var sum bulkImportStats
    	count := len(locations)
    	for idx, loc := range locations {
    		clusterID, found := clusterIDMap[loc.LocationID+"|"+loc.DateRange]
    		if !found {
    			continue
    		}

    		logger.Log("[%d/%d] Importing files for: %s", idx+1, count, loc.LocationName)
    		logger.Log(" Directory: %s", loc.DirectoryPath)

    		_, statErr := os.Stat(loc.DirectoryPath)
    		if os.IsNotExist(statErr) {
    			logger.Log(" WARNING: Directory not found, skipping")
    			continue
    		}

    		locStats, importErr := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
    		if importErr != nil {
    			msg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, importErr)
    			logger.Log("ERROR: %s", msg)
    			return sum, []string{msg}
    		}

    		logger.Log(" Scanned: %d files", locStats.TotalFiles)
    		logger.Log(" Imported: %d, Duplicates: %d", locStats.ImportedFiles, locStats.DuplicateFiles)
    		if locStats.ErrorFiles > 0 {
    			logger.Log(" Errors: %d files", locStats.ErrorFiles)
    		}

    		// Fold this location's counters into the running totals.
    		sum.TotalFiles += locStats.TotalFiles
    		sum.ImportedFiles += locStats.ImportedFiles
    		sum.DuplicateFiles += locStats.DuplicateFiles
    		sum.ErrorFiles += locStats.ErrorFiles
    	}
    	return sum, nil
    }
    // bulkReadCSV loads the bulk-import specification from the CSV file at path.
    //
    // The first record is treated as a header and skipped. Every following record
    // must have at least six columns, read positionally as location_name,
    // location_id, directory_path, date_range, sample_rate and file_count.
    // Surrounding whitespace is trimmed from every column before it is validated,
    // so padded cells such as " 8000" parse the same way as the text columns do.
    //
    // It returns an error for an unreadable or empty file, a short row, an empty
    // required text column, an invalid location_id, or a non-numeric/invalid
    // sample_rate or file_count. Row numbers in error messages are 1-based and
    // count the header row.
    func bulkReadCSV(path string) ([]bulkLocationData, error) {
    	file, err := os.Open(path)
    	if err != nil {
    		return nil, err
    	}
    	defer func() { _ = file.Close() }()

    	records, err := csv.NewReader(file).ReadAll()
    	if err != nil {
    		return nil, err
    	}
    	if len(records) == 0 {
    		return nil, fmt.Errorf("CSV file is empty")
    	}

    	var locations []bulkLocationData
    	for i, record := range records[1:] { // records[0] is the header
    		row := i + 2 // 1-based row number, header included
    		if len(record) < 6 {
    			return nil, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", row, len(record))
    		}

    		// Required text columns must be non-empty after trimming.
    		locationName := strings.TrimSpace(record[0])
    		if locationName == "" {
    			return nil, fmt.Errorf("empty location_name in row %d", row)
    		}
    		directoryPath := strings.TrimSpace(record[2])
    		if directoryPath == "" {
    			return nil, fmt.Errorf("empty directory_path in row %d", row)
    		}
    		dateRange := strings.TrimSpace(record[3])
    		if dateRange == "" {
    			return nil, fmt.Errorf("empty date_range in row %d", row)
    		}

    		locationID := strings.TrimSpace(record[1])
    		if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
    			return nil, fmt.Errorf("invalid location_id in row %d: %w", row, err)
    		}

    		sampleRate, err := strconv.Atoi(strings.TrimSpace(record[4]))
    		if err != nil {
    			return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
    		}
    		if err := utils.ValidateSampleRate(sampleRate); err != nil {
    			return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
    		}

    		fileCount, err := strconv.Atoi(strings.TrimSpace(record[5]))
    		if err != nil {
    			return nil, fmt.Errorf("invalid file_count in row %d: %w", row, err)
    		}

    		locations = append(locations, bulkLocationData{
    			LocationName:  locationName,
    			LocationID:    locationID,
    			DirectoryPath: directoryPath,
    			DateRange:     dateRange,
    			SampleRate:    sampleRate,
    			FileCount:     fileCount,
    		})
    	}
    	return locations, nil
    }
    // bulkCreateCluster inserts a new active cluster row for locationID within
    // datasetID and returns the generated cluster ID.
    //
    // name is stored as the cluster name. The cluster path is derived from the
    // location's name (looked up in the location table) with spaces and forward
    // slashes replaced by underscores. The insert runs in its own logged
    // transaction tagged "bulk_file_import".
    //
    // ctx governs both the location lookup and the insert, so a cancelled import
    // stops here instead of issuing further queries. An error is returned when ID
    // generation, the location lookup, or any transaction step fails; underlying
    // errors are wrapped so callers can inspect them with errors.Is / errors.As.
    func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
    	clusterID, err := utils.GenerateShortID()
    	if err != nil {
    		return "", fmt.Errorf("failed to generate cluster ID: %w", err)
    	}
    	now := time.Now().UTC()

    	// The location name is only needed to build the cluster path.
    	var locationName string
    	err = database.QueryRowContext(ctx, "SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
    	if err != nil {
    		return "", fmt.Errorf("failed to get location name: %w", err)
    	}

    	// Normalize path: spaces and path separators become underscores.
    	path := strings.ReplaceAll(locationName, " ", "_")
    	path = strings.ReplaceAll(path, "/", "_")

    	tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
    	if err != nil {
    		return "", fmt.Errorf("failed to begin transaction: %w", err)
    	}
    	// Covers every early return below; the result is deliberately ignored.
    	defer tx.Rollback()

    	_, err = tx.ExecContext(ctx, `
    INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
    VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)
    `, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
    	if err != nil {
    		return "", fmt.Errorf("failed to insert cluster: %w", err)
    	}
    	if err = tx.Commit(); err != nil {
    		return "", fmt.Errorf("failed to commit cluster creation: %w", err)
    	}
    	return clusterID, nil
    }
    // bulkImportFilesForCluster imports all WAV files for a single cluster
    func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
    stats := &bulkImportStats{}
    // Check if directory exists
    if _, err := os.Stat(folderPath); os.IsNotExist(err) {
    logger.Log(" WARNING: Directory not found, skipping")
    return stats, nil
    }
    // Import the cluster (SAME LOGIC AS import_files.go)
    logger.Log(" Importing cluster %s", clusterID)
    FolderPath: folderPath,
    DatasetID: datasetID,
    LocationID: locationID,
    ClusterID: clusterID,
    Recursive: true,
    })
    if err != nil {
    return nil, err
    }
    // Map to bulk import stats
    stats.TotalFiles = clusterOutput.TotalFiles
    stats.ImportedFiles = clusterOutput.ImportedFiles
    stats.DuplicateFiles = clusterOutput.SkippedFiles
    stats.ErrorFiles = clusterOutput.FailedFiles
    // Log errors
    for i, fileErr := range clusterOutput.Errors {
    if i < 5 { // Log first 5
    logger.Log(" ERROR: %s: %s", fileErr.FileName, fileErr.Error)
    }
    }
    logger.Log(" Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)
    return stats, nil
    }
    if err := tx.Commit(); err != nil {
    return nil, fmt.Errorf("transaction commit failed: %w", err)
    }
    tx.Rollback()
    ctx := context.Background()
    tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
    if err != nil {
    return nil, fmt.Errorf("failed to begin transaction: %w", err)
    }
    clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
    // bulkCreateClusters creates or validates clusters for all locations.
    // Returns the cluster ID map, counts of created/existing clusters, and any error.
    func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
    clusterIDMap := make(map[string]string)
    created := 0
    existing := 0
    for i, loc := range locations {
    logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)
    var existingClusterID string
    err := database.QueryRow(`
    SELECT id FROM cluster
    WHERE location_id = ? AND name = ? AND active = true
    `, loc.LocationID, loc.DateRange).Scan(&existingClusterID)
    }
    // bulkValidateLocations validates that all location_ids in the CSV belong to the dataset.
    // Returns an error if validation fails.
    readDB, err := db.OpenReadOnlyDB(dbPath)
    if err != nil {
    logger.Log("ERROR: Failed to open database: %v", err)
    return fmt.Errorf("failed to open database: %w", err)
    }
    locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, datasetID)
    readDB.Close()
    if len(locationErrors) > 0 {
    for _, locErr := range locationErrors {
    logger.Log("ERROR: %s", locErr)
    }
    return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), datasetID)
    }
    return nil
    func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
    database, err := db.OpenReadOnlyDB(resolveDBPath(input.DBPath))
    // Phase 3: Import files
    output.ClustersCreated = created
    output.ClustersExisting = existing
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    if err := bulkValidateLocations(logger, locations, input.DatasetID, resolveDBPath(input.DBPath)); err != nil {
    }
    // BulkFileImport imports WAV files across multiple locations using CSV specification
    // failOutput sets error details and processing time on the output before returning.
    func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
    o.Errors = errs
    o.ProcessingTime = time.Since(startTime).String()
    DBPath string `json:"db_path"`
  • edit in tools/update_test.go at line 65
    [6.250266][6.250266:250286]()
    SetDBPath(dbPath)
  • edit in tools/update_test.go at line 71
    [6.250436]
    [6.250436]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 97
    [6.251252]
    [6.251252]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 125
    [6.252154][6.252154:252174]()
    SetDBPath(dbPath)
  • replacement in tools/update_test.go at line 127
    [6.252227][6.252227:252319]()
    dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &dsName})
    [6.252227]
    [6.252319]
    dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &dsName})
  • edit in tools/update_test.go at line 139
    [6.252577]
    [6.252577]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 165
    [6.253317]
    [6.253317]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 199
    [6.254530][6.254530:254550]()
    SetDBPath(dbPath)
  • replacement in tools/update_test.go at line 201
    [6.254608][6.254608:254700]()
    dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &dsName})
    [6.254608]
    [6.254700]
    dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &dsName})
  • edit in tools/update_test.go at line 210
    [6.254931]
    [6.254931]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 226
    [6.255277]
    [6.255277]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 243
    [6.255717]
    [6.255717]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 271
    [6.256635][6.256635:256655]()
    SetDBPath(dbPath)
  • edit in tools/update_test.go at line 275
    [6.256749]
    [6.256749]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 297
    [6.257297]
    [6.257297]
    DBPath: dbPath,
  • edit in tools/update_test.go at line 321
    [6.257999][6.257999:258019]()
    SetDBPath(dbPath)
  • replacement in tools/update_test.go at line 324
    [6.258065][6.258065:258153]()
    created, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &name})
    [6.258065]
    [6.258153]
    created, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &name})
  • replacement in tools/update_test.go at line 331
    [6.258291][6.258291:258318]()
    ID: &created.Dataset.ID,
    [6.258291]
    [6.258318]
    DBPath: dbPath,
    ID: &created.Dataset.ID,
  • edit in tools/sql.go at line 14
    [6.259494][6.259494:259544](),[6.259544][4.55:144](),[4.144][6.259544:259621](),[6.259544][6.259544:259621](),[6.259621][4.145:234](),[4.234][6.259666:259711](),[6.259666][6.259666:259711](),[6.259711][4.235:237]()
    // Package-level variable to store database path
    // Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.
    var dbPath string
    // SetDBPath sets the database path for the tools package
    // Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.
    func SetDBPath(path string) {
    dbPath = path
    }
  • replacement in tools/sql.go at line 15
    [4.238][4.238:444]()
    // resolveDBPath returns the DBPath from the input if set, otherwise falls back
    // to the package-level dbPath. This supports the incremental migration from
    // the global variable to explicit input fields.
    [4.238]
    [4.444]
    // resolveDBPath returns the DBPath from the input if set, otherwise returns
    // the empty string. Callers that need a fallback should use db.ResolveDBPath.
  • replacement in tools/sql.go at line 18
    [4.490][4.490:549]()
    if inputPath != "" {
    return inputPath
    }
    return dbPath
    [4.490]
    [6.259711]
    return db.ResolveDBPath(inputPath, "")
  • edit in tools/pattern_test.go at line 16
    [6.276774][6.276774:276794]()
    SetDBPath(testDB)
  • edit in tools/pattern_test.go at line 24
    [6.277050]
    [6.277050]
    DBPath: testDB,
  • edit in tools/pattern_test.go at line 58
    [6.277928]
    [6.277928]
    DBPath: testDB,
  • replacement in tools/pattern_test.go at line 82
    [6.278573][6.278573:278626]()
    output2, err2 := CreateOrUpdatePattern(ctx, input)
    [6.278573]
    [6.278626]
    input2 := PatternInput{
    DBPath: testDB,
    RecordSeconds: &record,
    SleepSeconds: &sleep,
    }
    output2, err2 := CreateOrUpdatePattern(ctx, input2)
  • edit in tools/pattern_test.go at line 106
    [6.279169][6.279169:279189]()
    SetDBPath(testDB)
  • edit in tools/pattern_test.go at line 125
    [6.279681]
    [6.279681]
    DBPath: testDB,
  • edit in tools/pattern_test.go at line 143
    [6.280148][6.280148:280168]()
    SetDBPath(testDB)
  • edit in tools/pattern_test.go at line 149
    [6.280318]
    [6.280318]
    DBPath: testDB,
  • replacement in tools/pattern_test.go at line 163
    [6.280594][6.280594:280606]()
    ID: &id,
    [6.280594]
    [6.280606]
    DBPath: testDB,
    ID: &id,
  • edit in tools/integration_test.go at line 15
    [6.304268][6.304268:304287]()
    SetDBPath(testDB)
  • replacement in tools/integration_test.go at line 18
    [6.304420][6.3948:3984]()
    testQueryExistingPatterns(t, ctx)
    [6.304420]
    [6.304955]
    testQueryExistingPatterns(t, ctx, testDB)
  • replacement in tools/integration_test.go at line 22
    [6.305071][6.3985:4024]()
    testCreateClusterWithPattern(t, ctx)
    [6.305071]
    [6.4024]
    testCreateClusterWithPattern(t, ctx, testDB)
  • replacement in tools/integration_test.go at line 26
    [6.305432][6.4031:4099]()
    func testQueryExistingPatterns(t *testing.T, ctx context.Context) {
    [6.305432]
    [6.4099]
    func testQueryExistingPatterns(t *testing.T, ctx context.Context, testDB string) {
  • replacement in tools/integration_test.go at line 29
    [6.4138][6.4138:4256]()
    Query: "SELECT id, record_s, sleep_s FROM cyclic_recording_pattern WHERE active = true ORDER BY record_s, sleep_s",
    [6.4138]
    [6.4256]
    DBPath: testDB,
    Query: "SELECT id, record_s, sleep_s FROM cyclic_recording_pattern WHERE active = true ORDER BY record_s, sleep_s",
  • replacement in tools/integration_test.go at line 48
    [6.306843][6.4635:4706]()
    func testCreateClusterWithPattern(t *testing.T, ctx context.Context) {
    [6.306843]
    [6.4706]
    func testCreateClusterWithPattern(t *testing.T, ctx context.Context, testDB string) {
  • replacement in tools/integration_test.go at line 52
    [6.4799][6.4799:4862]()
    Query: "SELECT id FROM dataset WHERE active = true LIMIT 1",
    [6.4799]
    [6.4862]
    DBPath: testDB,
    Query: "SELECT id FROM dataset WHERE active = true LIMIT 1",
  • edit in tools/integration_test.go at line 62
    [6.5107]
    [6.5107]
    DBPath: testDB,
  • edit in tools/integration_test.go at line 75
    [6.5538]
    [6.5538]
    DBPath: testDB,
  • edit in tools/integration_test.go at line 90
    [6.6041]
    [6.6041]
    DBPath: testDB,
  • file addition: import (d--r------)
    [6.248737]
  • file addition: import_unstructured.go (----------)
    [0.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportUnstructuredInput defines the input parameters for importing files into an unstructured dataset.
    type ImportUnstructuredInput struct {
    // DBPath is the database path; ImportUnstructured passes it through db.ResolveDBPath before use.
    DBPath string `json:"db_path"`
    // DatasetID identifies the target dataset; it must pass utils.ValidateShortID.
    DatasetID string `json:"dataset_id"`
    // FolderPath is the directory scanned for .wav files; it must exist and be a directory.
    FolderPath string `json:"folder_path"`
    // Recursive controls whether subdirectories are scanned; nil is treated as true.
    Recursive *bool `json:"recursive,omitempty"`
    }
    // ImportUnstructuredOutput reports the outcome of an ImportUnstructured run.
    type ImportUnstructuredOutput struct {
    // TotalFiles is the number of .wav files found by the folder scan.
    TotalFiles int `json:"total_files"`
    // ImportedFiles counts files that were processed and not skipped.
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    // FailedFiles counts files whose processing returned an error.
    FailedFiles int `json:"failed_files"`
    // TotalDuration is the summed duration, in seconds, of the imported (non-skipped) files.
    TotalDuration float64 `json:"total_duration_seconds"`
    // ProcessingTime is the elapsed wall-clock time, formatted with time.Duration.String.
    ProcessingTime string `json:"processing_time"`
    // Errors collects scan-stage and process-stage failures.
    Errors []utils.FileImportError `json:"errors,omitempty"`
    }
    // ImportUnstructured imports WAV files into an unstructured dataset.
    //
    // Files are stored with minimal metadata: hash, duration, sample_rate and the
    // file modification time as timestamp. No location/cluster hierarchy, no
    // astronomical data and no AudioMoth parsing are involved.
    //
    // The input is validated first, then input.FolderPath is scanned for .wav
    // files (recursively unless input.Recursive points at false). All files are
    // processed inside a single write transaction tagged "import_unstructured".
    // A file that fails is counted in FailedFiles and recorded in Errors, and the
    // loop moves on to the next file; duplicates are counted in SkippedFiles.
    //
    // ctx is checked before each file, so a cancelled import stops promptly
    // instead of hashing every remaining file. A non-nil error is returned when
    // validation fails, the context is cancelled, or the transaction helper
    // reports an error. On the error path the counters in the returned output
    // describe work attempted, which is presumably rolled back by db.WithWriteTx
    // (TODO confirm). ProcessingTime is populated on every path after validation.
    func ImportUnstructured(
    	ctx context.Context,
    	input ImportUnstructuredInput,
    ) (ImportUnstructuredOutput, error) {
    	startTime := time.Now()
    	var output ImportUnstructuredOutput

    	// Default recursive to true.
    	recursive := true
    	if input.Recursive != nil {
    		recursive = *input.Recursive
    	}

    	if err := validateUnstructuredInput(input); err != nil {
    		return output, fmt.Errorf("validation failed: %w", err)
    	}

    	// Scan for WAV files (no DB needed).
    	files, scanErrors := scanWavFiles(input.FolderPath, recursive)
    	output.Errors = append(output.Errors, scanErrors...)
    	output.TotalFiles = len(files)
    	if len(files) == 0 {
    		output.ProcessingTime = time.Since(startTime).String()
    		return output, nil
    	}

    	err := db.WithWriteTx(ctx, db.ResolveDBPath(input.DBPath, ""), "import_unstructured", func(_ *sql.DB, tx *db.LoggedTx) error {
    		for _, filePath := range files {
    			// Stop early on cancellation rather than hashing every remaining file.
    			if cerr := ctx.Err(); cerr != nil {
    				return fmt.Errorf("import cancelled: %w", cerr)
    			}

    			fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)
    			if procErr != nil {
    				output.FailedFiles++
    				output.Errors = append(output.Errors, utils.FileImportError{
    					FileName: filepath.Base(filePath),
    					Error:    procErr.Error(),
    					Stage:    utils.StageProcess,
    				})
    				continue
    			}
    			if fileResult.Skipped {
    				output.SkippedFiles++
    				continue
    			}
    			output.ImportedFiles++
    			output.TotalDuration += fileResult.Duration
    		}
    		return nil
    	})

    	output.ProcessingTime = time.Since(startTime).String()
    	if err != nil {
    		return output, err
    	}
    	return output, nil
    }
    // unstructuredFileResult holds the result of processing a single file.
    // processUnstructuredFile sets Duration for both imported and skipped files.
    type unstructuredFileResult struct {
    Skipped bool // True if duplicate
    Duration float64 // Duration in seconds
    }
    // processUnstructuredFile handles one WAV file of an unstructured import
    // inside the caller's transaction.
    //
    // It parses the WAV header, hashes the file and looks the hash up. A file
    // whose hash is already known is reported as skipped and is not linked to the
    // dataset. Otherwise a new row is inserted into file (location, cluster and
    // astronomical columns left NULL, timestamp taken from the file modification
    // time without timezone conversion) and linked to datasetID via file_dataset.
    // Any failing step yields a nil result and a wrapped error.
    func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
    	wav, err := utils.ParseWAVHeader(filePath)
    	if err != nil {
    		return nil, fmt.Errorf("WAV header parsing failed: %w", err)
    	}

    	digest, err := utils.ComputeXXH64(filePath)
    	if err != nil {
    		return nil, fmt.Errorf("hash calculation failed: %w", err)
    	}

    	_, known, err := utils.CheckDuplicateHash(tx, digest)
    	if err != nil {
    		return nil, fmt.Errorf("duplicate check failed: %w", err)
    	}
    	if known {
    		// Already in the database: skip entirely, no dataset link is created.
    		return &unstructuredFileResult{Skipped: true, Duration: wav.Duration}, nil
    	}

    	newID, err := utils.GenerateLongID()
    	if err != nil {
    		return nil, fmt.Errorf("ID generation failed: %w", err)
    	}

    	// The file modification time doubles as the local timestamp.
    	if _, err = tx.Exec(`
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, cluster_id,
    timestamp_local, duration, sample_rate,
    maybe_solar_night, maybe_civil_night, moon_phase,
    active
    ) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)
    `,
    		newID,
    		filepath.Base(filePath),
    		digest,
    		wav.FileModTime,
    		wav.Duration,
    		wav.SampleRate,
    	); err != nil {
    		return nil, fmt.Errorf("file insert failed: %w", err)
    	}

    	if _, err = tx.Exec(
    		"INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)",
    		newID, datasetID,
    	); err != nil {
    		return nil, fmt.Errorf("file_dataset insert failed: %w", err)
    	}

    	return &unstructuredFileResult{Duration: wav.Duration}, nil
    }
    // validateUnstructuredInput vets an ImportUnstructuredInput before any import
    // work starts. The checks run in order, cheapest first:
    // the dataset ID format, that FolderPath exists and is a directory, and
    // finally (against a read-only database handle) that the dataset exists, is
    // active and is of the unstructured type. The first failure is returned.
    func validateUnstructuredInput(input ImportUnstructuredInput) error {
    	if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
    		return err
    	}

    	switch stat, statErr := os.Stat(input.FolderPath); {
    	case statErr != nil:
    		return fmt.Errorf("folder not accessible: %w", statErr)
    	case !stat.IsDir():
    		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
    	}

    	// Database-backed checks: existence/active flag first, then dataset type.
    	checkDataset := func(database *sql.DB) error {
    		_, existsErr := db.DatasetExistsAndActive(database, input.DatasetID)
    		if existsErr != nil {
    			return existsErr
    		}
    		if typeErr := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); typeErr != nil {
    			return typeErr
    		}
    		return nil
    	}
    	return db.WithReadDB(db.ResolveDBPath(input.DBPath, ""), checkDataset)
    }
    // scanWavFiles collects the paths of files under folderPath whose name ends in
    // ".wav" (case-insensitive).
    //
    // With recursive set, the whole tree is walked with filepath.WalkDir; entries
    // that cannot be read are recorded as scan-stage errors and the walk carries
    // on. Without it, only the direct children of folderPath are listed, and a
    // failure to read the directory returns a nil file list plus the error.
    //
    // The second result holds every problem met while scanning; it is nil when
    // the scan was clean.
    func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
    	var (
    		files    []string
    		scanErrs []utils.FileImportError
    	)

    	isWav := func(name string) bool {
    		return strings.HasSuffix(strings.ToLower(name), ".wav")
    	}
    	scanFailure := func(name string, err error) utils.FileImportError {
    		return utils.FileImportError{
    			FileName: name,
    			Error:    err.Error(),
    			Stage:    utils.StageScan,
    		}
    	}

    	if !recursive {
    		// Non-recursive: only look at the top-level entries.
    		entries, err := os.ReadDir(folderPath)
    		if err != nil {
    			return nil, append(scanErrs, scanFailure(folderPath, err))
    		}
    		for _, entry := range entries {
    			if !entry.IsDir() && isWav(entry.Name()) {
    				files = append(files, filepath.Join(folderPath, entry.Name()))
    			}
    		}
    		return files, scanErrs
    	}

    	walkErr := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error {
    		if err != nil {
    			// Record the problem and keep walking the rest of the tree.
    			scanErrs = append(scanErrs, scanFailure(path, err))
    			return nil
    		}
    		if !d.IsDir() && isWav(d.Name()) {
    			files = append(files, path)
    		}
    		return nil
    	})
    	if walkErr != nil {
    		scanErrs = append(scanErrs, scanFailure(folderPath, walkErr))
    	}
    	return files, scanErrs
    }
  • file addition: import_segments_test.go (----------)
    [0.1]
    package imp
    import (
    "testing"
    "skraak/utils"
    )
    // TestValidateSegmentImportInput feeds validateSegmentImportInput a series of
    // malformed dataset, location and cluster IDs and expects each to be rejected.
    func TestValidateSegmentImportInput(t *testing.T) {
    	cases := []struct {
    		name    string
    		input   ImportSegmentsInput
    		failMsg string // reported when validation unexpectedly succeeds
    	}{
    		{
    			name:    "invalid dataset ID - too short",
    			input:   ImportSegmentsInput{DatasetID: "abc"},
    			failMsg: "expected error for short dataset ID",
    		},
    		{
    			name:    "invalid dataset ID - too long",
    			input:   ImportSegmentsInput{DatasetID: "abc123def456ghi789"},
    			failMsg: "expected error for long dataset ID",
    		},
    		{
    			name:    "invalid dataset ID - invalid characters",
    			input:   ImportSegmentsInput{DatasetID: "abc123!!!456"},
    			failMsg: "expected error for invalid characters in dataset ID",
    		},
    		{
    			name: "invalid location ID",
    			input: ImportSegmentsInput{
    				DatasetID:  "abc123def456",
    				LocationID: "invalid",
    			},
    			failMsg: "expected error for invalid location ID",
    		},
    		{
    			name: "invalid cluster ID",
    			input: ImportSegmentsInput{
    				DatasetID:  "abc123def456",
    				LocationID: "xyz789uvw012",
    				ClusterID:  "invalid",
    			},
    			failMsg: "expected error for invalid cluster ID",
    		},
    	}

    	for _, tc := range cases {
    		tc := tc
    		t.Run(tc.name, func(t *testing.T) {
    			if err := validateSegmentImportInput(tc.input); err == nil {
    				t.Fatal(tc.failMsg)
    			}
    		})
    	}
    }
    func TestCountTotalSegments(t *testing.T) {
    t.Run("empty", func(t *testing.T) {
    count := countTotalSegments(map[string]scannedDataFile{})
    if count != 0 {
    t.Errorf("expected 0, got %d", count)
    }
    })
    t.Run("single file - no segments", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{}},
    }
    count := countTotalSegments(files)
    if count != 0 {
    t.Errorf("expected 0, got %d", count)
    }
    })
    t.Run("single file - multiple segments", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{{}, {}, {}}},
    }
    count := countTotalSegments(files)
    if count != 3 {
    t.Errorf("expected 3, got %d", count)
    }
    })
    t.Run("multiple files", func(t *testing.T) {
    files := map[string]scannedDataFile{
    "file1": {Segments: []*utils.Segment{{}, {}}},
    "file2": {Segments: []*utils.Segment{{}}},
    "file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
    }
    count := countTotalSegments(files)
    if count != 7 {
    t.Errorf("expected 7, got %d", count)
    }
    })
    }
  • file addition: import_segments.go (----------)
    [0.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportSegmentsInput defines the input parameters for the import_segments tool.
    type ImportSegmentsInput struct {
    // DBPath is the database path supplied by the caller.
    DBPath string `json:"db_path"`
    // Folder and Mapping are caller-supplied strings; presumably the folder of
    // .data files and a mapping specification — TODO confirm against the importer.
    Folder string `json:"folder"`
    Mapping string `json:"mapping"`
    // DatasetID, LocationID and ClusterID are rejected by validateSegmentImportInput
    // when malformed (see the tests in import_segments_test.go).
    DatasetID string `json:"dataset_id"`
    LocationID string `json:"location_id"`
    ClusterID string `json:"cluster_id"`
    // ProgressHandler has no JSON tag; presumably an optional progress callback
    // receiving (processed, total, message) — TODO confirm nil handling.
    ProgressHandler func(processed, total int, message string)
    }
    // ImportSegmentsOutput defines the output structure for the import_segments tool:
    // summary statistics, the per-segment records, and any errors encountered.
    type ImportSegmentsOutput struct {
    Summary ImportSegmentsSummary `json:"summary"`
    Segments []SegmentImport `json:"segments"`
    // Errors is omitted from the JSON encoding when empty.
    Errors []ImportSegmentError `json:"errors,omitempty"`
    }
    // ImportSegmentsSummary provides summary statistics for the import operation.
    // All fields are plain counters except ProcessingTimeMs.
    type ImportSegmentsSummary struct {
    DataFilesFound int `json:"data_files_found"`
    DataFilesProcessed int `json:"data_files_processed"`
    TotalSegments int `json:"total_segments"`
    ImportedSegments int `json:"imported_segments"`
    ImportedLabels int `json:"imported_labels"`
    ImportedSubtypes int `json:"imported_subtypes"`
    // ProcessingTimeMs is the elapsed time; milliseconds per the field name and JSON tag.
    ProcessingTimeMs int64 `json:"processing_time_ms"`
    }
    // SegmentImport represents an imported segment in the output.
    type SegmentImport struct {
    // SegmentID is the ID generated for the inserted segment row.
    SegmentID string `json:"segment_id"`
    // FileName is the base name of the WAV file the segment belongs to.
    FileName string `json:"file_name"`
    // StartTime, EndTime, FreqLow and FreqHigh are copied from the parsed segment.
    StartTime float64 `json:"start_time"`
    EndTime float64 `json:"end_time"`
    FreqLow float64 `json:"freq_low"`
    FreqHigh float64 `json:"freq_high"`
    // Labels holds the labels that were imported successfully for this segment.
    Labels []LabelImport `json:"labels"`
    }
    // LabelImport represents an imported label in the output.
    type LabelImport struct {
    // LabelID is the ID generated for the inserted label row.
    LabelID string `json:"label_id"`
    // Species is the database species label obtained through the mapping.
    Species string `json:"species"`
    // CallType is the database calltype obtained through the mapping; empty
    // when the source label has no calltype.
    CallType string `json:"calltype,omitempty"`
    // Filter is the filter name taken from the source label.
    Filter string `json:"filter"`
    // Certainty is copied from the source label.
    Certainty int `json:"certainty"`
    // Comment is copied from the source label when it is non-empty.
    Comment string `json:"comment,omitempty"`
    }
    // ImportSegmentError records errors encountered during segment import.
    type ImportSegmentError struct {
    // File is the base name of the file the error refers to; it is left empty
    // for errors that are not tied to one file (e.g. transaction begin/commit).
    File string `json:"file,omitempty"`
    // Stage identifies the import stage in which the error occurred.
    Stage utils.ImportStage `json:"stage"`
    // Message is the human-readable description of the problem.
    Message string `json:"message"`
    }
    // scannedDataFile holds parsed data for a .data file.
    type scannedDataFile struct {
    // DataPath is the path of the .data file.
    DataPath string
    // WavPath is DataPath with the ".data" suffix removed.
    WavPath string
    // WavHash is the XXH64 hash of the WAV file; filled in by validateAndMapFiles.
    WavHash string
    // FileID is the ID of the matching file row; filled in by validateAndMapFiles.
    FileID string
    // Duration is first taken from the .data metadata and later replaced by the
    // duration stored in the database for the matching file row.
    Duration float64
    // Segments are the segments parsed from the .data file.
    Segments []*utils.Segment
    }
    // segmentValidation holds the results of pre-import validation (phases B+C).
    type segmentValidation struct {
    // scannedFiles are all .data files that were parsed successfully.
    scannedFiles []scannedDataFile
    // filterIDMap maps filter name -> filter ID.
    filterIDMap map[string]string
    // speciesIDMap maps database species label -> species ID.
    speciesIDMap map[string]string
    // calltypeIDMap maps database species label -> database calltype label -> calltype ID.
    calltypeIDMap map[string]map[string]string
    // fileIDMap maps file ID -> scanned file, for the files that passed validation.
    fileIDMap map[string]scannedDataFile
    }
    // validateAndPrepareSegments runs the pre-import phases B and C: it parses the
    // .data files, checks the dataset/location/cluster hierarchy, filters and
    // mapping against the database, loads the ID lookup maps and matches every
    // scanned file to a database file row.
    //
    // It returns (nil, parseErrors, nil) when no .data file could be parsed. On a
    // hard validation failure it returns a nil result together with the parse
    // errors gathered so far and the failure. On success the returned error slice
    // holds the parse errors followed by the per-file validation errors.
    func validateAndPrepareSegments(
    	database *sql.DB,
    	input ImportSegmentsInput,
    	mapping utils.MappingFile,
    	dataFiles []string,
    ) (*segmentValidation, []ImportSegmentError, error) {
    	// Phase B: parse everything and gather the distinct filters/species/calltypes.
    	scanned, scanErrs, filters, species, calltypes := scanAllDataFiles(dataFiles, input.Folder)
    	if len(scanned) == 0 {
    		return nil, scanErrs, nil
    	}

    	// Phase C: database-side validation, cheapest checks first.
    	if hierarchyErr := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID); hierarchyErr != nil {
    		return nil, scanErrs, hierarchyErr
    	}

    	filterIDs, err := validateFiltersExist(database, filters)
    	if err != nil {
    		return nil, scanErrs, fmt.Errorf("filter validation failed: %w", err)
    	}

    	check, err := utils.ValidateMappingAgainstDB(database, mapping, species, calltypes)
    	switch {
    	case err != nil:
    		return nil, scanErrs, fmt.Errorf("mapping validation failed: %w", err)
    	case check.HasErrors():
    		return nil, scanErrs, fmt.Errorf("mapping validation failed: %s", check.Error())
    	}

    	speciesIDs, calltypeIDs, err := loadSpeciesCalltypeIDs(database, mapping, species, calltypes)
    	if err != nil {
    		return nil, scanErrs, fmt.Errorf("failed to load species/calltype IDs: %w", err)
    	}

    	// Per-file checks: hash known in cluster, linked to dataset, no existing labels.
    	files, fileErrs := validateAndMapFiles(database, scanned, input.ClusterID, input.DatasetID)

    	result := &segmentValidation{
    		scannedFiles:  scanned,
    		filterIDMap:   filterIDs,
    		speciesIDMap:  speciesIDs,
    		calltypeIDMap: calltypeIDs,
    		fileIDMap:     files,
    	}
    	return result, append(scanErrs, fileErrs...), nil
    }
    // ImportSegments imports segments from AviaNZ .data files into the database.
    //
    // The run proceeds in phases: (A) input validation, mapping load and .data
    // discovery, (B+C) parsing and validation against the database, (D) the
    // transactional import, and (E) writing the generated IDs back to the .data
    // files. Per-file problems are collected in the output's Errors; a non-nil
    // error is returned only for failures that stop the whole run.
    func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
    	began := time.Now()
    	output := ImportSegmentsOutput{
    		Segments: make([]SegmentImport, 0),
    		Errors:   make([]ImportSegmentError, 0),
    	}

    	// Phase A: validate the input, load the mapping, locate the .data files.
    	if inputErr := validateSegmentImportInput(input); inputErr != nil {
    		return output, inputErr
    	}

    	mapping, err := utils.LoadMappingFile(input.Mapping)
    	if err != nil {
    		return output, fmt.Errorf("failed to load mapping file: %w", err)
    	}

    	dataFiles, err := utils.FindDataFiles(input.Folder)
    	if err != nil {
    		return output, fmt.Errorf("failed to find .data files: %w", err)
    	}
    	output.Summary.DataFilesFound = len(dataFiles)
    	if output.Summary.DataFilesFound == 0 {
    		return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
    	}

    	// Phase B+C: parse the data files and validate them against the database.
    	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
    	if err != nil {
    		return output, fmt.Errorf("failed to open database: %w", err)
    	}
    	defer database.Close()

    	prepared, prepErrs, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
    	output.Errors = append(output.Errors, prepErrs...)
    	if err != nil {
    		return output, err
    	}
    	if prepared == nil || len(prepared.fileIDMap) == 0 {
    		// Nothing importable; report what was collected so far.
    		output.Summary.ProcessingTimeMs = time.Since(began).Milliseconds()
    		return output, nil
    	}

    	// Phase D: transactional import.
    	segments, labelCount, subtypeCount, updates, dbErrs := importSegmentsIntoDB(
    		ctx,
    		database,
    		prepared.fileIDMap,
    		prepared.scannedFiles,
    		mapping,
    		prepared.filterIDMap,
    		prepared.speciesIDMap,
    		prepared.calltypeIDMap,
    		input.DatasetID,
    		input.ProgressHandler,
    	)
    	output.Errors = append(output.Errors, dbErrs...)
    	output.Segments = append(output.Segments, segments...)

    	// Phase E: write the generated IDs back to the .data files.
    	if len(updates) > 0 {
    		output.Errors = append(output.Errors, writeIDsToDataFiles(updates)...)
    	}

    	summary := &output.Summary
    	summary.DataFilesProcessed = len(prepared.fileIDMap)
    	summary.TotalSegments = countTotalSegments(prepared.fileIDMap)
    	summary.ImportedSegments = len(segments)
    	summary.ImportedLabels = labelCount
    	summary.ImportedSubtypes = subtypeCount
    	summary.ProcessingTimeMs = time.Since(began).Milliseconds()
    	return output, nil
    }
    // validateSegmentImportInput checks the caller-supplied parameters before any
    // database work: the folder must exist and be a directory, the mapping file
    // must exist, and the dataset/location/cluster IDs must be well-formed short IDs.
    func validateSegmentImportInput(input ImportSegmentsInput) error {
    	folderInfo, statErr := os.Stat(input.Folder)
    	if statErr != nil {
    		return fmt.Errorf("folder does not exist: %s", input.Folder)
    	}
    	if !folderInfo.IsDir() {
    		return fmt.Errorf("path is not a folder: %s", input.Folder)
    	}

    	if _, statErr = os.Stat(input.Mapping); statErr != nil {
    		return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
    	}

    	// ID format checks, in hierarchy order.
    	if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
    		return idErr
    	}
    	if idErr := utils.ValidateShortID(input.LocationID, "location_id"); idErr != nil {
    		return idErr
    	}
    	if idErr := utils.ValidateShortID(input.ClusterID, "cluster_id"); idErr != nil {
    		return idErr
    	}
    	return nil
    }
    // validateSegmentHierarchy checks the dataset -> location -> cluster chain
    // against the database and returns the first failure encountered.
    func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
    	// The dataset must pass the import type check.
    	if datasetErr := db.ValidateDatasetTypeForImport(dbConn, datasetID); datasetErr != nil {
    		return datasetErr
    	}

    	// The location must belong to that dataset.
    	if locationErr := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); locationErr != nil {
    		return locationErr
    	}

    	// The cluster must belong to that location.
    	if clusterErr := db.ClusterBelongsToLocation(dbConn, clusterID, locationID); clusterErr != nil {
    		return clusterErr
    	}

    	return nil
    }
    // scanAllDataFiles parses every .data file in dataFiles and gathers the
    // distinct filter names, species and per-species calltypes used by their labels.
    //
    // A .data file is skipped (and an error recorded) when the WAV file next to
    // it is missing or when it cannot be parsed. The results are, in order: the
    // successfully scanned files, the recorded errors, the filter set, the
    // species set, and the species -> calltype set.
    func scanAllDataFiles(dataFiles []string, folder string) (
    	[]scannedDataFile,
    	[]ImportSegmentError,
    	map[string]bool,
    	map[string]bool,
    	map[string]map[string]bool,
    ) {
    	var (
    		accepted []scannedDataFile
    		scanErrs []ImportSegmentError
    	)
    	filters := make(map[string]bool)
    	species := make(map[string]bool)
    	calltypes := make(map[string]map[string]bool) // species -> calltype -> true

    	// skip records a validation error against the given .data file.
    	skip := func(dataPath, message string) {
    		scanErrs = append(scanErrs, ImportSegmentError{
    			File:    filepath.Base(dataPath),
    			Stage:   utils.StageValidation,
    			Message: message,
    		})
    	}

    	for _, dataPath := range dataFiles {
    		// The WAV file shares the .data file's name minus the ".data" suffix.
    		wavPath := strings.TrimSuffix(dataPath, ".data")
    		if _, statErr := os.Stat(wavPath); statErr != nil {
    			skip(dataPath, fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)))
    			continue
    		}

    		parsed, parseErr := utils.ParseDataFile(dataPath)
    		if parseErr != nil {
    			skip(dataPath, fmt.Sprintf("failed to parse .data file: %v", parseErr))
    			continue
    		}

    		for _, segment := range parsed.Segments {
    			for _, lbl := range segment.Labels {
    				filters[lbl.Filter] = true
    				species[lbl.Species] = true
    				if lbl.CallType == "" {
    					continue
    				}
    				forSpecies, seen := calltypes[lbl.Species]
    				if !seen {
    					forSpecies = make(map[string]bool)
    					calltypes[lbl.Species] = forSpecies
    				}
    				forSpecies[lbl.CallType] = true
    			}
    		}

    		accepted = append(accepted, scannedDataFile{
    			DataPath: dataPath,
    			WavPath:  wavPath,
    			Duration: parsed.Meta.Duration,
    			Segments: parsed.Segments,
    		})
    	}

    	return accepted, scanErrs, filters, species, calltypes
    }
    // validateFiltersExist checks that every filter name in filterNames exists as
    // an active row in the filter table and returns a name -> ID map.
    //
    // An empty input yields an empty map and no error. If any name has no active
    // row, an error listing the missing names is returned. Query, scan and
    // iteration failures are returned as errors rather than being reported as
    // missing filters.
    func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
    	filterIDMap := make(map[string]string, len(filterNames))
    	if len(filterNames) == 0 {
    		return filterIDMap, nil
    	}

    	args := make([]any, 0, len(filterNames))
    	for name := range filterNames {
    		args = append(args, name)
    	}

    	query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(args)) + `) AND active = true`
    	rows, err := dbConn.Query(query, args...)
    	if err != nil {
    		return nil, fmt.Errorf("failed to query filters: %w", err)
    	}
    	defer rows.Close()

    	for rows.Next() {
    		var id, name string
    		if err := rows.Scan(&id, &name); err != nil {
    			return nil, fmt.Errorf("failed to scan filter row: %w", err)
    		}
    		filterIDMap[name] = id
    	}
    	// Next returning false can mean "done" or "failed"; Err tells them apart.
    	if err := rows.Err(); err != nil {
    		return nil, fmt.Errorf("failed to read filter rows: %w", err)
    	}

    	// Any requested name without a row is missing (or inactive).
    	var missing []string
    	for name := range filterNames {
    		if _, exists := filterIDMap[name]; !exists {
    			missing = append(missing, name)
    		}
    	}
    	if len(missing) > 0 {
    		return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
    	}

    	return filterIDMap, nil
    }
    // loadSpeciesCalltypeIDs resolves the species and calltypes found in the .data
    // files to database IDs.
    //
    // It returns a map of database species label -> species ID and a map of
    // database species label -> database calltype label -> calltype ID. Species
    // that have no entry in the mapping are skipped. A calltype with no active
    // row is left out of the map; importCalltype reports it later as
    // "calltype ID not found". Any other query, scan or iteration failure is
    // returned as an error.
    func loadSpeciesCalltypeIDs(
    	dbConn *sql.DB,
    	mapping utils.MappingFile,
    	uniqueSpecies map[string]bool,
    	uniqueCalltypes map[string]map[string]bool,
    ) (map[string]string, map[string]map[string]string, error) {
    	speciesIDMap := make(map[string]string)
    	calltypeIDMap := make(map[string]map[string]string) // dbSpecies -> dbCalltype -> calltype_id

    	// Collect the distinct database species labels reachable through the mapping.
    	dbSpeciesSet := make(map[string]bool)
    	for dataSpecies := range uniqueSpecies {
    		if dbSpecies, ok := mapping.GetDBSpecies(dataSpecies); ok {
    			dbSpeciesSet[dbSpecies] = true
    		}
    	}

    	// Load species IDs with a single IN query.
    	if len(dbSpeciesSet) > 0 {
    		args := make([]any, 0, len(dbSpeciesSet))
    		for s := range dbSpeciesSet {
    			args = append(args, s)
    		}

    		query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(args)) + `) AND active = true`
    		rows, err := dbConn.Query(query, args...)
    		if err != nil {
    			return nil, nil, fmt.Errorf("failed to query species: %w", err)
    		}
    		defer rows.Close()

    		for rows.Next() {
    			var id, label string
    			if err := rows.Scan(&id, &label); err != nil {
    				return nil, nil, fmt.Errorf("failed to scan species row: %w", err)
    			}
    			speciesIDMap[label] = id
    		}
    		// Next returning false can mean "done" or "failed"; Err tells them apart.
    		if err := rows.Err(); err != nil {
    			return nil, nil, fmt.Errorf("failed to read species rows: %w", err)
    		}
    	}

    	// Load calltype IDs, one lookup per (species, calltype) pair.
    	for dataSpecies, ctSet := range uniqueCalltypes {
    		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
    		if !ok {
    			continue
    		}
    		if calltypeIDMap[dbSpecies] == nil {
    			calltypeIDMap[dbSpecies] = make(map[string]string)
    		}

    		for dataCalltype := range ctSet {
    			dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)

    			var calltypeID string
    			err := dbConn.QueryRow(`
    				SELECT ct.id
    				FROM call_type ct
    				JOIN species s ON ct.species_id = s.id
    				WHERE s.label = ? AND ct.label = ? AND ct.active = true
    			`, dbSpecies, dbCalltype).Scan(&calltypeID)
    			switch {
    			case err == sql.ErrNoRows:
    				// Unknown calltype: leave it unmapped so the import step reports it.
    				continue
    			case err != nil:
    				return nil, nil, fmt.Errorf("failed to query calltype %s/%s: %w", dbSpecies, dbCalltype, err)
    			}
    			calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
    		}
    	}

    	return speciesIDMap, calltypeIDMap, nil
    }
    // validateAndMapFiles matches each scanned file to a database file row and
    // keeps only the files that are safe to import into.
    //
    // For every scanned file it computes the WAV hash, looks up the active file
    // row with that hash in the given cluster, checks that the file is linked to
    // the dataset through file_dataset, and checks that the file has no active
    // labels yet. Files failing any step are recorded in the returned error slice
    // and left out of the returned file ID -> scanned file map. Accepted entries
    // carry the hash, the file ID and the duration stored in the database.
    func validateAndMapFiles(
    	dbConn *sql.DB,
    	scannedFiles []scannedDataFile,
    	clusterID string,
    	datasetID string,
    ) (map[string]scannedDataFile, []ImportSegmentError) {
    	accepted := make(map[string]scannedDataFile)
    	var problems []ImportSegmentError

    	// reject records a problem against the WAV file's base name.
    	reject := func(wavPath string, stage utils.ImportStage, message string) {
    		problems = append(problems, ImportSegmentError{
    			File:    filepath.Base(wavPath),
    			Stage:   stage,
    			Message: message,
    		})
    	}

    	for _, sf := range scannedFiles {
    		hash, err := utils.ComputeXXH64(sf.WavPath)
    		if err != nil {
    			reject(sf.WavPath, utils.StageHash, fmt.Sprintf("failed to compute hash: %v", err))
    			continue
    		}
    		sf.WavHash = hash

    		// Locate the file row by hash within the cluster.
    		var (
    			fileID   string
    			duration float64
    		)
    		err = dbConn.QueryRow(`
    			SELECT id, duration FROM file WHERE xxh64_hash = ? AND cluster_id = ? AND active = true
    		`, hash, clusterID).Scan(&fileID, &duration)
    		switch {
    		case err == sql.ErrNoRows:
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("file hash not found in database for cluster (hash: %s)", hash))
    			continue
    		case err != nil:
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("failed to query file: %v", err))
    			continue
    		}
    		sf.FileID = fileID
    		sf.Duration = duration

    		// The file must be linked to the dataset via the file_dataset junction table.
    		var linked bool
    		err = dbConn.QueryRow(`
    			SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)
    		`, fileID, datasetID).Scan(&linked)
    		if err != nil {
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("failed to verify file-dataset link: %v", err))
    			continue
    		}
    		if !linked {
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("file exists in cluster but is not linked to dataset %s", datasetID))
    			continue
    		}

    		// Only fresh imports are allowed: the file must have no active labels.
    		var existingLabels int
    		err = dbConn.QueryRow(`
    			SELECT COUNT(*) FROM label l
    			JOIN segment s ON l.segment_id = s.id
    			WHERE s.file_id = ? AND l.active = true
    		`, fileID).Scan(&existingLabels)
    		if err != nil {
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("failed to check existing labels: %v", err))
    			continue
    		}
    		if existingLabels > 0 {
    			reject(sf.WavPath, utils.StageValidation, fmt.Sprintf("file already has %d label(s) - fresh imports only", existingLabels))
    			continue
    		}

    		accepted[fileID] = sf
    	}

    	return accepted, problems
    }
    // dataFileUpdate holds data to write back to .data file after import.
    type dataFileUpdate struct {
    // DataPath is the .data file to rewrite.
    DataPath string
    // WavHash is stored in the file's metadata under "skraak_hash".
    WavHash string
    // LabelIDs holds the generated label IDs, stored on each label under
    // "skraak_label_id".
    LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
    }
    // importLabelResult holds the result of importing a single label.
    type importLabelResult struct {
    // labelImport is the output record for the label; set on success.
    labelImport LabelImport
    // labelID is the ID generated for the label row; set on success.
    labelID string
    // subtypesImported is 1 when a label_subtype row was inserted, otherwise 0.
    subtypesImported int
    // err describes the failure; it is only meaningful when hasError is true.
    err ImportSegmentError
    // hasError reports whether the import of this label failed.
    hasError bool
    }
    // importSingleLabel inserts one label row and, when present, its comment
    // metadata and calltype subtype, all inside the supplied transaction.
    //
    // The species is resolved through the mapping and speciesIDMap, the filter
    // through filterIDMap. A non-empty comment is stored in label_metadata as a
    // JSON object of the form {"comment": "..."}; the comment is encoded with
    // encoding/json so quotes, backslashes, newlines and control characters
    // yield valid JSON. segIdx and labelIdx are accepted for the caller's
    // bookkeeping and are not used here.
    //
    // On failure the result has hasError set and err filled in; on success it
    // carries the output record, the generated label ID and the number of
    // subtype rows inserted (0 or 1).
    func importSingleLabel(
    	ctx context.Context,
    	tx *db.LoggedTx,
    	label *utils.Label,
    	segmentID string,
    	segIdx, labelIdx int,
    	sf scannedDataFile,
    	mapping utils.MappingFile,
    	filterIDMap map[string]string,
    	speciesIDMap map[string]string,
    	calltypeIDMap map[string]map[string]string,
    ) importLabelResult {
    	// fail builds an import-stage error result for this label's .data file.
    	fail := func(format string, args ...any) importLabelResult {
    		return importLabelResult{err: ImportSegmentError{
    			File:    filepath.Base(sf.DataPath),
    			Stage:   utils.StageImport,
    			Message: fmt.Sprintf(format, args...),
    		}, hasError: true}
    	}

    	dbSpecies, ok := mapping.GetDBSpecies(label.Species)
    	if !ok {
    		return fail("species not found in mapping: %s", label.Species)
    	}
    	speciesID, ok := speciesIDMap[dbSpecies]
    	if !ok {
    		return fail("species ID not found: %s", dbSpecies)
    	}
    	filterID, ok := filterIDMap[label.Filter]
    	if !ok {
    		return fail("filter ID not found: %s", label.Filter)
    	}

    	labelID, err := utils.GenerateLongID()
    	if err != nil {
    		return fail("failed to generate label ID: %v", err)
    	}

    	_, err = tx.ExecContext(ctx, `
    		INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
    		VALUES (?, ?, ?, ?, ?, now(), now(), true)
    	`, labelID, segmentID, speciesID, filterID, label.Certainty)
    	if err != nil {
    		return fail("failed to insert label: %v", err)
    	}

    	// Insert label_metadata if comment exists.
    	if label.Comment != "" {
    		// Encode only the value so the stored object keeps its original layout
    		// while the string itself is escaped correctly.
    		encodedComment, err := json.Marshal(label.Comment)
    		if err != nil {
    			return fail("failed to encode label comment: %v", err)
    		}
    		metadataJSON := `{"comment": ` + string(encodedComment) + `}`
    		if _, err := tx.ExecContext(ctx, `
    			INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
    			VALUES (?, ?, now(), now(), true)
    		`, labelID, metadataJSON); err != nil {
    			return fail("failed to insert label_metadata: %v", err)
    		}
    	}

    	labelImport := LabelImport{
    		LabelID:   labelID,
    		Species:   dbSpecies,
    		Filter:    label.Filter,
    		Certainty: label.Certainty,
    		Comment:   label.Comment,
    	}

    	// Insert label_subtype if calltype exists.
    	if label.CallType != "" {
    		if err := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); err != nil {
    			return importLabelResult{err: *err, hasError: true}
    		}
    		labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
    		return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
    	}

    	return importLabelResult{labelImport: labelImport, labelID: labelID}
    }
    // importCalltype inserts the label_subtype row that attaches a calltype to an
    // already-inserted label.
    //
    // The calltype is translated through the mapping and looked up in
    // calltypeIDMap (database species -> database calltype -> ID). It returns nil
    // on success, or an import-stage error when the calltype ID is unknown, the
    // subtype ID cannot be generated, or the insert fails.
    func importCalltype(
    	ctx context.Context,
    	tx *db.LoggedTx,
    	labelID string,
    	label *utils.Label,
    	dbSpecies string,
    	filterID string,
    	mapping utils.MappingFile,
    	calltypeIDMap map[string]map[string]string,
    	sf scannedDataFile,
    ) *ImportSegmentError {
    	// failure builds an import-stage error for this label's .data file.
    	failure := func(message string) *ImportSegmentError {
    		return &ImportSegmentError{
    			File:    filepath.Base(sf.DataPath),
    			Stage:   utils.StageImport,
    			Message: message,
    		}
    	}

    	dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)

    	// Indexing a missing (nil) inner map yields "", same as an unknown calltype.
    	calltypeID := calltypeIDMap[dbSpecies][dbCalltype]
    	if calltypeID == "" {
    		return failure(fmt.Sprintf("calltype ID not found: %s/%s", dbSpecies, dbCalltype))
    	}

    	subtypeID, genErr := utils.GenerateLongID()
    	if genErr != nil {
    		return failure(fmt.Sprintf("failed to generate label_subtype ID: %v", genErr))
    	}

    	if _, execErr := tx.ExecContext(ctx, `
    		INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
    		VALUES (?, ?, ?, ?, ?, now(), now(), true)
    	`, subtypeID, labelID, calltypeID, filterID, label.Certainty); execErr != nil {
    		return failure(fmt.Sprintf("failed to insert label_subtype: %v", execErr))
    	}

    	return nil
    }
    // importSegmentsIntoDB performs the transactional import of all validated files.
    //
    // Every segment of every file in fileIDMap is imported inside one logged
    // transaction. A segment for which no label could be imported is removed
    // again, provided a segment row was actually inserted. progressHandler, when
    // non-nil, is called once per file before its segments are processed.
    //
    // It returns the imported segments, the number of imported labels and
    // subtypes, the per-file updates to write back to the .data files, and the
    // collected errors. If the transaction cannot be started or committed, only
    // the errors are returned and all other results are zero.
    func importSegmentsIntoDB(
    	ctx context.Context,
    	database *sql.DB,
    	fileIDMap map[string]scannedDataFile,
    	scannedFiles []scannedDataFile,
    	mapping utils.MappingFile,
    	filterIDMap map[string]string,
    	speciesIDMap map[string]string,
    	calltypeIDMap map[string]map[string]string,
    	datasetID string,
    	progressHandler func(processed, total int, message string),
    ) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
    	var importedSegments []SegmentImport
    	var importErrs []ImportSegmentError
    	importedLabels := 0
    	importedSubtypes := 0
    	var fileUpdates []dataFileUpdate

    	tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
    	if err != nil {
    		importErrs = append(importErrs, ImportSegmentError{
    			Stage:   utils.StageImport,
    			Message: fmt.Sprintf("failed to begin transaction: %v", err),
    		})
    		return nil, 0, 0, nil, importErrs
    	}
    	defer tx.Rollback()

    	totalFiles := len(fileIDMap)
    	processedFiles := 0

    	for _, sf := range fileIDMap {
    		if sf.FileID == "" {
    			continue
    		}

    		processedFiles++
    		if progressHandler != nil {
    			progressHandler(processedFiles, totalFiles, filepath.Base(sf.DataPath))
    		}

    		fileUpdate := dataFileUpdate{
    			DataPath: sf.DataPath,
    			WavHash:  sf.WavHash,
    			LabelIDs: make(map[int]map[int]string),
    		}

    		for segIdx, seg := range sf.Segments {
    			segImp, labelIDs, subtypes, segErrs := importSegment(ctx, tx, seg, segIdx, sf, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
    			importErrs = append(importErrs, segErrs...)
    			importedSubtypes += subtypes

    			if len(segImp.Labels) > 0 {
    				importedSegments = append(importedSegments, segImp)
    				importedLabels += len(labelIDs)
    				fileUpdate.LabelIDs[segIdx] = labelIDs
    				continue
    			}

    			// importSegment returns an empty SegmentImport when it fails before
    			// inserting the segment row; there is nothing to delete then.
    			if segImp.SegmentID == "" {
    				continue
    			}

    			// Delete orphaned segment (no labels succeeded).
    			if _, err := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, segImp.SegmentID); err != nil {
    				importErrs = append(importErrs, ImportSegmentError{
    					File:    filepath.Base(sf.DataPath),
    					Stage:   utils.StageImport,
    					Message: fmt.Sprintf("failed to delete orphaned segment: %v", err),
    				})
    			}
    		}

    		fileUpdates = append(fileUpdates, fileUpdate)
    	}

    	if err := tx.Commit(); err != nil {
    		importErrs = append(importErrs, ImportSegmentError{
    			Stage:   utils.StageImport,
    			Message: fmt.Sprintf("failed to commit transaction: %v", err),
    		})
    		return nil, 0, 0, nil, importErrs
    	}

    	return importedSegments, importedLabels, importedSubtypes, fileUpdates, importErrs
    }
    // importSegment inserts one segment row and then each of its labels.
    //
    // The segment is rejected before any insert when its start is not strictly
    // before its end, or when its end lies beyond the file duration. In those
    // cases, and when ID generation or the insert fails, an empty SegmentImport,
    // a nil label map, zero subtypes and a single error are returned. Otherwise
    // the result holds the segment record with the labels that succeeded, a
    // label index -> label ID map, the number of subtype rows inserted, and one
    // error per label that failed.
    func importSegment(
    	ctx context.Context,
    	tx *db.LoggedTx,
    	seg *utils.Segment,
    	segIdx int,
    	sf scannedDataFile,
    	datasetID string,
    	mapping utils.MappingFile,
    	filterIDMap map[string]string,
    	speciesIDMap map[string]string,
    	calltypeIDMap map[string]map[string]string,
    ) (SegmentImport, map[int]string, int, []ImportSegmentError) {
    	// abort produces the "nothing imported" result carrying one import-stage error.
    	abort := func(message string) (SegmentImport, map[int]string, int, []ImportSegmentError) {
    		return SegmentImport{}, nil, 0, []ImportSegmentError{{
    			File:    filepath.Base(sf.DataPath),
    			Stage:   utils.StageImport,
    			Message: message,
    		}}
    	}

    	// Bounds checks happen before anything is written.
    	if seg.StartTime >= seg.EndTime {
    		return abort(fmt.Sprintf("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime))
    	}
    	if seg.EndTime > sf.Duration {
    		return abort(fmt.Sprintf("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration))
    	}

    	segmentID, genErr := utils.GenerateLongID()
    	if genErr != nil {
    		return abort(fmt.Sprintf("failed to generate segment ID: %v", genErr))
    	}

    	if _, execErr := tx.ExecContext(ctx, `
    		INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
    		VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    	`, segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh); execErr != nil {
    		return abort(fmt.Sprintf("failed to insert segment: %v", execErr))
    	}

    	imported := SegmentImport{
    		SegmentID: segmentID,
    		FileName:  filepath.Base(sf.WavPath),
    		StartTime: seg.StartTime,
    		EndTime:   seg.EndTime,
    		FreqLow:   seg.FreqLow,
    		FreqHigh:  seg.FreqHigh,
    		Labels:    make([]LabelImport, 0),
    	}

    	var (
    		labelErrs    []ImportSegmentError
    		subtypeCount int
    	)
    	idsByLabel := make(map[int]string)

    	for labelIdx, lbl := range seg.Labels {
    		outcome := importSingleLabel(ctx, tx, lbl, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
    		if !outcome.hasError {
    			idsByLabel[labelIdx] = outcome.labelID
    			imported.Labels = append(imported.Labels, outcome.labelImport)
    			subtypeCount += outcome.subtypesImported
    			continue
    		}
    		// A failed label does not stop the remaining labels of the segment.
    		labelErrs = append(labelErrs, outcome.err)
    	}

    	return imported, idsByLabel, subtypeCount, labelErrs
    }
    // countTotalSegments returns the sum of the segment counts of all files in
    // fileIDMap; an empty or nil map yields 0.
    func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
    	var total int
    	for fileID := range fileIDMap {
    		total += len(fileIDMap[fileID].Segments)
    	}
    	return total
    }
    // writeIDsToDataFiles stores the results of an import back into the .data
    // files: the WAV hash goes into the file metadata under "skraak_hash" and
    // each generated label ID goes onto its label under "skraak_label_id".
    //
    // Each file is re-parsed, updated and rewritten independently. Segment or
    // label indices that no longer exist in the re-parsed file are ignored. A
    // file that cannot be parsed or written is recorded in the returned errors
    // and does not stop the remaining files.
    func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
    	var writeErrs []ImportSegmentError

    	// report records an import-stage error against the given .data file.
    	report := func(dataPath, message string) {
    		writeErrs = append(writeErrs, ImportSegmentError{
    			File:    filepath.Base(dataPath),
    			Stage:   utils.StageImport,
    			Message: message,
    		})
    	}

    	for _, update := range fileUpdates {
    		parsed, parseErr := utils.ParseDataFile(update.DataPath)
    		if parseErr != nil {
    			report(update.DataPath, fmt.Sprintf("failed to re-parse .data file for writing: %v", parseErr))
    			continue
    		}

    		// File-level metadata: remember which WAV content was imported.
    		if parsed.Meta.Extra == nil {
    			parsed.Meta.Extra = make(map[string]any)
    		}
    		parsed.Meta.Extra["skraak_hash"] = update.WavHash

    		// Label-level metadata: attach the generated IDs by position.
    		for segIdx, idsByLabel := range update.LabelIDs {
    			if segIdx >= len(parsed.Segments) {
    				continue
    			}
    			labels := parsed.Segments[segIdx].Labels
    			for labelIdx, id := range idsByLabel {
    				if labelIdx < len(labels) {
    					target := labels[labelIdx]
    					if target.Extra == nil {
    						target.Extra = make(map[string]any)
    					}
    					target.Extra["skraak_label_id"] = id
    				}
    			}
    		}

    		if writeErr := parsed.Write(update.DataPath); writeErr != nil {
    			report(update.DataPath, fmt.Sprintf("failed to write updated .data file: %v", writeErr))
    		}
    	}

    	return writeErrs
    }
  • file addition: import_files.go (----------)
    [0.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportAudioFilesInput defines the input parameters for the import_audio_files tool.
    type ImportAudioFilesInput struct {
    // DBPath is the database path; it is passed through db.ResolveDBPath before use.
    DBPath string `json:"db_path"`
    // FolderPath is the directory to import from; it must exist and be a directory.
    FolderPath string `json:"folder_path"`
    // DatasetID, LocationID and ClusterID identify the hierarchy to import into.
    DatasetID string `json:"dataset_id"`
    LocationID string `json:"location_id"`
    ClusterID string `json:"cluster_id"`
    Recursive *bool `json:"recursive,omitempty"` // *bool because default is true; plain bool would make "not provided" indistinguishable from "false"
    }
    // ImportAudioFilesOutput defines the output structure for the import_audio_files tool.
    type ImportAudioFilesOutput struct {
    // Summary holds the aggregate counters for the run.
    Summary ImportSummary `json:"summary"`
    // FileIDs is currently always returned as an empty list; ImportAudioFiles
    // does not track file IDs.
    FileIDs []string `json:"file_ids"`
    // Errors holds the per-file errors reported by the cluster import.
    Errors []utils.FileImportError `json:"errors,omitempty"`
    }
    // ImportSummary provides summary statistics for the import operation.
    // All counters are copied from the result of utils.ImportCluster.
    type ImportSummary struct {
    TotalFiles int `json:"total_files"`
    ImportedFiles int `json:"imported_files"`
    SkippedFiles int `json:"skipped_files"` // Duplicates
    FailedFiles int `json:"failed_files"`
    AudioMothFiles int `json:"audiomoth_files"`
    // TotalDuration is serialized as "total_duration_seconds".
    TotalDuration float64 `json:"total_duration_seconds"`
    // ProcessingTime is the elapsed wall-clock time formatted with Duration.String.
    ProcessingTime string `json:"processing_time"`
    }
    // ImportAudioFiles batch imports WAV files from a folder with hash-based
    // duplicate detection.
    //
    // The input is validated first (folder, ID formats, database hierarchy).
    // The cluster path is then ensured and utils.ImportCluster is run inside a
    // logged transaction. The transaction is rolled back if the cluster import
    // fails and committed otherwise. Recursive defaults to true when not provided.
    // On any error the zero-value output is returned alongside it.
    func ImportAudioFiles(
    	ctx context.Context,
    	input ImportAudioFilesInput,
    ) (ImportAudioFilesOutput, error) {
    	began := time.Now()
    	var output ImportAudioFilesOutput

    	// A nil pointer means "not provided", which defaults to recursive.
    	recursive := input.Recursive == nil || *input.Recursive

    	// Validate folder and database hierarchy (dataset → location → cluster).
    	if validationErr := validateImportInput(input, db.ResolveDBPath(input.DBPath, "")); validationErr != nil {
    		return output, fmt.Errorf("validation failed: %w", validationErr)
    	}

    	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
    	if err != nil {
    		return output, fmt.Errorf("failed to open database: %w", err)
    	}
    	defer database.Close()

    	// Set cluster path if empty.
    	if pathErr := utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath); pathErr != nil {
    		return output, fmt.Errorf("failed to set cluster path: %w", pathErr)
    	}

    	// The actual import work is delegated to utils.ImportCluster.
    	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
    	if err != nil {
    		return output, fmt.Errorf("failed to begin transaction: %w", err)
    	}

    	request := utils.ClusterImportInput{
    		FolderPath: input.FolderPath,
    		DatasetID:  input.DatasetID,
    		LocationID: input.LocationID,
    		ClusterID:  input.ClusterID,
    		Recursive:  recursive,
    	}
    	clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), request)
    	if err != nil {
    		tx.Rollback()
    		return output, fmt.Errorf("cluster import failed: %w", err)
    	}

    	if commitErr := tx.Commit(); commitErr != nil {
    		return output, fmt.Errorf("transaction commit failed: %w", commitErr)
    	}

    	// Map the cluster result to the tool's output format.
    	output.Summary = ImportSummary{
    		TotalFiles:     clusterOutput.TotalFiles,
    		ImportedFiles:  clusterOutput.ImportedFiles,
    		SkippedFiles:   clusterOutput.SkippedFiles,
    		FailedFiles:    clusterOutput.FailedFiles,
    		AudioMothFiles: clusterOutput.AudioMothFiles,
    		TotalDuration:  clusterOutput.TotalDuration,
    		ProcessingTime: time.Since(began).String(),
    	}
    	output.FileIDs = []string{} // File IDs not tracked currently
    	output.Errors = clusterOutput.Errors
    	return output, nil
    }
    // validateImportInput checks that input.FolderPath is an accessible directory
    // and then validates the dataset/location/cluster IDs against the database at
    // dbPath via validateHierarchyIDs.
    func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
        stat, statErr := os.Stat(input.FolderPath)
        switch {
        case statErr != nil:
            return fmt.Errorf("folder not accessible: %w", statErr)
        case !stat.IsDir():
            return fmt.Errorf("path is not a directory: %s", input.FolderPath)
        }
        return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
    }
    // validateHierarchyIDs validates the format of the dataset, location and
    // cluster IDs and then confirms, over a read connection to dbPath, that the
    // dataset is acceptable for import, the location belongs to the dataset and
    // the cluster belongs to the location. The first failing check is returned.
    func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
        // Cheap format checks run before any database access.
        formatChecks := [...]struct{ value, field string }{
            {datasetID, "dataset_id"},
            {locationID, "location_id"},
            {clusterID, "cluster_id"},
        }
        for _, check := range formatChecks {
            if err := utils.ValidateShortID(check.value, check.field); err != nil {
                return err
            }
        }

        return db.WithReadDB(dbPath, func(conn *sql.DB) error {
            // Dataset must pass the import-type check.
            if err := db.ValidateDatasetTypeForImport(conn, datasetID); err != nil {
                return err
            }
            // Location must belong to the dataset.
            if err := db.ValidateLocationBelongsToDataset(conn, locationID, datasetID); err != nil {
                return err
            }
            // Cluster must belong to the location.
            return db.ClusterBelongsToLocation(conn, clusterID, locationID)
        })
    }
  • file addition: import_file.go (----------)
    [0.1]
    package imp
    import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // ImportFileInput defines the input parameters for the import_file tool
    type ImportFileInput struct {
    DBPath string `json:"db_path"` // resolved with db.ResolveDBPath before the database is opened
    FilePath string `json:"file_path"` // must pass validateFilePath: existing, regular, non-empty, .wav extension
    DatasetID string `json:"dataset_id"` // checked by validateHierarchyIDs
    LocationID string `json:"location_id"` // checked by validateHierarchyIDs; also used to load location data
    ClusterID string `json:"cluster_id"` // checked by validateHierarchyIDs
    }
    // ImportFileOutput defines the output structure for the import_file tool.
    // ImportFile fills the metadata fields from the file-processing result and
    // the ID/duplicate fields from the database insert.
    type ImportFileOutput struct {
    FileID string `json:"file_id"` // new ID, or the existing record's ID when IsDuplicate is true
    FileName string `json:"file_name"`
    Hash string `json:"hash"`
    Duration float64 `json:"duration_seconds"`
    SampleRate int `json:"sample_rate"`
    TimestampLocal time.Time `json:"timestamp_local"`
    IsAudioMoth bool `json:"is_audiomoth"`
    IsDuplicate bool `json:"is_duplicate"` // true when a record with the same hash already existed
    ProcessingTime string `json:"processing_time"` // time.Duration.String() of the elapsed time
    Error *string `json:"error,omitempty"` // set to the error text when file processing or insertion fails
    }
    // ImportFile imports a single WAV file into the database.
    //
    // It validates the file path and the dataset/location/cluster hierarchy,
    // loads the location data, extracts the file metadata with
    // utils.ProcessSingleFile, makes sure the cluster path is set and finally
    // inserts the record. When a record with the same hash already exists the
    // existing ID is returned with IsDuplicate set.
    //
    // Failures in metadata processing or insertion are also mirrored into
    // output.Error and output.ProcessingTime; earlier failures only return the
    // wrapped error.
    func ImportFile(
        ctx context.Context,
        input ImportFileInput,
    ) (ImportFileOutput, error) {
        startTime := time.Now()
        var output ImportFileOutput

        // recordFailure copies the error text and elapsed time into the output.
        recordFailure := func(cause error) {
            msg := cause.Error()
            output.Error = &msg
            output.ProcessingTime = time.Since(startTime).String()
        }

        // Phase 1: the path must point at a usable WAV file.
        if _, pathErr := validateFilePath(input.FilePath); pathErr != nil {
            return output, fmt.Errorf("file validation failed: %w", pathErr)
        }
        output.FileName = filepath.Base(input.FilePath)

        // Phase 2: dataset → location → cluster must be consistent.
        hierarchyErr := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, db.ResolveDBPath(input.DBPath, ""))
        if hierarchyErr != nil {
            return output, fmt.Errorf("hierarchy validation failed: %w", hierarchyErr)
        }

        // Phase 3: one writable connection serves every remaining DB operation.
        database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
        if err != nil {
            return output, fmt.Errorf("database connection failed: %w", err)
        }
        defer database.Close()

        // Phase 4: location coordinates and timezone feed the metadata step.
        location, err := utils.GetLocationData(database, input.LocationID)
        if err != nil {
            return output, fmt.Errorf("failed to get location data: %w", err)
        }

        // Phase 5: extract metadata from the file itself.
        meta, err := utils.ProcessSingleFile(input.FilePath, location.Latitude, location.Longitude, location.TimezoneID, true)
        if err != nil {
            recordFailure(err)
            return output, fmt.Errorf("file processing failed: %w", err)
        }
        output.FileName = meta.FileName
        output.Hash = meta.Hash
        output.Duration = meta.Duration
        output.SampleRate = meta.SampleRate
        output.TimestampLocal = meta.TimestampLocal
        output.IsAudioMoth = meta.IsAudioMoth

        // Phase 6: the cluster path defaults to the file's directory.
        clusterDir := filepath.Dir(input.FilePath)
        if pathErr := utils.EnsureClusterPath(database, input.ClusterID, clusterDir); pathErr != nil {
            return output, fmt.Errorf("failed to set cluster path: %w", pathErr)
        }

        // Phase 7: insert, or detect an existing record with the same hash.
        fileID, duplicate, err := insertFileIntoDB(ctx, database, meta, input.DatasetID, input.ClusterID, input.LocationID)
        if err != nil {
            recordFailure(err)
            return output, fmt.Errorf("database insertion failed: %w", err)
        }

        output.FileID, output.IsDuplicate = fileID, duplicate
        output.ProcessingTime = time.Since(startTime).String()
        return output, nil
    }
    // validateFilePath returns the file's os.FileInfo when filePath names an
    // existing, regular, non-empty file with a ".wav" extension (matched
    // case-insensitively). The checks run in that order and the first failure
    // is reported as an error with a nil FileInfo.
    func validateFilePath(filePath string) (os.FileInfo, error) {
        info, err := os.Stat(filePath)
        switch {
        case err == nil:
            // Stat succeeded; continue with the content checks below.
        case os.IsNotExist(err):
            return nil, fmt.Errorf("file does not exist: %s", filePath)
        default:
            return nil, fmt.Errorf("cannot access file: %w", err)
        }

        // Directories, devices, sockets etc. are rejected.
        if mode := info.Mode(); !mode.IsRegular() {
            return nil, fmt.Errorf("path is not a regular file: %s", filePath)
        }

        // The extension is lower-cased so ".WAV" is accepted too.
        if ext := strings.ToLower(filepath.Ext(filePath)); ext != ".wav" {
            return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)
        }

        if info.Size() == 0 {
            return nil, fmt.Errorf("file is empty: %s", filePath)
        }
        return info, nil
    }
    // insertFileIntoDB inserts a single file into the database
    // Returns (fileID, isDuplicate, error)
    //
    // All statements run inside one logged transaction named "import_audio_file".
    // If a record with the same hash already exists, its ID is returned with
    // isDuplicate=true and nothing is written. Otherwise a file row, a
    // file_dataset row and, for AudioMoth recordings with metadata, a
    // moth_metadata row are inserted and the transaction is committed.
    func insertFileIntoDB(
    ctx context.Context,
    database *sql.DB,
    result *utils.FileProcessingResult,
    datasetID, clusterID, locationID string,
    ) (string, bool, error) {
    // Begin logged transaction
    tx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")
    if err != nil {
    return "", false, fmt.Errorf("failed to begin transaction: %w", err)
    }
    // Covers every early return below, including the duplicate case.
    // NOTE(review): presumably harmless after a successful Commit — confirm
    // against the logged-tx wrapper.
    defer tx.Rollback() // Rollback if not committed
    // Check for duplicate hash
    existingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)
    if err != nil {
    return "", false, err
    }
    if isDup {
    // Nothing to insert; hand back the ID of the record that already exists.
    return existingID, true, nil
    }
    // Generate file ID
    fileID, err := utils.GenerateLongID()
    if err != nil {
    return "", false, fmt.Errorf("ID generation failed: %w", err)
    }
    // Insert file record
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file (
    id, file_name, xxh64_hash, location_id, timestamp_local,
    cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
    moon_phase, created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    fileID, result.FileName, result.Hash, locationID,
    result.TimestampLocal, clusterID, result.Duration, result.SampleRate,
    result.AstroData.SolarNight, result.AstroData.CivilNight, result.AstroData.MoonPhase,
    )
    if err != nil {
    return "", false, fmt.Errorf("file insert failed: %w", err)
    }
    // Insert file_dataset junction
    _, err = tx.ExecContext(ctx, `
    INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
    VALUES (?, ?, now(), now())
    `, fileID, datasetID)
    if err != nil {
    return "", false, fmt.Errorf("file_dataset insert failed: %w", err)
    }
    // If AudioMoth, insert moth_metadata
    if result.IsAudioMoth && result.MothData != nil {
    // NOTE(review): the recorder/gain/battery/temperature values are passed by
    // address, presumably so the driver can bind pointer-typed values — confirm.
    _, err = tx.ExecContext(ctx, `
    INSERT INTO moth_metadata (
    file_id, timestamp, recorder_id, gain, battery_v, temp_c,
    created_at, last_modified, active
    ) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
    `,
    fileID,
    result.MothData.Timestamp,
    &result.MothData.RecorderID,
    &result.MothData.Gain,
    &result.MothData.BatteryV,
    &result.MothData.TempC,
    )
    if err != nil {
    return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)
    }
    }
    // Commit transaction
    if err = tx.Commit(); err != nil {
    return "", false, fmt.Errorf("transaction commit failed: %w", err)
    }
    return fileID, false, nil
    }
  • file addition: bulk_file_import.go (----------)
    [0.1]
    package imp
    import (
    "context"
    "database/sql"
    "encoding/csv"
    "fmt"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "time"
    "skraak/db"
    "skraak/utils"
    )
    // BulkFileImportInput defines the input parameters for the bulk_file_import tool
    type BulkFileImportInput struct {
    DBPath string `json:"db_path"` // resolved with db.ResolveDBPath before the database is opened
    DatasetID string `json:"dataset_id"` // format-checked and validated against the database before import
    CSVPath string `json:"csv_path"` // CSV with a header row and six columns per location row
    LogFilePath string `json:"log_file_path"` // progress log; created if missing and opened in append mode
    }
    // BulkFileImportOutput defines the output structure for the bulk_file_import tool.
    // The counters are aggregated over every location row of the CSV.
    type BulkFileImportOutput struct {
    TotalLocations int `json:"total_locations"` // number of location rows loaded from the CSV
    ClustersCreated int `json:"clusters_created"` // clusters inserted by this run
    ClustersExisting int `json:"clusters_existing"` // clusters that were already present and reused
    TotalFilesScanned int `json:"total_files_scanned"`
    FilesImported int `json:"files_imported"`
    FilesDuplicate int `json:"files_duplicate"`
    FilesError int `json:"files_error"`
    ProcessingTime string `json:"processing_time"` // time.Duration.String() of the elapsed time
    Errors []string `json:"errors,omitempty"` // error messages collected on failure paths
    }
    // bulkLocationData holds CSV row data for a location.
    // bulkReadCSV fills the fields from CSV columns 0-5 in the order noted below.
    type bulkLocationData struct {
    LocationName string // column 0, whitespace-trimmed, must be non-empty
    LocationID string // column 1, must pass utils.ValidateShortID
    DirectoryPath string // column 2, whitespace-trimmed, must be non-empty
    DateRange string // column 3, whitespace-trimmed; used as the cluster name
    SampleRate int // column 4, must pass utils.ValidateSampleRate
    FileCount int // column 5, parsed as an integer
    }
    // bulkImportStats tracks import statistics for a single cluster.
    // bulkImportAllFiles also uses it to accumulate totals across clusters.
    type bulkImportStats struct {
    TotalFiles int // files scanned
    ImportedFiles int // files newly imported
    DuplicateFiles int // taken from the cluster import's SkippedFiles count
    ErrorFiles int // taken from the cluster import's FailedFiles count
    }
    // progressLogger writes timestamped progress lines to a log file and mirrors
    // them into an in-memory buffer.
    type progressLogger struct {
        file   *os.File
        buffer *strings.Builder
    }

    // Log formats the message, prefixes it with the current local time as
    // "[YYYY-MM-DD HH:MM:SS] ", appends a newline and writes the line to the log
    // file (followed by a sync) and to the buffer. File write or sync failures
    // are reported on stderr only; they never abort the caller.
    func (l *progressLogger) Log(format string, args ...any) {
        stamp := time.Now().Format("2006-01-02 15:04:05")
        line := fmt.Sprintf("[%s] %s\n", stamp, fmt.Sprintf(format, args...))

        // Best effort: logging problems must not interrupt the import.
        _, writeErr := l.file.WriteString(line)
        if writeErr != nil {
            fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", writeErr)
        }
        syncErr := l.file.Sync()
        if syncErr != nil {
            fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", syncErr)
        }

        // The in-memory copy is kept regardless of file errors.
        l.buffer.WriteString(line)
    }
    // failOutput records the given error messages on the output and stamps the
    // processing time elapsed since startTime. It is meant to be called right
    // before returning from a failure path.
    func (out *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
        elapsed := time.Since(startTime)
        out.ProcessingTime = elapsed.String()
        out.Errors = errs
    }
    // BulkFileImport imports WAV files across multiple locations using a CSV
    // specification.
    //
    // The run proceeds in order: open the progress log, validate the input,
    // read the CSV, verify that every location belongs to the dataset, create
    // or reuse one cluster per CSV row, then import the files of each cluster.
    // Progress and errors are written to the log file throughout.
    //
    // Every failure after the log file is opened returns the partially filled
    // output with ProcessingTime set and the error message recorded in
    // output.Errors, alongside the error itself.
    func BulkFileImport(
        ctx context.Context,
        input BulkFileImportInput,
    ) (BulkFileImportOutput, error) {
        startTime := time.Now()
        var output BulkFileImportOutput

        // Open log file
        logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
        if err != nil {
            return output, fmt.Errorf("failed to open log file: %w", err)
        }
        // Every line is synced as it is written, so a close error adds nothing.
        defer func() { _ = logFile.Close() }()

        logger := &progressLogger{
            file:   logFile,
            buffer: &strings.Builder{},
        }
        logger.Log("Starting bulk file import for dataset %s", input.DatasetID)

        // Phase 0: Validate input
        logger.Log("Validating input parameters...")
        if err := bulkValidateInput(input); err != nil {
            logger.Log("ERROR: Validation failed: %v", err)
            output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
            return output, fmt.Errorf("validation failed: %w", err)
        }
        logger.Log("Validation complete")

        // Phase 1: Read CSV
        logger.Log("Reading CSV file: %s", input.CSVPath)
        locations, err := bulkReadCSV(input.CSVPath)
        if err != nil {
            logger.Log("ERROR: Failed to read CSV: %v", err)
            output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
            return output, fmt.Errorf("failed to read CSV: %w", err)
        }
        logger.Log("Loaded %d locations from CSV", len(locations))
        output.TotalLocations = len(locations)

        // Phase 1.5: Validate all location_ids belong to the dataset
        logger.Log("Validating location_ids belong to dataset...")
        if err := bulkValidateLocations(logger, locations, input.DatasetID, db.ResolveDBPath(input.DBPath, "")); err != nil {
            output.failOutput([]string{err.Error()}, startTime)
            return output, err
        }
        logger.Log("Location validation complete")

        // Phase 2: Create/Validate Clusters
        logger.Log("=== Phase 1: Creating/Validating Clusters ===")
        database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
        if err != nil {
            logger.Log("ERROR: Failed to open database: %v", err)
            output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
            return output, fmt.Errorf("failed to open database: %w", err)
        }
        defer database.Close()

        clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
        if err != nil {
            // bulkCreateClusters has already logged the details; record the
            // message in the output too, like every other failure path does.
            output.failOutput([]string{err.Error()}, startTime)
            return output, err
        }
        output.ClustersCreated = created
        output.ClustersExisting = existing

        // Phase 3: Import files
        logger.Log("=== Phase 2: Importing Files ===")
        fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
        output.TotalFilesScanned = fileStats.TotalFiles
        output.FilesImported = fileStats.ImportedFiles
        output.FilesDuplicate = fileStats.DuplicateFiles
        output.FilesError = fileStats.ErrorFiles
        output.Errors = append(output.Errors, errs...)
        if len(errs) > 0 {
            output.ProcessingTime = time.Since(startTime).String()
            return output, fmt.Errorf("failed to import files: %s", errs[0])
        }

        logger.Log("=== Import Complete ===")
        logger.Log("Total files scanned: %d", fileStats.TotalFiles)
        logger.Log("Files imported: %d", fileStats.ImportedFiles)
        logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
        logger.Log("Errors: %d", fileStats.ErrorFiles)
        logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))
        output.ProcessingTime = time.Since(startTime).String()
        return output, nil
    }
    // bulkValidateInput checks the dataset ID format, that the CSV file and the
    // log file's directory are accessible, and, over a read-only connection,
    // that the dataset passes db.ValidateDatasetTypeForImport. The first failing
    // check is returned.
    func bulkValidateInput(input BulkFileImportInput) error {
        // Format check first: no I/O needed to reject a malformed ID.
        if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
            return idErr
        }

        _, csvErr := os.Stat(input.CSVPath)
        if csvErr != nil {
            return fmt.Errorf("CSV file not accessible: %w", csvErr)
        }

        // Only the parent directory of the log file is checked here.
        _, dirErr := os.Stat(filepath.Dir(input.LogFilePath))
        if dirErr != nil {
            return fmt.Errorf("log file directory not accessible: %w", dirErr)
        }

        conn, openErr := db.OpenReadOnlyDB(db.ResolveDBPath(input.DBPath, ""))
        if openErr != nil {
            return fmt.Errorf("failed to open database: %w", openErr)
        }
        defer conn.Close()

        return db.ValidateDatasetTypeForImport(conn, input.DatasetID)
    }
    // bulkValidateLocationsBelongToDataset checks every distinct location_id
    // found in locations against datasetID and returns one message per failed
    // check. The result is nil when all locations pass; message order follows
    // map iteration and is therefore unspecified.
    func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
        // De-duplicate so each location is queried once.
        distinct := make(map[string]bool)
        for idx := range locations {
            distinct[locations[idx].LocationID] = true
        }

        var failures []string
        for id := range distinct {
            checkErr := db.ValidateLocationBelongsToDataset(dbConn, id, datasetID)
            if checkErr == nil {
                continue
            }
            failures = append(failures, checkErr.Error())
        }
        return failures
    }
    // bulkValidateLocations opens a read-only connection to dbPath and verifies
    // that every location_id in locations belongs to datasetID. Each failure is
    // logged; the returned error only carries the number of failing locations.
    func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
        conn, openErr := db.OpenReadOnlyDB(dbPath)
        if openErr != nil {
            logger.Log("ERROR: Failed to open database: %v", openErr)
            return fmt.Errorf("failed to open database: %w", openErr)
        }
        problems := bulkValidateLocationsBelongToDataset(conn, locations, datasetID)
        // The connection is released before any logging happens.
        conn.Close()

        if len(problems) == 0 {
            return nil
        }
        for i := range problems {
            logger.Log("ERROR: %s", problems[i])
        }
        return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(problems), datasetID)
    }
    // bulkCreateClusters creates or validates clusters for all locations.
    //
    // For every CSV row it looks up an active cluster with the row's location
    // and date range as name, and creates one through bulkCreateCluster when
    // none exists. The lookup honours ctx, so a cancelled import stops here
    // instead of continuing to query.
    //
    // Returns the cluster ID map keyed by "locationID|dateRange", the counts of
    // created and existing clusters, and any error. On error the map is nil and
    // both counts are zero; the failure has already been logged.
    func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
        clusterIDMap := make(map[string]string, len(locations))
        created := 0
        existing := 0

        for i, loc := range locations {
            logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)

            var clusterID string
            err := database.QueryRowContext(ctx, `
    SELECT id FROM cluster
    WHERE location_id = ? AND name = ? AND active = true
    `, loc.LocationID, loc.DateRange).Scan(&clusterID)

            switch {
            case err == sql.ErrNoRows:
                // No active cluster with this name yet: create it.
                clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
                if err != nil {
                    logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
                    return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
                }
                logger.Log(" Created cluster: %s", clusterID)
                created++
            case err != nil:
                logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
                return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
            default:
                logger.Log(" Using existing cluster: %s", clusterID)
                existing++
            }

            // Same key format as the one bulkImportAllFiles looks up.
            clusterIDMap[loc.LocationID+"|"+loc.DateRange] = clusterID
        }
        return clusterIDMap, created, existing, nil
    }
    // bulkImportAllFiles imports the files of every location whose cluster is
    // present in clusterIDMap (keyed by "locationID|dateRange").
    //
    // Locations without a map entry or whose directory does not exist are
    // skipped. The first cluster import that fails aborts the loop: the totals
    // gathered so far are returned with a single error message. On success the
    // message slice is nil.
    func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
        var total bulkImportStats
        count := len(locations)

        for idx := range locations {
            loc := locations[idx]

            clusterID, known := clusterIDMap[loc.LocationID+"|"+loc.DateRange]
            if !known {
                continue
            }

            logger.Log("[%d/%d] Importing files for: %s", idx+1, count, loc.LocationName)
            logger.Log(" Directory: %s", loc.DirectoryPath)

            // Only a definite "does not exist" skips the location.
            _, statErr := os.Stat(loc.DirectoryPath)
            if os.IsNotExist(statErr) {
                logger.Log(" WARNING: Directory not found, skipping")
                continue
            }

            stats, importErr := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
            if importErr != nil {
                errMsg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, importErr)
                logger.Log("ERROR: %s", errMsg)
                return total, []string{errMsg}
            }

            logger.Log(" Scanned: %d files", stats.TotalFiles)
            logger.Log(" Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
            if stats.ErrorFiles > 0 {
                logger.Log(" Errors: %d files", stats.ErrorFiles)
            }

            total.TotalFiles += stats.TotalFiles
            total.ImportedFiles += stats.ImportedFiles
            total.DuplicateFiles += stats.DuplicateFiles
            total.ErrorFiles += stats.ErrorFiles
        }
        return total, nil
    }
    // bulkReadCSV parses the bulk-import CSV at path into location rows.
    //
    // The first record is treated as a header and skipped. Every other record
    // needs at least six columns, in order: location_name, location_id,
    // directory_path, date_range, sample_rate, file_count. All six cells are
    // whitespace-trimmed before validation, so padded cells such as " 8000"
    // are accepted consistently across columns.
    //
    // Returns an error for an unreadable or empty file and for the first row
    // that fails validation (row numbers are 1-based and include the header).
    // A file with only a header yields no locations and no error.
    func bulkReadCSV(path string) ([]bulkLocationData, error) {
        file, err := os.Open(path)
        if err != nil {
            return nil, err
        }
        // Read-only handle: a close error carries no information worth surfacing.
        defer func() { _ = file.Close() }()

        records, err := csv.NewReader(file).ReadAll()
        if err != nil {
            return nil, err
        }
        if len(records) == 0 {
            return nil, fmt.Errorf("CSV file is empty")
        }

        var locations []bulkLocationData
        for i, record := range records {
            if i == 0 {
                continue // Skip header
            }
            row := i + 1
            if len(record) < 6 {
                return nil, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", row, len(record))
            }

            // Required text fields must be non-empty after trimming.
            locationName := strings.TrimSpace(record[0])
            if locationName == "" {
                return nil, fmt.Errorf("empty location_name in row %d", row)
            }
            directoryPath := strings.TrimSpace(record[2])
            if directoryPath == "" {
                return nil, fmt.Errorf("empty directory_path in row %d", row)
            }
            dateRange := strings.TrimSpace(record[3])
            if dateRange == "" {
                return nil, fmt.Errorf("empty date_range in row %d", row)
            }

            // The ID and numeric cells get the same trimming as the text cells.
            locationID := strings.TrimSpace(record[1])
            if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
                return nil, fmt.Errorf("invalid location_id in row %d: %w", row, err)
            }

            sampleRate, err := strconv.Atoi(strings.TrimSpace(record[4]))
            if err != nil {
                return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
            }
            if err := utils.ValidateSampleRate(sampleRate); err != nil {
                return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
            }

            fileCount, err := strconv.Atoi(strings.TrimSpace(record[5]))
            if err != nil {
                return nil, fmt.Errorf("invalid file_count in row %d: %w", row, err)
            }

            locations = append(locations, bulkLocationData{
                LocationName:  locationName,
                LocationID:    locationID,
                DirectoryPath: directoryPath,
                DateRange:     dateRange,
                SampleRate:    sampleRate,
                FileCount:     fileCount,
            })
        }
        return locations, nil
    }
    // bulkCreateCluster creates a new cluster in the database and returns its ID.
    //
    // The cluster path is derived from the location's name with spaces and
    // slashes replaced by underscores. The insert runs in its own logged
    // transaction named "bulk_file_import". Both the location lookup and the
    // insert honour ctx, and all returned errors wrap their cause.
    func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
        clusterID, err := utils.GenerateShortID()
        if err != nil {
            return "", fmt.Errorf("failed to generate cluster ID: %w", err)
        }
        now := time.Now().UTC()

        // Get location name for the path
        var locationName string
        err = database.QueryRowContext(ctx, "SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
        if err != nil {
            return "", fmt.Errorf("failed to get location name: %w", err)
        }

        // Normalize path: only spaces and slashes are replaced.
        path := strings.ReplaceAll(locationName, " ", "_")
        path = strings.ReplaceAll(path, "/", "_")

        tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
        if err != nil {
            return "", fmt.Errorf("failed to begin transaction: %w", err)
        }
        // Undoes the insert on any early return below.
        defer tx.Rollback()

        _, err = tx.ExecContext(ctx, `
    INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
    VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)
    `, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
        if err != nil {
            return "", fmt.Errorf("failed to insert cluster: %w", err)
        }

        if err = tx.Commit(); err != nil {
            return "", fmt.Errorf("failed to commit cluster creation: %w", err)
        }
        return clusterID, nil
    }
    // bulkImportFilesForCluster imports all files under folderPath into one
    // cluster and reports the resulting counters.
    //
    // A missing directory is logged and yields zeroed stats without an error.
    // Otherwise utils.ImportCluster runs recursively inside a logged
    // transaction (on a background context); the transaction is rolled back
    // when the import fails. At most the first five per-file errors are logged.
    func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
        stats := &bulkImportStats{}

        _, statErr := os.Stat(folderPath)
        if os.IsNotExist(statErr) {
            logger.Log(" WARNING: Directory not found, skipping")
            return stats, nil
        }

        logger.Log(" Importing cluster %s", clusterID)

        tx, err := db.BeginLoggedTx(context.Background(), database, "import_audio_files")
        if err != nil {
            return nil, fmt.Errorf("failed to begin transaction: %w", err)
        }

        request := utils.ClusterImportInput{
            FolderPath: folderPath,
            DatasetID:  datasetID,
            LocationID: locationID,
            ClusterID:  clusterID,
            Recursive:  true,
        }
        imported, err := utils.ImportCluster(database, tx.UnderlyingTx(), request)
        if err != nil {
            tx.Rollback()
            return nil, err
        }

        if commitErr := tx.Commit(); commitErr != nil {
            return nil, fmt.Errorf("transaction commit failed: %w", commitErr)
        }

        // Skipped files are reported as duplicates, failed files as errors.
        stats.TotalFiles = imported.TotalFiles
        stats.ImportedFiles = imported.ImportedFiles
        stats.DuplicateFiles = imported.SkippedFiles
        stats.ErrorFiles = imported.FailedFiles

        // Keep the log readable: only the first few file errors are written.
        const maxLoggedErrors = 5
        for i, fileErr := range imported.Errors {
            if i >= maxLoggedErrors {
                break
            }
            logger.Log(" ERROR: %s: %s", fileErr.FileName, fileErr.Error)
        }

        logger.Log(" Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)
        return stats, nil
    }
  • file addition: calls (d--r------)
    [6.248737]
  • file addition: parallel_aggregate.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync/atomic"
    )
    // parallelResult is the common interface for birda/raven worker results.
    // aggregateResults consumes one value per processed source file.
    type parallelResult interface {
    // filePath is the source file; used for progress reporting and optional deletion.
    filePath() string
    // getCalls returns the calls found in the file; they are appended to the aggregate.
    getCalls() []ClusteredCall
    // wasWritten is counted as a written data file and gates source-file deletion.
    wasWritten() bool
    // wasSkipped is counted as a skipped data file.
    wasSkipped() bool
    // getError returns the worker's error, if any; only the first one is kept.
    getError() error
    }
    // aggregateStats holds the collected results from a parallel fan-out/fan-in.
    type aggregateStats struct {
    calls []ClusteredCall // every call from every result, in arrival order
    speciesCount map[string]int // number of calls per EbirdCode
    dataFilesWritten int // results reporting wasWritten
    dataFilesSkipped int // results reporting wasSkipped
    filesProcessed int // total results received
    filesDeleted int // source files successfully removed
    firstErr error // first worker or deletion error encountered, nil if none
    }
    // aggregateResults drains the results channel until it is closed and folds
    // every result into an aggregateStats value: the first error is remembered,
    // written/skipped files and per-species call counts are tallied, and the
    // source file is deleted when deleteFiles is set and the result was written.
    //
    // When progressHandler is non-nil it is invoked once per result with the
    // incremented processed counter, total, and the base name of the file; the
    // counter is not touched otherwise.
    func aggregateResults(
        results <-chan parallelResult,
        total int,
        processed *atomic.Int32,
        deleteFiles bool,
        progressHandler func(int, int, string),
    ) aggregateStats {
        stats := aggregateStats{speciesCount: make(map[string]int)}

        for res := range results {
            // Only the first error is kept; later ones are dropped.
            if resErr := res.getError(); resErr != nil && stats.firstErr == nil {
                stats.firstErr = resErr
            }
            if res.wasWritten() {
                stats.dataFilesWritten++
            }
            if res.wasSkipped() {
                stats.dataFilesSkipped++
            }

            found := res.getCalls()
            stats.calls = append(stats.calls, found...)
            for i := range found {
                stats.speciesCount[found[i].EbirdCode]++
            }

            stats.filesProcessed++
            stats.maybeDeleteFile(deleteFiles, res)

            if progressHandler == nil {
                continue
            }
            done := int(processed.Add(1))
            progressHandler(done, total, filepath.Base(res.filePath()))
        }
        return stats
    }
    // maybeDeleteFile removes the result's source file when deletion was
    // requested and the result reports that its data file was written. A
    // successful removal bumps filesDeleted; a failed one is stored as firstErr
    // unless an earlier error is already recorded.
    func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
        if !(deleteFiles && result.wasWritten()) {
            return
        }

        removeErr := os.Remove(result.filePath())
        if removeErr == nil {
            s.filesDeleted++
            return
        }
        if s.firstErr == nil {
            s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), removeErr)
        }
    }
    // sortCallsByFileAndTime orders calls in place by File, breaking ties on
    // StartTime. The sort is not stable for entries equal in both fields.
    func sortCallsByFileAndTime(calls []ClusteredCall) {
        sort.Slice(calls, func(i, j int) bool {
            left, right := &calls[i], &calls[j]
            if left.File == right.File {
                return left.StartTime < right.StartTime
            }
            return left.File < right.File
        })
    }
  • file addition: isnight.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "strings"
    "time"
    "github.com/sixdouglas/suncalc"
    "skraak/utils"
    )
    // IsNightInput defines the input parameters for the isnight tool
    type IsNightInput struct {
    FilePath string `json:"file_path"` // WAV file whose header and timestamp are read
    Lat float64 `json:"lat"` // latitude passed to the astronomical calculations
    Lng float64 `json:"lng"` // longitude passed to the astronomical calculations
    Timezone string `json:"timezone,omitempty"` // optional; forwarded to utils.ResolveTimestamp
    }
    // IsNightOutput defines the output structure for the isnight tool.
    // All *UTC fields are RFC 3339 strings in UTC.
    type IsNightOutput struct {
    FilePath string `json:"file_path"`
    TimestampUTC string `json:"timestamp_utc"` // resolved recording timestamp
    SolarNight bool `json:"solar_night"`
    CivilNight bool `json:"civil_night"`
    DiurnalActive bool `json:"diurnal_active"` // midpoint lies between dawn and sunset (inclusive)
    MoonPhase float64 `json:"moon_phase"`
    DurationSec float64 `json:"duration_seconds"` // duration from the WAV header
    TimestampSrc string `json:"timestamp_source"` // "audiomoth_comment", "filename" or "file_mod_time"
    MidpointUTC string `json:"midpoint_utc"` // recording midpoint used for the sun-event lookup
    SunriseUTC string `json:"sunrise_utc,omitempty"` // empty when the event is absent or zero
    SunsetUTC string `json:"sunset_utc,omitempty"` // empty when the event is absent or zero
    DawnUTC string `json:"dawn_utc,omitempty"`
    DuskUTC string `json:"dusk_utc,omitempty"`
    }
    // IsNight reports whether a WAV recording was made at night, given the GPS
    // coordinates in input.
    //
    // The recording timestamp is obtained from utils.ResolveTimestamp; the
    // intended resolution order is:
    // 1. AudioMoth comment (timezone embedded)
    // 2. Filename timestamp + timezone offset (requires --timezone)
    // 3. File modification time (system local time)
    //
    // Night flags and moon phase come from utils.CalculateAstronomicalData; the
    // sun event times are looked up for the recording midpoint. An error is
    // returned when the WAV header cannot be parsed or no timestamp can be
    // resolved; in the latter case output.DurationSec is already filled.
    func IsNight(input IsNightInput) (IsNightOutput, error) {
        var output IsNightOutput

        // Step 1: the header supplies the duration (and timestamp metadata).
        header, err := utils.ParseWAVHeader(input.FilePath)
        if err != nil {
            return output, fmt.Errorf("WAV header parsing failed: %w", err)
        }
        output.DurationSec = header.Duration

        // Step 2: resolve the recording start (file mod time allowed as fallback).
        resolved, err := utils.ResolveTimestamp(header, input.FilePath, input.Timezone, true, nil)
        if err != nil {
            return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
        }

        // Label where the timestamp came from.
        var source string
        switch {
        case resolved.IsAudioMoth:
            source = "audiomoth_comment"
        case utils.HasTimestampFilename(input.FilePath):
            source = "filename"
        default:
            source = "file_mod_time"
        }

        // Step 3: astronomical flags for the recording.
        astro := utils.CalculateAstronomicalData(
            resolved.Timestamp.UTC(),
            header.Duration,
            input.Lat,
            input.Lng,
        )

        // Step 4: sun events for the recording midpoint (informational output).
        midpoint := utils.CalculateMidpointTime(resolved.Timestamp.UTC(), header.Duration)
        sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)

        output.FilePath = input.FilePath
        output.TimestampUTC = resolved.Timestamp.UTC().Format(time.RFC3339)
        output.MidpointUTC = midpoint.Format(time.RFC3339)
        output.TimestampSrc = source
        output.SolarNight = astro.SolarNight
        output.CivilNight = astro.CivilNight
        output.MoonPhase = astro.MoonPhase
        populateSunTimes(&output, sunTimes, midpoint)
        return output, nil
    }
    // sunTimeUTC formats the named suncalc event as a UTC RFC3339 string.
    // It returns "" when the event is missing from sunTimes or has a zero time.
    func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
    	event, found := sunTimes[name]
    	if !found || event.Value.IsZero() {
    		return ""
    	}
    	return event.Value.UTC().Format(time.RFC3339)
    }
    // populateSunTimes copies the sun event times into output and derives the
    // diurnal flag. DiurnalActive is only assigned when both dawn and sunset
    // are present with non-zero times; it is true when midpoint lies within
    // [dawn, sunset].
    func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
    	dawn, haveDawn := sunTimes[suncalc.Dawn]
    	sunset, haveSunset := sunTimes[suncalc.Sunset]
    	if haveDawn && !dawn.Value.IsZero() && haveSunset && !sunset.Value.IsZero() {
    		notBeforeDawn := !midpoint.Before(dawn.Value)
    		notAfterSunset := !midpoint.After(sunset.Value)
    		output.DiurnalActive = notBeforeDawn && notAfterSunset
    	}

    	output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
    	output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
    	output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
    	output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)
    }
    // String renders the isnight result as a multi-line, human-readable
    // summary. Sun event lines are emitted only for events that have a value.
    func (o IsNightOutput) String() string {
    	var sb strings.Builder

    	// Lines that are always present.
    	fmt.Fprintf(&sb, "File: %s\n", o.FilePath)
    	fmt.Fprintf(&sb, "Timestamp (UTC): %s\n", o.TimestampUTC)
    	fmt.Fprintf(&sb, "Midpoint (UTC): %s\n", o.MidpointUTC)
    	fmt.Fprintf(&sb, "Duration: %.1f seconds\n", o.DurationSec)
    	fmt.Fprintf(&sb, "Source: %s\n", o.TimestampSrc)
    	fmt.Fprintf(&sb, "Solar night: %v\n", o.SolarNight)
    	fmt.Fprintf(&sb, "Civil night: %v\n", o.CivilNight)
    	fmt.Fprintf(&sb, "Moon phase: %.2f\n", o.MoonPhase)

    	// Optional sun event lines, in display order.
    	optional := [...]struct{ format, value string }{
    		{"Sunrise (UTC): %s\n", o.SunriseUTC},
    		{"Sunset (UTC): %s\n", o.SunsetUTC},
    		{"Dawn (UTC): %s\n", o.DawnUTC},
    		{"Dusk (UTC): %s\n", o.DuskUTC},
    	}
    	for _, ev := range optional {
    		if ev.value == "" {
    			continue
    		}
    		fmt.Fprintf(&sb, ev.format, ev.value)
    	}
    	return sb.String()
    }
  • file addition: calls_summarise.go (----------)
    [0.67281]
    package calls
    import (
    "sort"
    "strings"
    "skraak/utils"
    )
    // CallsSummariseInput defines the input for the calls-summarise tool.
    type CallsSummariseInput struct {
    // Folder is the directory searched for .data files.
    Folder string `json:"folder"`
    // Brief suppresses the per-segment list; only aggregate stats are built.
    Brief bool `json:"brief"`
    // Filter, when non-empty, restricts the summary to labels whose filter
    // equals this value; segments without such a label are left out.
    Filter string `json:"filter,omitempty"`
    }
    // CallsSummariseOutput defines the output for the calls-summarise tool.
    type CallsSummariseOutput struct {
    // Segments lists every included segment; it is not filled in brief mode.
    Segments []SegmentSummary `json:"segments"`
    // Folder echoes the input folder.
    Folder string `json:"folder"`
    // DataFilesRead counts files that parsed; DataFilesSkipped lists the
    // paths of files that failed to parse.
    DataFilesRead int `json:"data_files_read"`
    DataFilesSkipped []string `json:"data_files_skipped"`
    // TotalSegments is the overall segment count reported for the run.
    TotalSegments int `json:"total_segments"`
    // Filters holds statistics keyed by label filter name.
    Filters map[string]FilterStats `json:"filters"`
    // ReviewStatus aggregates review progress over all counted labels.
    ReviewStatus ReviewStatus `json:"review_status"`
    // Operators and Reviewers are the sorted, de-duplicated names found in
    // file metadata.
    Operators []string `json:"operators"`
    Reviewers []string `json:"reviewers"`
    // Error carries the message of a failed folder scan, if any.
    Error *string `json:"error,omitempty"`
    }
    // SegmentSummary represents a single segment in the output.
    type SegmentSummary struct {
    // File is the .data file name with its ".data" suffix removed.
    File string `json:"file"`
    // StartTime and EndTime are copied from the parsed segment.
    StartTime float64 `json:"start_time"`
    EndTime float64 `json:"end_time"`
    // Labels holds the segment's labels after any filter was applied.
    Labels []LabelSummary `json:"labels"`
    }
    // LabelSummary represents a label in the output. CallType, Comment and
    // Bookmark are dropped from the JSON when empty/false (omitempty).
    type LabelSummary struct {
    Filter string `json:"filter"`
    Certainty int `json:"certainty"`
    Species string `json:"species"`
    CallType string `json:"calltype,omitempty"`
    Comment string `json:"comment,omitempty"`
    Bookmark bool `json:"bookmark,omitempty"`
    }
    // FilterStats contains per-filter statistics.
    type FilterStats struct {
    // Segments is incremented once for every label carrying this filter.
    Segments int `json:"segments"`
    // Species counts labels per species name.
    Species map[string]int `json:"species"`
    // Calltypes is set to nil during finalisation when no call types were seen.
    Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
    }
    // ReviewStatus contains review progress statistics. Every counter is a
    // number of labels, not segments.
    type ReviewStatus struct {
    Unreviewed int `json:"unreviewed"` // certainty other than 0 or 100
    Confirmed int `json:"confirmed"` // certainty = 100
    DontKnow int `json:"dont_know"` // certainty = 0
    WithCallType int `json:"with_calltype"` // non-empty call type
    WithComments int `json:"with_comments"` // non-empty comment
    Bookmarked int `json:"bookmarked"` // bookmark flag set
    }
    // CallsSummarise reads all .data files in a folder and produces a summary.
    //
    // Files that fail to parse are listed in DataFilesSkipped rather than
    // aborting the run. When the folder scan itself fails, the error is
    // returned and its message is also stored in output.Error.
    //
    // TotalSegments is the number of segments included in the summary and is
    // the same in brief and full mode.
    func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
    	// Initialize empty slices/maps before anything can fail, so every
    	// return path (including the error path) avoids null in JSON.
    	output := CallsSummariseOutput{
    		Segments:         make([]SegmentSummary, 0),
    		Folder:           input.Folder,
    		Filters:          make(map[string]FilterStats),
    		Operators:        make([]string, 0),
    		Reviewers:        make([]string, 0),
    		DataFilesSkipped: make([]string, 0),
    	}

    	// Find all .data files
    	filePaths, err := utils.FindDataFiles(input.Folder)
    	if err != nil {
    		errMsg := err.Error()
    		output.Error = &errMsg
    		return output, err
    	}
    	if len(filePaths) == 0 {
    		return output, nil
    	}

    	// Track unique operators and reviewers
    	operatorSet := make(map[string]bool)
    	reviewerSet := make(map[string]bool)
    	summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
    	finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)
    	return output, nil
    }

    // summariseFiles processes all data files, populating output stats.
    //
    // It also maintains output.TotalSegments by counting each included
    // segment exactly once. Summing the per-filter counters instead would
    // count a segment once per label and miss segments without labels.
    func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
    	for _, path := range filePaths {
    		df, err := utils.ParseDataFile(path)
    		if err != nil {
    			// Best effort: remember the file and carry on with the rest.
    			output.DataFilesSkipped = append(output.DataFilesSkipped, path)
    			continue
    		}
    		output.DataFilesRead++
    		trackMeta(df.Meta, operatorSet, reviewerSet)

    		// The display name is only needed for the per-segment list.
    		var relPath string
    		if !input.Brief {
    			relPath = extractRelativePath(input.Folder, path)
    		}

    		for _, seg := range df.Segments {
    			filteredLabels := filterLabels(seg.Labels, input.Filter)
    			if input.Filter != "" && len(filteredLabels) == 0 {
    				continue
    			}
    			output.TotalSegments++
    			updateStatsFromLabels(filteredLabels, output)
    			if !input.Brief {
    				output.Segments = append(output.Segments, SegmentSummary{
    					File:      relPath,
    					StartTime: seg.StartTime,
    					EndTime:   seg.EndTime,
    					Labels:    buildLabelSummaries(filteredLabels),
    				})
    			}
    		}
    	}
    }
    // trackMeta adds the file's operator and reviewer names to the given sets.
    // A nil meta and empty names are ignored.
    func trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {
    	if meta == nil {
    		return
    	}
    	if op := meta.Operator; op != "" {
    		operatorSet[op] = true
    	}
    	if rev := meta.Reviewer; rev != "" {
    		reviewerSet[rev] = true
    	}
    }
    // filterLabels keeps the labels whose Filter equals filter. An empty filter
    // disables filtering and the input slice is returned unchanged; when
    // nothing matches the result is nil.
    func filterLabels(labels []*utils.Label, filter string) []*utils.Label {
    	if filter == "" {
    		return labels
    	}
    	var matched []*utils.Label
    	for i := range labels {
    		if candidate := labels[i]; candidate.Filter == filter {
    			matched = append(matched, candidate)
    		}
    	}
    	return matched
    }
    // buildLabelSummaries converts labels to label summaries.
    //
    // The result is never nil: a segment without labels yields an empty slice
    // so that it is encoded as [] rather than null in JSON.
    func buildLabelSummaries(labels []*utils.Label) []LabelSummary {
    	summaries := make([]LabelSummary, 0, len(labels))
    	for _, l := range labels {
    		// Empty CallType/Comment and a false Bookmark are dropped by the
    		// omitempty tags, so a plain copy is sufficient.
    		summaries = append(summaries, LabelSummary{
    			Filter:    l.Filter,
    			Certainty: l.Certainty,
    			Species:   l.Species,
    			CallType:  l.CallType,
    			Comment:   l.Comment,
    			Bookmark:  l.Bookmark,
    		})
    	}
    	return summaries
    }
    // updateStatsFromLabels folds every label into the per-filter statistics
    // and the review status counters of output.
    func updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {
    	for i := range labels {
    		current := labels[i]
    		updateFilterStats(current, output)
    		updateReviewStatus(current, output)
    	}
    }
    // updateFilterStats adds one label to the statistics of its filter,
    // creating the filter's entry on first use. The call type is only counted
    // when the label has one.
    func updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {
    	stats, known := output.Filters[l.Filter]
    	if !known {
    		stats.Species = make(map[string]int)
    		stats.Calltypes = make(map[string]map[string]int)
    	}

    	stats.Segments++
    	stats.Species[l.Species]++

    	if l.CallType != "" {
    		perSpecies := stats.Calltypes[l.Species]
    		if perSpecies == nil {
    			perSpecies = make(map[string]int)
    			stats.Calltypes[l.Species] = perSpecies
    		}
    		perSpecies[l.CallType]++
    	}

    	// FilterStats is stored by value, so write the updated copy back.
    	output.Filters[l.Filter] = stats
    }
    // updateReviewStatus bumps the review counters for one label: certainty 100
    // counts as confirmed, 0 as "don't know", anything else as unreviewed.
    // Call type, comment and bookmark are tallied independently.
    func updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {
    	status := &output.ReviewStatus

    	if l.Certainty == 100 {
    		status.Confirmed++
    	} else if l.Certainty == 0 {
    		status.DontKnow++
    	} else {
    		status.Unreviewed++
    	}

    	if l.CallType != "" {
    		status.WithCallType++
    	}
    	if l.Comment != "" {
    		status.WithComments++
    	}
    	if l.Bookmark {
    		status.Bookmarked++
    	}
    }
    // finaliseSummary puts the output into its final shape: empty call type
    // maps become nil, the operator/reviewer sets are flattened into sorted
    // slices, and (unless brief) segments are ordered by file then start time.
    func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
    	// Drop call type maps that never received an entry.
    	for name, stats := range output.Filters {
    		if len(stats.Calltypes) != 0 {
    			continue
    		}
    		stats.Calltypes = nil
    		output.Filters[name] = stats
    	}

    	// Sets -> sorted slices.
    	for name := range operatorSet {
    		output.Operators = append(output.Operators, name)
    	}
    	sort.Strings(output.Operators)
    	for name := range reviewerSet {
    		output.Reviewers = append(output.Reviewers, name)
    	}
    	sort.Strings(output.Reviewers)

    	if brief {
    		return
    	}
    	// Order segments by file name, breaking ties on start time.
    	sort.Slice(output.Segments, func(i, j int) bool {
    		left, right := &output.Segments[i], &output.Segments[j]
    		if left.File == right.File {
    			return left.StartTime < right.StartTime
    		}
    		return left.File < right.File
    	})
    }
    // extractRelativePath returns the audio file name for a .data file path:
    // everything after the last "/" with a trailing ".data" removed, e.g.
    // "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV".
    // The remaining extension keeps its original case. folder is not used.
    func extractRelativePath(folder, dataPath string) string {
    	// LastIndex yields -1 when there is no slash, so +1 keeps the whole path.
    	base := dataPath[strings.LastIndex(dataPath, "/")+1:]
    	return strings.TrimSuffix(base, ".data")
    }
  • file addition: calls_show_images.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "os"
    "strings"
    "skraak/utils"
    )
    // CallsShowImagesInput defines the input for the show-images tool.
    type CallsShowImagesInput struct {
    // DataFilePath is the .data file; the WAV path is derived by stripping
    // the ".data" suffix.
    DataFilePath string `json:"data_file_path"`
    // Color is forwarded to spectrogram generation.
    Color bool `json:"color"`
    // ImageSize of 0 selects utils.SpectrogramDisplaySize.
    ImageSize int `json:"image_size"`
    // Sixel and ITerm pick the terminal graphics protocol; ITerm wins when
    // both are set, and Kitty is used when neither is.
    Sixel bool `json:"sixel"`
    ITerm bool `json:"iterm"`
    }
    // CallsShowImagesOutput defines the output for the show-images tool.
    type CallsShowImagesOutput struct {
    // SegmentsShown is the segment count reported for a successful run.
    SegmentsShown int `json:"segments_shown"`
    // WavFile is the data file path with its ".data" suffix stripped.
    WavFile string `json:"wav_file"`
    // Error mirrors the message of the returned error, if any.
    Error string `json:"error,omitempty"`
    }
    // CallsShowImages reads a .data file and displays spectrogram images for each segment.
    //
    // Segment info goes to stderr and the image itself to stdout using the
    // selected terminal graphics protocol. A segment whose spectrogram cannot
    // be generated is skipped with a note on stderr; SegmentsShown counts only
    // the segments that were actually written. Every failure is reported both
    // as the returned error and in output.Error.
    func CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {
    	var output CallsShowImagesOutput

    	// Validate file exists
    	if _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {
    		output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)
    		return output, fmt.Errorf("%s", output.Error)
    	}

    	// Derive WAV file path (strip .data suffix)
    	wavPath := strings.TrimSuffix(input.DataFilePath, ".data")
    	output.WavFile = wavPath

    	// Check WAV file exists
    	if _, err := os.Stat(wavPath); os.IsNotExist(err) {
    		output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)
    		return output, fmt.Errorf("%s", output.Error)
    	}

    	// Parse .data file (includes labels for future filtering)
    	dataFile, err := utils.ParseDataFile(input.DataFilePath)
    	if err != nil {
    		output.Error = err.Error()
    		return output, fmt.Errorf("%s", output.Error)
    	}
    	if len(dataFile.Segments) == 0 {
    		output.Error = "No segments found in .data file"
    		return output, fmt.Errorf("%s", output.Error)
    	}

    	// Resolve image size
    	imgSize := input.ImageSize
    	if imgSize == 0 {
    		imgSize = utils.SpectrogramDisplaySize
    	}

    	// Select graphics protocol
    	protocol := utils.ProtocolKitty
    	if input.ITerm {
    		protocol = utils.ProtocolITerm
    	} else if input.Sixel {
    		protocol = utils.ProtocolSixel
    	}

    	// Generate spectrogram for each segment and output
    	shown := 0
    	for i, seg := range dataFile.Segments {
    		img, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)
    		if err != nil {
    			// Best effort: one bad segment must not hide the others.
    			fmt.Fprintf(os.Stderr, "Segment %d: skipped, spectrogram failed: %v\n", i+1, err)
    			continue
    		}
    		if img == nil {
    			fmt.Fprintf(os.Stderr, "Segment %d: skipped, no spectrogram produced\n", i+1)
    			continue
    		}

    		// Print segment info
    		labelInfo := formatSegmentLabels(seg.Labels)
    		fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",
    			i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)

    		// Write to stdout via terminal graphics protocol
    		if err := utils.WriteImage(img, os.Stdout, protocol); err != nil {
    			output.Error = fmt.Sprintf("Failed to write image: %v", err)
    			return output, fmt.Errorf("%s", output.Error)
    		}
    		fmt.Println() // Newline after image
    		shown++
    	}

    	// Report what was displayed, not how many segments the file holds.
    	output.SegmentsShown = shown
    	return output, nil
    }
    // formatSegmentLabels renders a segment's labels for the info line as
    // " species[/calltype][ [filter]], ..." with a leading space. It returns ""
    // when there are no labels.
    func formatSegmentLabels(labels []*utils.Label) string {
    	if len(labels) == 0 {
    		return ""
    	}
    	rendered := make([]string, 0, len(labels))
    	for _, l := range labels {
    		var sb strings.Builder
    		sb.WriteString(l.Species)
    		if l.CallType != "" {
    			sb.WriteString("/")
    			sb.WriteString(l.CallType)
    		}
    		if l.Filter != "" {
    			sb.WriteString(" [")
    			sb.WriteString(l.Filter)
    			sb.WriteString("]")
    		}
    		rendered = append(rendered, sb.String())
    	}
    	return " " + strings.Join(rendered, ", ")
    }
  • file addition: calls_push_certainty_test.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/json"
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {
    tempDir := t.TempDir()
    // File with two Kiwi segments: certainty=90 and certainty=70
    file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
    file1Path := filepath.Join(tempDir, "file1.data")
    if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {
    t.Fatal(err)
    }
    // File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)
    file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
    file2Path := filepath.Join(tempDir, "file2.data")
    if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {
    t.Fatal(err)
    }
    result, err := PushCertainty(PushCertaintyConfig{
    Folder: tempDir,
    Species: "Kiwi",
    Reviewer: "TestReviewer",
    })
    if err != nil {
    t.Fatal(err)
    }
    if result.SegmentsUpdated != 1 {
    t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
    }
    if result.FilesUpdated != 1 {
    t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)
    }
    // Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchanged
    df, err := utils.ParseDataFile(file1Path)
    if err != nil {
    t.Fatal(err)
    }
    if df.Segments[0].Labels[0].Certainty != 100 {
    t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
    }
    if df.Segments[1].Labels[0].Certainty != 70 {
    t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)
    }
    if df.Meta.Reviewer != "TestReviewer" {
    t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)
    }
    // Verify Tomtit file was not modified
    df2, err := utils.ParseDataFile(file2Path)
    if err != nil {
    t.Fatal(err)
    }
    if df2.Segments[0].Labels[0].Certainty != 90 {
    t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)
    }
    }
    // TestPushCertaintyFilterScope checks that a filter-scoped push promotes
    // only the label of the named filter when a segment carries labels from
    // several filters.
    func TestPushCertaintyFilterScope(t *testing.T) {
    	tempDir := t.TempDir()

    	// Segment has two labels from different filters, both Kiwi certainty=90
    	data := []any{
    		map[string]any{"Operator": "test"},
    		[]any{0.0, 10.0, 100.0, 1000.0, []any{
    			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},
    			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},
    		}},
    	}
    	// A broken fixture must fail here, not surface later as a parse error.
    	raw, err := json.Marshal(data)
    	if err != nil {
    		t.Fatalf("marshal fixture: %v", err)
    	}
    	filePath := filepath.Join(tempDir, "file1.data")
    	if err := os.WriteFile(filePath, raw, 0644); err != nil {
    		t.Fatal(err)
    	}

    	// Push only model-a
    	result, err := PushCertainty(PushCertaintyConfig{
    		Folder:   tempDir,
    		Filter:   "model-a",
    		Species:  "Kiwi",
    		Reviewer: "TestReviewer",
    	})
    	if err != nil {
    		t.Fatal(err)
    	}
    	if result.SegmentsUpdated != 1 {
    		t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
    	}

    	// Verify only model-a label was promoted; model-b stays at 90
    	df, err := utils.ParseDataFile(filePath)
    	if err != nil {
    		t.Fatal(err)
    	}
    	for _, label := range df.Segments[0].Labels {
    		if label.Filter == "model-a" && label.Certainty != 100 {
    			t.Errorf("model-a label should be 100, got %d", label.Certainty)
    		}
    		if label.Filter == "model-b" && label.Certainty != 90 {
    			t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)
    		}
    	}
    }
  • file addition: calls_push_certainty.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "skraak/utils"
    )
    // PushCertaintyConfig holds the configuration for push-certainty.
    // Every field except Reviewer is forwarded unchanged to LoadDataFiles to
    // select the segments in scope.
    type PushCertaintyConfig struct {
    Folder string
    File string
    // Filter, Species and CallType also narrow which labels inside a selected
    // segment are promoted; an empty value matches any label.
    Filter string
    Species string
    CallType string
    Night bool
    Day bool
    Lat float64
    Lng float64
    Timezone string
    // Reviewer is written to the metadata of every file that is changed.
    Reviewer string
    }
    // PushCertaintyResult holds the result of push-certainty.
    type PushCertaintyResult struct {
    // SegmentsUpdated is incremented once for every promoted label.
    SegmentsUpdated int `json:"segments_updated"`
    // FilesUpdated counts the files that were rewritten.
    FilesUpdated int `json:"files_updated"`
    // TimeFilteredCount is copied from the loaded classify state.
    TimeFilteredCount int `json:"time_filtered_count"`
    }
    // PushCertainty promotes all certainty=90 segments matching the filter scope to certainty=100.
    // Uses identical filtering logic to LoadDataFiles so the scope matches calls classify exactly.
    //
    // Each file with at least one promoted label gets config.Reviewer recorded
    // in its metadata and is written back to its own path. The first write
    // failure aborts the run; files written before it stay updated.
    // SegmentsUpdated in the result counts promoted labels.
    func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {
    	state, err := LoadDataFiles(ClassifyConfig{
    		Folder:    config.Folder,
    		File:      config.File,
    		Filter:    config.Filter,
    		Species:   config.Species,
    		CallType:  config.CallType,
    		Certainty: 90,
    		Sample:    -1,
    		Night:     config.Night,
    		Day:       config.Day,
    		Lat:       config.Lat,
    		Lng:       config.Lng,
    		Timezone:  config.Timezone,
    	})
    	if err != nil {
    		return nil, err
    	}

    	var segsUpdated, filesUpdated int
    	for i, df := range state.DataFiles {
    		changed := false
    		for _, seg := range state.FilteredSegs()[i] {
    			for _, label := range seg.Labels {
    				if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {
    					label.Certainty = 100
    					changed = true
    					segsUpdated++
    				}
    			}
    		}
    		if !changed {
    			continue
    		}
    		// Metadata can be absent (other code in this package guards for
    		// nil), so create it instead of panicking on the assignment below.
    		if df.Meta == nil {
    			df.Meta = &utils.DataMeta{}
    		}
    		df.Meta.Reviewer = config.Reviewer
    		if err := df.Write(df.FilePath); err != nil {
    			return nil, fmt.Errorf("write %s: %w", df.FilePath, err)
    		}
    		filesUpdated++
    	}

    	return &PushCertaintyResult{
    		SegmentsUpdated:   segsUpdated,
    		FilesUpdated:      filesUpdated,
    		TimeFilteredCount: state.TimeFilteredCount,
    	}, nil
    }
    // labelMatchesPush reports whether label is a certainty=90 label inside the
    // push scope. An empty filter, species or callType places no restriction on
    // that field. The certainty is re-checked here, even though LoadDataFiles
    // already selected on it, so that only the label that actually matched is
    // promoted (a segment may carry labels from multiple filters).
    func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {
    	switch {
    	case filter != "" && label.Filter != filter:
    		return false
    	case species != "" && label.Species != species:
    		return false
    	case callType != "" && label.CallType != callType:
    		return false
    	}
    	return label.Certainty == 90
    }
  • file addition: calls_propagate_test.go (----------)
    [0.67281]
    package calls
    import (
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // helpers

    // seg builds a segment spanning [start, end] with a fixed 100-8000
    // frequency band and the given labels.
    func seg(start, end float64, labels ...*utils.Label) *utils.Segment {
    	s := new(utils.Segment)
    	s.StartTime, s.EndTime = start, end
    	s.FreqLow, s.FreqHigh = 100, 8000
    	s.Labels = labels
    	return s
    }
    // lbl builds a label with the given filter, species, call type and certainty.
    func lbl(filter, species, calltype string, certainty int) *utils.Label {
    	var l utils.Label
    	l.Filter = filter
    	l.Species = species
    	l.CallType = calltype
    	l.Certainty = certainty
    	return &l
    }
    // writeFile writes the segments to "test.data" in a fresh temp dir, with
    // operator "ML", reviewer "David" and duration 3600, and returns the path.
    // The test is aborted if the fixture cannot be written.
    func writeFile(t *testing.T, segs ...*utils.Segment) string {
    	t.Helper()
    	meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
    	fixture := &utils.DataFile{Meta: meta, Segments: segs}
    	target := filepath.Join(t.TempDir(), "test.data")
    	if err := fixture.Write(target); err != nil {
    		t.Fatalf("write fixture: %v", err)
    	}
    	return target
    }
    // readFile parses the .data file at path, aborting the test on failure.
    func readFile(t *testing.T, path string) *utils.DataFile {
    	t.Helper()
    	parsed, parseErr := utils.ParseDataFile(path)
    	if parseErr != nil {
    		t.Fatalf("parse %s: %v", path, parseErr)
    	}
    	return parsed
    }
    // findLabel looks through the segments whose start and end equal the given
    // times and returns the first label with the given filter, or nil if there
    // is none.
    func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {
    	for _, segment := range df.Segments {
    		if segment.StartTime == start && segment.EndTime == end {
    			for _, candidate := range segment.Labels {
    				if candidate.Filter == filter {
    					return candidate
    				}
    			}
    		}
    	}
    	return nil
    }
    // Filter names used by the propagate tests: labels are propagated from
    // fFrom (the source filter) to fTo (the target filter).
    const (
    fFrom = "opensoundscape-kiwi-1.2"
    fTo = "opensoundscape-kiwi-1.5"
    )
    // TestPropagate_HappyPathSingle: one cert=100 source exactly overlapping one
    // cert=70 target; the target takes the source's species and call type at
    // cert=90 and the file's reviewer becomes "Skraak".
    func TestPropagate_HappyPathSingle(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v (%s)", err, res.Error)
    	}
    	countsOK := res.Propagated == 1 && res.TargetsExamined == 1 &&
    		res.SkippedConflict == 0 && res.SkippedNoOverlap == 0
    	if !countsOK {
    		t.Fatalf("counts wrong: %+v", res)
    	}

    	parsed := readFile(t, fixture)
    	got := findLabel(parsed, fTo, 100, 125)
    	if got == nil {
    		t.Fatal("target label missing")
    	}
    	if !(got.Species == "Kiwi" && got.CallType == "Male" && got.Certainty == 90) {
    		t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", got.Species, got.CallType, got.Certainty)
    	}
    	if reviewer := parsed.Meta.Reviewer; reviewer != "Skraak" {
    		t.Errorf("reviewer = %q, want Skraak", reviewer)
    	}
    }
    // TestPropagate_NoOverlap: source and target are far apart in time, so the
    // target is examined, skipped as non-overlapping and the file is not rewritten.
    func TestPropagate_NoOverlap(t *testing.T) {
    	path := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {
    		t.Fatalf("counts wrong: %+v", out)
    	}
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 500, 525)
    	// Fail cleanly instead of panicking on a nil dereference.
    	if target == nil {
    		t.Fatal("target label missing")
    	}
    	if target.Certainty != 70 {
    		t.Errorf("target should not be modified, cert=%d", target.Certainty)
    	}
    	if df.Meta.Reviewer != "David" {
    		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
    	}
    }
    // TestPropagate_SourceWrongSpecies_Ignored: the only source-filter label is
    // a Weka, so the Kiwi target finds no source and is skipped as no-overlap.
    func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Weka", "", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(res.Propagated == 0 && res.SkippedNoOverlap == 1) {
    		t.Fatalf("counts wrong: %+v", res)
    	}
    }
    // TestPropagate_SourceWrongCertainty_Ignored: source labels at cert=70 and
    // cert=0 must NOT count as sources, so both targets are skipped as no-overlap.
    func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),
    		seg(200, 225, lbl(fFrom, "Don't Know", "", 0)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    		seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(res.Propagated == 0 && res.SkippedNoOverlap == 2) {
    		t.Fatalf("counts wrong: %+v", res)
    	}
    }
    // TestPropagate_SourceWrongFilter_Ignored: the file has no label from the
    // requested source filter, so the run reports FiltersMissing and examines nothing.
    func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(res.FiltersMissing && res.Propagated == 0 && res.TargetsExamined == 0) {
    		t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", res)
    	}
    }
    // TestPropagate_TargetCert100_NotTouched: a cert=100 target is
    // human-verified and must NOT be overwritten, or even examined; the file is
    // not rewritten.
    func TestPropagate_TargetCert100_NotTouched(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(res.TargetsExamined == 0 && res.Propagated == 0) {
    		t.Fatalf("cert=100 target must not be examined: %+v", res)
    	}
    	if reviewer := readFile(t, fixture).Meta.Reviewer; reviewer != "David" {
    		t.Errorf("reviewer should stay David (no write), got %q", reviewer)
    	}
    }
    // TestPropagate_TargetCert90_NotTouched: a target already at cert=90
    // (propagated earlier) must NOT be re-propagated or modified.
    func TestPropagate_TargetCert90_NotTouched(t *testing.T) {
    	path := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.TargetsExamined != 0 || out.Propagated != 0 {
    		t.Fatalf("cert=90 target must not be examined: %+v", out)
    	}
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 100, 125)
    	// Fail cleanly instead of panicking on a nil dereference.
    	if target == nil {
    		t.Fatal("target label missing")
    	}
    	if target.Certainty != 90 || target.CallType != "Female" {
    		t.Errorf("cert=90 target was modified: %+v", target)
    	}
    }
    // TestPropagate_TargetCert0_Propagated: targets at cert=0 ("Don't Know" /
    // "Noise") SHOULD be propagated when an overlapping cert=100 source exists.
    // This rescues labels from the noise bucket so they surface for review even
    // if occasionally wrong.
    func TestPropagate_TargetCert0_Propagated(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Don't Know", "", 0)),
    		seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),
    		seg(200, 225, lbl(fTo, "Noise", "", 0)),
    	)
    	in := CallsPropagateInput{File: fixture, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
    	res, err := CallsPropagate(in)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(res.TargetsExamined == 2 && res.Propagated == 2) {
    		t.Fatalf("cert=0 targets must be propagated: %+v", res)
    	}

    	// Each target should now carry the call type of its own source.
    	type expectation struct {
    		start, end float64
    		calltype   string
    	}
    	wanted := []expectation{
    		{100, 125, "Male"},
    		{200, 225, "Female"},
    	}
    	parsed := readFile(t, fixture)
    	for _, w := range wanted {
    		got := findLabel(parsed, fTo, w.start, w.end)
    		if got == nil || got.Species != "Kiwi" || got.CallType != w.calltype || got.Certainty != 90 {
    			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
    		}
    	}
    }
    // TestPropagate_MultipleSourcesAgree: two overlapping sources with the same
    // calltype → propagate without reporting a conflict.
    func TestPropagate_MultipleSourcesAgree(t *testing.T) {
    	path := writeFile(t,
    		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.Propagated != 1 || out.SkippedConflict != 0 {
    		t.Fatalf("counts wrong: %+v", out)
    	}
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 100, 125)
    	// Fail cleanly instead of panicking on a nil dereference.
    	if target == nil {
    		t.Fatal("target label missing")
    	}
    	if target.CallType != "Male" {
    		t.Errorf("calltype should be Male, got %q", target.CallType)
    	}
    }
    // TestPropagate_MultipleSourcesConflict: two overlapping sources with
    // different calltypes → conflict, skip, report. The target and the file's
    // reviewer must stay untouched.
    func TestPropagate_MultipleSourcesConflict(t *testing.T) {
    	path := writeFile(t,
    		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.Propagated != 0 || out.SkippedConflict != 1 {
    		t.Fatalf("expected 1 conflict skip: %+v", out)
    	}
    	// Fatalf here guards the Conflicts[0] accesses below.
    	if len(out.Conflicts) != 1 {
    		t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))
    	}
    	if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {
    		t.Errorf("conflict target wrong: %+v", out.Conflicts[0])
    	}
    	if len(out.Conflicts[0].SourceChoices) != 2 {
    		t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))
    	}
    	// Target must NOT be modified.
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 100, 125)
    	// Fail cleanly instead of panicking on a nil dereference.
    	if target == nil {
    		t.Fatal("target label missing")
    	}
    	if target.CallType != "Duet" || target.Certainty != 70 {
    		t.Errorf("conflicted target was modified: %+v", target)
    	}
    	if df.Meta.Reviewer != "David" {
    		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
    	}
    }
    // TestPropagate_EmptyCallTypePropagates verifies that a verified source with
    // an empty calltype clears the target's calltype while the target still
    // receives the species and certainty 90.
    func TestPropagate_EmptyCallTypePropagates(t *testing.T) {
    	// Source with empty calltype → target gets empty calltype.
    	path := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.Propagated != 1 {
    		t.Fatalf("expected propagated=1: %+v", out)
    	}
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 100, 125)
    	// findLabel may return nil; fail cleanly instead of panicking on the
    	// field accesses below.
    	if target == nil {
    		t.Fatalf("target label 100-125 not found in %q", path)
    	}
    	if target.CallType != "" {
    		t.Errorf("calltype should be cleared, got %q", target.CallType)
    	}
    	if target.Species != "Kiwi" || target.Certainty != 90 {
    		t.Errorf("target fields wrong: %+v", target)
    	}
    }
    // TestPropagate_SpeciesOverride verifies that a target whose species differs
    // from the requested species is overwritten with the requested species, the
    // source calltype, and certainty 90.
    func TestPropagate_SpeciesOverride(t *testing.T) {
    	// Target species was different from --species; must be overwritten.
    	path := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Don't Know", "", 70)),
    	)
    	out, err := CallsPropagate(CallsPropagateInput{
    		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    	})
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if out.Propagated != 1 {
    		t.Fatalf("expected propagated=1: %+v", out)
    	}
    	df := readFile(t, path)
    	target := findLabel(df, fTo, 100, 125)
    	// findLabel may return nil; fail cleanly instead of panicking on the
    	// field accesses below.
    	if target == nil {
    		t.Fatalf("target label 100-125 not found in %q", path)
    	}
    	if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
    		t.Errorf("target not overwritten correctly: %+v", target)
    	}
    }
    // TestPropagate_OverlapBoundaryExclusive checks that a source ending exactly
    // where the target begins is not treated as an overlap.
    func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {
    	// Source is [100,125], target is [125,150]: they share only the boundary point.
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	request := CallsPropagateInput{
    		File:       fixture,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	result, err := CallsPropagate(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(result.Propagated == 0 && result.SkippedNoOverlap == 1) {
    		t.Fatalf("touching boundary must not count as overlap: %+v", result)
    	}
    }
    // TestPropagate_OverlapPartial checks that a source and target sharing only
    // one second ([125,126]) still count as overlapping.
    func TestPropagate_OverlapPartial(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	request := CallsPropagateInput{
    		File:       fixture,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	result, err := CallsPropagate(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if got := result.Propagated; got != 1 {
    		t.Fatalf("expected propagated=1: %+v", result)
    	}
    }
    // TestPropagate_SupersetEitherDirection checks that propagation happens both
    // when the source segment fully contains the target and when the target
    // fully contains the source. Errors from CallsPropagate are reported
    // explicitly rather than discarded, so a failure is not mistaken for a
    // wrong count.
    func TestPropagate_SupersetEitherDirection(t *testing.T) {
    	// Source engulfs target.
    	path1 := writeFile(t,
    		seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	switch out, err := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); {
    	case err != nil:
    		t.Errorf("source-engulfs-target: unexpected error: %v", err)
    	case out.Propagated != 1:
    		t.Errorf("source-engulfs-target: %+v", out)
    	}
    	// Target engulfs source.
    	path2 := writeFile(t,
    		seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	switch out, err := CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); {
    	case err != nil:
    		t.Errorf("target-engulfs-source: unexpected error: %v", err)
    	case out.Propagated != 1:
    		t.Errorf("target-engulfs-source: %+v", out)
    	}
    }
    // TestPropagate_MissingFlags checks that CallsPropagate returns an error
    // whenever one of the four required inputs is left empty.
    func TestPropagate_MissingFlags(t *testing.T) {
    	type scenario struct {
    		name string
    		in   CallsPropagateInput
    	}
    	scenarios := []scenario{
    		{name: "no file", in: CallsPropagateInput{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}},
    		{name: "no from", in: CallsPropagateInput{File: "x", ToFilter: fTo, Species: "Kiwi"}},
    		{name: "no to", in: CallsPropagateInput{File: "x", FromFilter: fFrom, Species: "Kiwi"}},
    		{name: "no species", in: CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo}},
    	}
    	for i := range scenarios {
    		sc := scenarios[i]
    		t.Run(sc.name, func(t *testing.T) {
    			if _, err := CallsPropagate(sc.in); err == nil {
    				t.Errorf("expected error")
    			}
    		})
    	}
    }
    func TestPropagate_SameFromAndTo(t *testing.T) {
    _, err := CallsPropagate(CallsPropagateInput{
    File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",
    })
    if err == nil {
    t.Error("expected error when --from == --to")
    }
    }
    func TestPropagate_NonexistentFile(t *testing.T) {
    _, err := CallsPropagate(CallsPropagateInput{
    File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err == nil {
    t.Error("expected error for nonexistent file")
    }
    }
    // TestPropagate_RealisticMixed mimics the 20260228_211500.WAV.data case:
    // cert=0 "Don't Know" sources coexist with cert=100 Kiwi sources, and only
    // the cert=100 Kiwi classifications reach the targets.
    func TestPropagate_RealisticMixed(t *testing.T) {
    	fixture := writeFile(t,
    		// Sources (kiwi-1.2)
    		seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),
    		seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),
    		seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),
    		seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),
    		// Targets (kiwi-1.5)
    		seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),
    		seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),
    		seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	request := CallsPropagateInput{
    		File:       fixture,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	result, err := CallsPropagate(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	countsOK := result.TargetsExamined == 3 && result.Propagated == 3 && result.SkippedConflict == 0
    	if !countsOK {
    		t.Fatalf("counts wrong: %+v", result)
    	}
    	type wanted struct {
    		start, end float64
    		calltype   string
    	}
    	onDisk := readFile(t, fixture)
    	for _, w := range []wanted{
    		{start: 147.5, end: 167.5, calltype: "Male"},
    		{start: 647.5, end: 672.5, calltype: "Female"},
    		{start: 815, end: 852.5, calltype: "Duet"},
    	} {
    		got := findLabel(onDisk, fTo, w.start, w.end)
    		if got == nil || got.Certainty != 90 || got.CallType != w.calltype || got.Species != "Kiwi" {
    			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
    		}
    	}
    }
    // TestPropagate_NoWriteIfNothingChanged checks that a file holding only
    // source-filter segments reports no activity and keeps its reviewer.
    func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {
    	fixture := writeFile(t,
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    	)
    	request := CallsPropagateInput{
    		File:       fixture,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	result, err := CallsPropagate(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(result.Propagated == 0 && result.TargetsExamined == 0) {
    		t.Fatalf("expected no activity: %+v", result)
    	}
    	// The reviewer written by the fixture must survive untouched.
    	if reviewer := readFile(t, fixture).Meta.Reviewer; reviewer != "David" {
    		t.Errorf("reviewer should not be touched, got %q", reviewer)
    	}
    }
    // writeFileAt writes a fixture .data file named base inside the existing
    // directory dir and returns its full path. The caller chooses the basename
    // (must end in .data). The fixture uses operator "ML", reviewer "David"
    // and a duration of 3600; any write failure aborts the test.
    func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {
    	t.Helper()
    	meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
    	fixture := &utils.DataFile{Meta: meta, Segments: segs}
    	target := filepath.Join(dir, base)
    	err := fixture.Write(target)
    	if err != nil {
    		t.Fatalf("write fixture: %v", err)
    	}
    	return target
    }
    // assertPropagateStats compares every integer counter of got against want
    // and reports each mismatch by field name via t.Errorf. SkippedConflict is
    // included so a regression in conflict counting cannot slip past callers of
    // this helper. String and slice fields are not compared.
    func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {
    	t.Helper()
    	checks := []struct {
    		name string
    		got  int
    		want int
    	}{
    		{"FilesTotal", got.FilesTotal, want.FilesTotal},
    		{"FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters},
    		{"FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter},
    		{"FilesChanged", got.FilesChanged, want.FilesChanged},
    		{"FilesErrored", got.FilesErrored, want.FilesErrored},
    		{"TargetsExamined", got.TargetsExamined, want.TargetsExamined},
    		{"Propagated", got.Propagated, want.Propagated},
    		{"SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap},
    		{"SkippedConflict", got.SkippedConflict, want.SkippedConflict},
    	}
    	for _, c := range checks {
    		if c.got != c.want {
    			t.Errorf("%s: got %d, want %d", c.name, c.got, c.want)
    		}
    	}
    }
    // TestPropagateFolder_AggregatesAndSkipsMissing runs a folder propagate over
    // four fixture files and checks the aggregated counters as well as the
    // on-disk state of the files that should and should not have been rewritten.
    func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {
    	folder := t.TempDir()
    	// a.wav.data: both filters present, one clean propagation.
    	pathA := writeFileAt(t, folder, "a.wav.data",
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	// b.wav.data: only the target filter, so it must be skipped silently.
    	pathB := writeFileAt(t, folder, "b.wav.data",
    		seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	// c.wav.data: only the source filter, so it must be skipped silently.
    	writeFileAt(t, folder, "c.wav.data",
    		seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),
    	)
    	// d.wav.data: both filters but no overlap, so the target is examined and left alone.
    	pathD := writeFileAt(t, folder, "d.wav.data",
    		seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
    	)
    	request := CallsPropagateFolderInput{
    		Folder:     folder,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	got, err := CallsPropagateFolder(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	want := CallsPropagateFolderOutput{
    		FilesTotal:           4,
    		FilesWithBothFilters: 2,
    		FilesSkippedNoFilter: 2,
    		FilesChanged:         1,
    		FilesErrored:         0,
    		TargetsExamined:      2,
    		Propagated:           1,
    		SkippedNoOverlap:     1,
    	}
    	assertPropagateStats(t, got, want)
    	t.Run("file_a_propagated", func(t *testing.T) {
    		data := readFile(t, pathA)
    		if reviewer := data.Meta.Reviewer; reviewer != "Skraak" {
    			t.Errorf("reviewer: got %q, want Skraak", reviewer)
    		}
    		label := findLabel(data, fTo, 100, 125)
    		if label == nil || label.Certainty != 90 || label.CallType != "Male" {
    			t.Errorf("target label: got %+v, want cert=90 calltype=Male", label)
    		}
    	})
    	t.Run("file_b_skipped", func(t *testing.T) {
    		if reviewer := readFile(t, pathB).Meta.Reviewer; reviewer != "David" {
    			t.Errorf("reviewer should not be touched, got %q", reviewer)
    		}
    	})
    	t.Run("file_d_no_overlap", func(t *testing.T) {
    		data := readFile(t, pathD)
    		if reviewer := data.Meta.Reviewer; reviewer != "David" {
    			t.Errorf("reviewer should not be touched, got %q", reviewer)
    		}
    		label := findLabel(data, fTo, 500, 525)
    		if label == nil || label.Certainty != 70 {
    			t.Errorf("target label should be unchanged cert=70, got %+v", label)
    		}
    	})
    }
    func TestPropagateFolder_EmptyFolder(t *testing.T) {
    dir := t.TempDir()
    out, err := CallsPropagateFolder(CallsPropagateFolderInput{
    Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if out.FilesTotal != 0 || out.Propagated != 0 {
    t.Errorf("expected empty result, got %+v", out)
    }
    }
    // TestPropagateFolder_MissingRequiredFlags checks that each required input
    // left empty, and identical from/to filters, makes CallsPropagateFolder
    // return an error.
    func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {
    	folder := t.TempDir()
    	invalid := []CallsPropagateFolderInput{
    		{Folder: "", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"},
    		{Folder: folder, FromFilter: "", ToFilter: fTo, Species: "Kiwi"},
    		{Folder: folder, FromFilter: fFrom, ToFilter: "", Species: "Kiwi"},
    		{Folder: folder, FromFilter: fFrom, ToFilter: fTo, Species: ""},
    		{Folder: folder, FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi"},
    	}
    	for idx := range invalid {
    		request := invalid[idx]
    		_, err := CallsPropagateFolder(request)
    		if err != nil {
    			continue
    		}
    		t.Errorf("case %d: expected error for input %+v", idx, request)
    	}
    }
    func TestPropagateFolder_NonexistentFolder(t *testing.T) {
    _, err := CallsPropagateFolder(CallsPropagateFolderInput{
    Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
    })
    if err == nil {
    t.Fatal("expected error for nonexistent folder")
    }
    }
    // TestPropagateFolder_ConflictsTaggedWithFile checks that a conflict found
    // during a folder run carries a non-empty File field.
    func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {
    	folder := t.TempDir()
    	// One target overlapped by two sources that disagree on calltype.
    	writeFileAt(t, folder, "conflict.wav.data",
    		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
    		seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),
    		seg(100, 130, lbl(fTo, "Kiwi", "", 70)),
    	)
    	request := CallsPropagateFolderInput{
    		Folder:     folder,
    		FromFilter: fFrom,
    		ToFilter:   fTo,
    		Species:    "Kiwi",
    	}
    	result, err := CallsPropagateFolder(request)
    	if err != nil {
    		t.Fatalf("unexpected error: %v", err)
    	}
    	if !(result.SkippedConflict == 1 && len(result.Conflicts) == 1) {
    		t.Fatalf("expected one conflict, got %+v", result)
    	}
    	if first := result.Conflicts[0]; first.File == "" {
    		t.Errorf("conflict should be tagged with file path, got %+v", first)
    	}
    }
  • file addition: calls_propagate.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "os"
    "skraak/utils"
    )
    // CallsPropagateInput holds the arguments for a single-file propagate run.
    // All four fields are required, and FromFilter must differ from ToFilter.
    type CallsPropagateInput struct {
    // File is the path of the .data file to parse and, when anything was
    // propagated, rewrite.
    File string `json:"file"`
    // FromFilter names the filter whose labels act as sources.
    FromFilter string `json:"from_filter"`
    // ToFilter names the filter whose labels are the update targets.
    ToFilter string `json:"to_filter"`
    // Species selects which source labels are considered and is the species
    // written to every propagated target label.
    Species string `json:"species"`
    }
    // CallsPropagateOutput reports the result of a single-file propagate run.
    // File, FromFilter, ToFilter and Species echo the input.
    type CallsPropagateOutput struct {
    File string `json:"file"`
    FromFilter string `json:"from_filter"`
    ToFilter string `json:"to_filter"`
    Species string `json:"species"`
    // FiltersMissing is set when the file does not contain labels for both
    // filters; no targets are examined in that case.
    FiltersMissing bool `json:"filters_missing,omitempty"`
    // TargetsExamined counts segments that carry an updatable target label.
    TargetsExamined int `json:"targets_examined"`
    // Propagated counts target labels that were updated.
    Propagated int `json:"propagated"`
    // SkippedNoOverlap counts examined targets with no overlapping source.
    SkippedNoOverlap int `json:"skipped_no_overlap"`
    // SkippedConflict counts examined targets whose overlapping sources
    // disagree on calltype.
    SkippedConflict int `json:"skipped_conflict"`
    // Conflicts holds one record per target counted in SkippedConflict.
    Conflicts []PropagateConflict `json:"conflicts,omitempty"`
    // Changes holds one record per target counted in Propagated.
    Changes []PropagateChange `json:"changes,omitempty"`
    // Error carries the same message as the returned error, if any.
    Error string `json:"error,omitempty"`
    }
    // CallsPropagateFolderInput holds the arguments for a folder-wide propagate
    // run. All four fields are required, and FromFilter must differ from ToFilter.
    type CallsPropagateFolderInput struct {
    // Folder is the directory searched for .data files.
    Folder string `json:"folder"`
    // FromFilter names the filter whose labels act as sources.
    FromFilter string `json:"from_filter"`
    // ToFilter names the filter whose labels are the update targets.
    ToFilter string `json:"to_filter"`
    // Species is passed unchanged to every per-file run.
    Species string `json:"species"`
    }
    // CallsPropagateFolderOutput aggregates the per-file results of a folder
    // run. Folder, FromFilter, ToFilter and Species echo the input.
    type CallsPropagateFolderOutput struct {
    Folder string `json:"folder"`
    FromFilter string `json:"from_filter"`
    ToFilter string `json:"to_filter"`
    Species string `json:"species"`
    // FilesTotal is the number of .data files found in the folder.
    FilesTotal int `json:"files_total"`
    // FilesWithBothFilters counts files that were processed without error and
    // contained both filters.
    FilesWithBothFilters int `json:"files_with_both_filters"`
    // FilesSkippedNoFilter counts files lacking at least one of the filters.
    FilesSkippedNoFilter int `json:"files_skipped_no_filter"`
    // FilesChanged counts files in which at least one target was propagated.
    FilesChanged int `json:"files_changed"`
    // FilesErrored counts files whose per-file run returned an error.
    FilesErrored int `json:"files_errored"`
    // The four counters below are sums over the files counted in
    // FilesWithBothFilters.
    TargetsExamined int `json:"targets_examined"`
    Propagated int `json:"propagated"`
    SkippedNoOverlap int `json:"skipped_no_overlap"`
    SkippedConflict int `json:"skipped_conflict"`
    // Conflicts collects every per-file conflict, each tagged with its file path.
    Conflicts []PropagateConflict `json:"conflicts,omitempty"`
    // Errors holds the per-file output of each file counted in FilesErrored.
    Errors []CallsPropagateOutput `json:"errors,omitempty"`
    // Error carries the same message as the returned error, if any.
    Error string `json:"error,omitempty"`
    }
    // PropagateConflict describes a target segment that was skipped because its
    // overlapping sources disagree on calltype.
    type PropagateConflict struct {
    // File is filled in by CallsPropagateFolder; a single-file run leaves it empty.
    File string `json:"file,omitempty"`
    // TargetStart and TargetEnd are the start and end times of the target segment.
    TargetStart float64 `json:"target_start"`
    TargetEnd float64 `json:"target_end"`
    // TargetCallType is the calltype the target label had when it was skipped.
    TargetCallType string `json:"target_calltype,omitempty"`
    // SourceChoices lists every overlapping source, one entry each.
    SourceChoices []PropagateSourceChoice `json:"source_choices"`
    }
    // PropagateSourceChoice is one overlapping source listed in a conflict
    // record: the source segment's start and end times plus the species and
    // calltype of its label.
    type PropagateSourceChoice struct {
    Start float64 `json:"start"`
    End float64 `json:"end"`
    Species string `json:"species"`
    CallType string `json:"calltype,omitempty"`
    }
    // PropagateChange records one applied propagation: the target segment's
    // time range and the label's values before and after the update.
    type PropagateChange struct {
    TargetStart float64 `json:"target_start"`
    TargetEnd float64 `json:"target_end"`
    // Prev* fields hold the target label's values before the update.
    PrevSpecies string `json:"prev_species"`
    PrevCallType string `json:"prev_calltype,omitempty"`
    PrevCertainty int `json:"prev_certainty"`
    // New* fields hold the values written to the target label.
    NewSpecies string `json:"new_species"`
    NewCallType string `json:"new_calltype,omitempty"`
    NewCertainty int `json:"new_certainty"`
    }
    // CallsPropagate copies verified classifications (certainty==100) from one filter's
    // segments to overlapping target segments of another filter, within a single .data file.
    // Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't Know / Noise)
    // are updated — targets at certainty==100 (human-verified) and certainty==90 (already
    // propagated) are left alone. Only source labels matching --species are considered.
    // Propagated target labels are set to certainty=90 and file reviewer is set to "Skraak".
    //
    // The file is rewritten only when at least one target was propagated. A file
    // that lacks either filter is reported with FiltersMissing set and no error.
    // On failure the returned output carries the error message in its Error
    // field; parse and write failures wrap the underlying error so callers can
    // inspect it with errors.Is / errors.As.
    func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {
    	output := CallsPropagateOutput{
    		File:       input.File,
    		FromFilter: input.FromFilter,
    		ToFilter:   input.ToFilter,
    		Species:    input.Species,
    	}
    	if err := validatePropagateInput(&output, input); err != nil {
    		return output, err
    	}
    	df, err := utils.ParseDataFile(input.File)
    	if err != nil {
    		// %w keeps the cause in the chain; the message text is unchanged.
    		wrapped := fmt.Errorf("parse %s: %w", input.File, err)
    		output.Error = wrapped.Error()
    		return output, wrapped
    	}
    	// Fast path: skip files that don't contain both filters at all.
    	if !hasBothFilters(df, input.FromFilter, input.ToFilter) {
    		output.FiltersMissing = true
    		return output, nil
    	}
    	sources := collectPropagateSources(df, input.FromFilter, input.Species)
    	propagateTargets(df, sources, input, &output)
    	if output.Propagated == 0 {
    		// Nothing changed, so the file (and its reviewer) stays as it was.
    		return output, nil
    	}
    	df.Meta.Reviewer = "Skraak"
    	if err := df.Write(input.File); err != nil {
    		wrapped := fmt.Errorf("write %s: %w", input.File, err)
    		output.Error = wrapped.Error()
    		return output, wrapped
    	}
    	return output, nil
    }
    // validatePropagateInput rejects incomplete or inconsistent input before any
    // file is parsed. Checks run in a fixed order: the four required values,
    // then from/to equality, then existence of the file. The first failure is
    // stored in output.Error and returned as an error with the same message.
    func validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {
    	// fail records the message on the output and turns it into the error.
    	fail := func(msg string) error {
    		output.Error = msg
    		return fmt.Errorf("%s", msg)
    	}
    	switch {
    	case input.File == "":
    		return fail("--file is required")
    	case input.FromFilter == "":
    		return fail("--from is required")
    	case input.ToFilter == "":
    		return fail("--to is required")
    	case input.Species == "":
    		return fail("--species is required")
    	case input.FromFilter == input.ToFilter:
    		return fail("--from and --to must differ")
    	}
    	// Only a definite "does not exist" is rejected here.
    	_, statErr := os.Stat(input.File)
    	if os.IsNotExist(statErr) {
    		return fail(fmt.Sprintf("file not found: %s", input.File))
    	}
    	return nil
    }
    // hasBothFilters reports whether the data file holds at least one label for
    // fromFilter and at least one label for toFilter, across all segments.
    // Scanning stops as soon as both have been seen.
    func hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
    	var sawFrom, sawTo bool
    	for _, segment := range df.Segments {
    		for _, label := range segment.Labels {
    			sawFrom = sawFrom || label.Filter == fromFilter
    			sawTo = sawTo || label.Filter == toFilter
    			if sawFrom && sawTo {
    				return true
    			}
    		}
    	}
    	return false
    }
    // sourceRef pairs a source segment with the label on it that qualified it as
    // a propagation source. The segment supplies the time range used for overlap
    // tests; the label supplies the species and calltype.
    type sourceRef struct {
    seg *utils.Segment
    label *utils.Label
    }
    // collectPropagateSources returns one sourceRef per segment that has a label
    // with the given filter and species at certainty 100. Only the first such
    // label of a segment is used. The result is nil when nothing qualifies.
    func collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
    	var refs []sourceRef
    	for _, segment := range df.Segments {
    		for _, label := range segment.Labels {
    			if label.Filter != fromFilter || label.Species != species || label.Certainty != 100 {
    				continue
    			}
    			refs = append(refs, sourceRef{seg: segment, label: label})
    			// One source per segment is enough.
    			break
    		}
    	}
    	return refs
    }
    // propagateTargets walks every segment that has an updatable target label
    // and either applies the calltype agreed on by its overlapping sources or
    // records why it was skipped (no overlap, or conflicting calltypes).
    // Counters and records are accumulated on output.
    func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
    	for _, target := range df.Segments {
    		label := findUpdatableTargetLabel(target.Labels, input.ToFilter)
    		if label == nil {
    			continue
    		}
    		output.TargetsExamined++

    		matches := findOverlappingSources(sources, target)
    		if len(matches) == 0 {
    			output.SkippedNoOverlap++
    			continue
    		}

    		callType, disagree := resolveCallType(matches)
    		if !disagree {
    			applyPropagation(label, input.Species, callType, target, output)
    			continue
    		}
    		output.SkippedConflict++
    		output.Conflicts = append(output.Conflicts, buildConflictRecord(target, label, matches))
    	}
    }
    // findUpdatableTargetLabel returns the first label belonging to toFilter
    // whose certainty is 70 or 0, or nil when the slice holds no such label.
    func findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {
    	for i := range labels {
    		candidate := labels[i]
    		if candidate.Filter != toFilter {
    			continue
    		}
    		switch candidate.Certainty {
    		case 0, 70:
    			return candidate
    		}
    	}
    	return nil
    }
    // findOverlappingSources returns, in input order, the sources whose time
    // range strictly overlaps the target segment. Ranges that merely touch at
    // an endpoint do not count. The result is nil when nothing overlaps.
    func findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {
    	var hits []sourceRef
    	for i := range sources {
    		src := sources[i]
    		startsBeforeTargetEnds := src.seg.StartTime < tSeg.EndTime
    		endsAfterTargetStarts := tSeg.StartTime < src.seg.EndTime
    		if startsBeforeTargetEnds && endsAfterTargetStarts {
    			hits = append(hits, src)
    		}
    	}
    	return hits
    }
    // resolveCallType compares the calltype of every overlapping source with
    // that of the first one. When all match it returns that calltype and false;
    // on the first mismatch it returns "" and true. overlaps must be non-empty.
    func resolveCallType(overlaps []sourceRef) (string, bool) {
    	reference := overlaps[0].label.CallType
    	for i := 1; i < len(overlaps); i++ {
    		if overlaps[i].label.CallType != reference {
    			return "", true
    		}
    	}
    	return reference, false
    }
    // buildConflictRecord assembles the conflict report for one skipped target:
    // the target's time range and current calltype, plus one source choice per
    // overlapping source. The File field is left empty.
    func buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {
    	options := make([]PropagateSourceChoice, len(overlaps))
    	for i, src := range overlaps {
    		options[i] = PropagateSourceChoice{
    			Start:    src.seg.StartTime,
    			End:      src.seg.EndTime,
    			Species:  src.label.Species,
    			CallType: src.label.CallType,
    		}
    	}
    	var record PropagateConflict
    	record.TargetStart = tSeg.StartTime
    	record.TargetEnd = tSeg.EndTime
    	record.TargetCallType = toLabel.CallType
    	record.SourceChoices = options
    	return record
    }
    // applyPropagation overwrites the target label with the given species and
    // calltype at certainty 90, bumps output.Propagated, and appends a change
    // record holding the label's previous and new values.
    func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
    	const propagatedCertainty = 90

    	// Capture the old values before they are overwritten.
    	prevSpecies, prevCallType, prevCertainty := toLabel.Species, toLabel.CallType, toLabel.Certainty

    	toLabel.Species, toLabel.CallType, toLabel.Certainty = species, callType, propagatedCertainty

    	output.Propagated++
    	output.Changes = append(output.Changes, PropagateChange{
    		TargetStart:   tSeg.StartTime,
    		TargetEnd:     tSeg.EndTime,
    		PrevSpecies:   prevSpecies,
    		PrevCallType:  prevCallType,
    		PrevCertainty: prevCertainty,
    		NewSpecies:    species,
    		NewCallType:   callType,
    		NewCertainty:  propagatedCertainty,
    	})
    }
    // CallsPropagateFolder runs CallsPropagate against every .data file in a folder,
    // aggregating counts. Files that do not contain both --from and --to filters are
    // skipped silently (counted as files_skipped_no_filter). Parse/write errors on
    // individual files are collected in Errors; they don't abort the run.
    //
    // Input problems (missing value, identical filters, folder missing or not a
    // directory, listing failure) abort before any file is touched; the message
    // is stored in the output's Error field and returned as the error. A listing
    // failure wraps the underlying error so callers can inspect it with
    // errors.Is / errors.As.
    func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {
    	output := CallsPropagateFolderOutput{
    		Folder:     input.Folder,
    		FromFilter: input.FromFilter,
    		ToFilter:   input.ToFilter,
    		Species:    input.Species,
    	}
    	// fail records the message on the output and returns it with the error.
    	fail := func(err error) (CallsPropagateFolderOutput, error) {
    		output.Error = err.Error()
    		return output, err
    	}

    	// Required values, checked in a fixed order so the first missing one wins.
    	required := []struct {
    		val string
    		msg string
    	}{
    		{input.Folder, "--folder is required"},
    		{input.FromFilter, "--from is required"},
    		{input.ToFilter, "--to is required"},
    		{input.Species, "--species is required"},
    	}
    	for _, r := range required {
    		if r.val == "" {
    			return fail(fmt.Errorf("%s", r.msg))
    		}
    	}
    	if input.FromFilter == input.ToFilter {
    		return fail(fmt.Errorf("%s", "--from and --to must differ"))
    	}

    	info, err := os.Stat(input.Folder)
    	if err != nil {
    		return fail(fmt.Errorf("folder not found: %s", input.Folder))
    	}
    	if !info.IsDir() {
    		return fail(fmt.Errorf("not a directory: %s", input.Folder))
    	}
    	files, err := utils.FindDataFiles(input.Folder)
    	if err != nil {
    		// %w keeps the cause in the chain; the message text is unchanged.
    		return fail(fmt.Errorf("list .data files: %w", err))
    	}

    	output.FilesTotal = len(files)
    	for _, f := range files {
    		fileOut, err := CallsPropagate(CallsPropagateInput{
    			File:       f,
    			FromFilter: input.FromFilter,
    			ToFilter:   input.ToFilter,
    			Species:    input.Species,
    		})
    		if err != nil {
    			// Keep going: one bad file must not abort the whole folder.
    			output.FilesErrored++
    			output.Errors = append(output.Errors, fileOut)
    			continue
    		}
    		if fileOut.FiltersMissing {
    			output.FilesSkippedNoFilter++
    			continue
    		}
    		output.FilesWithBothFilters++
    		output.TargetsExamined += fileOut.TargetsExamined
    		output.Propagated += fileOut.Propagated
    		output.SkippedNoOverlap += fileOut.SkippedNoOverlap
    		output.SkippedConflict += fileOut.SkippedConflict
    		if fileOut.Propagated > 0 {
    			output.FilesChanged++
    		}
    		for _, c := range fileOut.Conflicts {
    			// Tag a copy of each conflict with the file it came from.
    			c.File = f
    			output.Conflicts = append(output.Conflicts, c)
    		}
    	}
    	return output, nil
    }
  • file addition: calls_modify_test.go (----------)
    [0.67281]
    package calls
    import (
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // TestCallsModifyBookmark checks that requesting bookmark=true on a label
    // that is already bookmarked is reported as "no changes needed" and leaves
    // the bookmark set in the file.
    func TestCallsModifyBookmark(t *testing.T) {
    	// Fixture: a single segment whose label is already bookmarked.
    	dataPath := filepath.Join(t.TempDir(), "test.data")
    	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true}
    	segment := &utils.Segment{
    		StartTime: 10.0,
    		EndTime:   15.0,
    		FreqLow:   100,
    		FreqHigh:  5000,
    		Labels:    []*utils.Label{label},
    	}
    	fixture := &utils.DataFile{
    		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{segment},
    	}
    	if err := fixture.Write(dataPath); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}

    	// Asking for a bookmark that is already there should change nothing.
    	wantBookmark := true
    	request := CallsModifyInput{
    		File:      dataPath,
    		Reviewer:  "tester",
    		Filter:    "myfilter",
    		Segment:   "10-15",
    		Certainty: 80,
    		Bookmark:  &wantBookmark,
    	}
    	result, err := CallsModify(request)
    	if err == nil {
    		t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")
    	}
    	if result.Error != "No changes needed: all values already match" {
    		t.Errorf("expected 'no changes needed' error, got: %s", result.Error)
    	}

    	// The bookmark must still be set on disk.
    	reread, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if stillSet := reread.Segments[0].Labels[0].Bookmark; !stillSet {
    		t.Errorf("bookmark should still be true, got false")
    	}
    }
    // TestCallsModifyBookmarkFalse checks that requesting bookmark=true on a
    // label without a bookmark sets it, both in the result and in the file.
    func TestCallsModifyBookmarkFalse(t *testing.T) {
    	// Fixture: a single segment whose label has no bookmark.
    	dataPath := filepath.Join(t.TempDir(), "test.data")
    	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false}
    	segment := &utils.Segment{
    		StartTime: 10.0,
    		EndTime:   15.0,
    		FreqLow:   100,
    		FreqHigh:  5000,
    		Labels:    []*utils.Label{label},
    	}
    	fixture := &utils.DataFile{
    		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{segment},
    	}
    	if err := fixture.Write(dataPath); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}

    	// Adding a bookmark where none exists should set it to true.
    	wantBookmark := true
    	request := CallsModifyInput{
    		File:      dataPath,
    		Reviewer:  "tester",
    		Filter:    "myfilter",
    		Segment:   "10-15",
    		Certainty: 80,
    		Bookmark:  &wantBookmark,
    	}
    	result, err := CallsModify(request)
    	if err != nil {
    		t.Errorf("unexpected error: %v", err)
    	}
    	if !(result.Bookmark != nil && *result.Bookmark) {
    		t.Errorf("expected bookmark=true in result, got %v", result.Bookmark)
    	}

    	// The bookmark must now be set on disk.
    	reread, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if set := reread.Segments[0].Labels[0].Bookmark; !set {
    		t.Errorf("bookmark should be true, got false")
    	}
    }
    // TestCallsModifyCommentAdditive checks that a new comment is appended to an
    // existing one with " | " between them, in the result and in the file.
    func TestCallsModifyCommentAdditive(t *testing.T) {
    	// Fixture: a single segment whose label already has a comment.
    	dataPath := filepath.Join(t.TempDir(), "test.data")
    	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"}
    	segment := &utils.Segment{
    		StartTime: 10.0,
    		EndTime:   15.0,
    		FreqLow:   100,
    		FreqHigh:  5000,
    		Labels:    []*utils.Label{label},
    	}
    	fixture := &utils.DataFile{
    		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{segment},
    	}
    	if err := fixture.Write(dataPath); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}

    	// Adding a comment should be additive.
    	request := CallsModifyInput{
    		File:      dataPath,
    		Reviewer:  "tester",
    		Filter:    "myfilter",
    		Segment:   "10-15",
    		Certainty: 80,
    		Comment:   "Good example",
    	}
    	result, err := CallsModify(request)
    	if err != nil {
    		t.Errorf("unexpected error: %v", err)
    	}
    	const want = "First observation | Good example"
    	if got := result.Comment; got != want {
    		t.Errorf("expected comment=%q, got %q", want, got)
    	}

    	// The combined comment must also be on disk.
    	reread, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if got := reread.Segments[0].Labels[0].Comment; got != want {
    		t.Errorf("expected comment in file=%q, got %q", want, got)
    	}
    }
    // TestCallsModifyCommentAdditiveMultiple adds three comments in a row to a
    // label that starts without one and checks that the final result joins them
    // with " | " in the order they were added.
    func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {
    	// Fixture: a single segment whose label has no comment yet.
    	dataPath := filepath.Join(t.TempDir(), "test.data")
    	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
    	segment := &utils.Segment{
    		StartTime: 10.0,
    		EndTime:   15.0,
    		FreqLow:   100,
    		FreqHigh:  5000,
    		Labels:    []*utils.Label{label},
    	}
    	fixture := &utils.DataFile{
    		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{segment},
    	}
    	if err := fixture.Write(dataPath); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}

    	// withComment builds the modify request; only the comment text varies.
    	withComment := func(text string) CallsModifyInput {
    		return CallsModifyInput{
    			File:      dataPath,
    			Reviewer:  "tester",
    			Filter:    "myfilter",
    			Segment:   "10-15",
    			Certainty: 80,
    			Comment:   text,
    		}
    	}

    	if _, err := CallsModify(withComment("First")); err != nil {
    		t.Fatalf("unexpected error on first comment: %v", err)
    	}
    	if _, err := CallsModify(withComment("Second")); err != nil {
    		t.Fatalf("unexpected error on second comment: %v", err)
    	}
    	final, err := CallsModify(withComment("Third"))
    	if err != nil {
    		t.Fatalf("unexpected error on third comment: %v", err)
    	}

    	const want = "First | Second | Third"
    	if got := final.Comment; got != want {
    		t.Errorf("expected comment=%q, got %q", want, got)
    	}
    }
    // TestCallsModifyCommentTooLong checks that appending a comment which would
    // push the combined text past 140 characters is rejected with an error and
    // that the original comment stays in the file.
    func TestCallsModifyCommentTooLong(t *testing.T) {
    	// Fixture: a single segment whose label already carries a long comment.
    	const original = "This is a fairly long existing comment that takes up space"
    	dataPath := filepath.Join(t.TempDir(), "test.data")
    	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: original}
    	segment := &utils.Segment{
    		StartTime: 10.0,
    		EndTime:   15.0,
    		FreqLow:   100,
    		FreqHigh:  5000,
    		Labels:    []*utils.Label{label},
    	}
    	fixture := &utils.DataFile{
    		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
    		Segments: []*utils.Segment{segment},
    	}
    	if err := fixture.Write(dataPath); err != nil {
    		t.Fatalf("failed to write test file: %v", err)
    	}

    	// The addition below is long enough to exceed the limit once combined.
    	request := CallsModifyInput{
    		File:      dataPath,
    		Reviewer:  "tester",
    		Filter:    "myfilter",
    		Segment:   "10-15",
    		Certainty: 80,
    		Comment:   "This is another very long comment that when combined with the existing one will exceed the limit",
    	}
    	result, err := CallsModify(request)
    	if err == nil {
    		t.Errorf("expected error for combined comment exceeding 140 chars, got nil")
    	}
    	if result.Error == "" {
    		t.Errorf("expected error message, got empty")
    	}

    	// The original comment must be preserved on disk.
    	reread, err := utils.ParseDataFile(dataPath)
    	if err != nil {
    		t.Fatalf("failed to parse file: %v", err)
    	}
    	if got := reread.Segments[0].Labels[0].Comment; got != original {
    		t.Errorf("original comment should be preserved, got %q", got)
    	}
    }
    // TestCallsModifyPreservesBookmarkOnOtherChange checks that changing only the
    // certainty leaves an existing bookmark set in the file and does not report a
    // bookmark in the output.
    func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {
        // Seed a .data file whose label is bookmarked.
        dataPath := filepath.Join(t.TempDir(), "test.data")
        bookmarked := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true}
        seed := &utils.DataFile{
            Meta: &utils.DataMeta{Operator: "test", Duration: 60},
            Segments: []*utils.Segment{
                {StartTime: 10.0, EndTime: 15.0, FreqLow: 100, FreqHigh: 5000, Labels: []*utils.Label{bookmarked}},
            },
        }
        if err := seed.Write(dataPath); err != nil {
            t.Fatalf("failed to write test file: %v", err)
        }

        // Only the certainty differs; Bookmark is deliberately left nil.
        res, modErr := CallsModify(CallsModifyInput{
            File:      dataPath,
            Reviewer:  "tester",
            Filter:    "myfilter",
            Segment:   "10-15",
            Certainty: 100,
        })
        if modErr != nil {
            t.Errorf("unexpected error: %v", modErr)
        }
        if res.Bookmark != nil {
            t.Errorf("bookmark should not be in output when not changed, got %v", res.Bookmark)
        }

        // Re-read the file and confirm the bookmark survived.
        reloaded, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse file: %v", err)
        }
        if stored := reloaded.Segments[0].Labels[0]; !stored.Bookmark {
            t.Errorf("bookmark should still be true after changing certainty, got false")
        }
    }
    // TestCallsModifyInvalidSegment checks that asking for a time range that
    // matches no segment yields both an error and an error message in the output.
    func TestCallsModifyInvalidSegment(t *testing.T) {
        dataPath := filepath.Join(t.TempDir(), "test.data")
        onlyLabel := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
        seed := &utils.DataFile{
            Meta: &utils.DataMeta{Operator: "test", Duration: 60},
            Segments: []*utils.Segment{
                {StartTime: 10.0, EndTime: 15.0, FreqLow: 100, FreqHigh: 5000, Labels: []*utils.Label{onlyLabel}},
            },
        }
        if err := seed.Write(dataPath); err != nil {
            t.Fatalf("failed to write test file: %v", err)
        }

        // The file only holds segment 10-15, so 99-100 cannot match.
        res, modErr := CallsModify(CallsModifyInput{
            File:      dataPath,
            Reviewer:  "tester",
            Filter:    "myfilter",
            Segment:   "99-100",
            Certainty: 80,
        })
        if modErr == nil {
            t.Errorf("expected error for non-existent segment, got nil")
        }
        if res.Error == "" {
            t.Errorf("expected error message, got empty")
        }
    }
  • file addition: calls_modify.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "math"
    "os"
    "strings"
    "skraak/utils"
    )
    // CallsModifyInput defines the input for the modify tool.
    // File, Reviewer, Filter and Segment are required: validateModifyInput
    // rejects the request when any of them is empty.
    type CallsModifyInput struct {
    // File is the path of the .data file to modify.
    File string `json:"file"`
    // Reviewer is stored in the data file's metadata when changes are applied.
    Reviewer string `json:"reviewer"`
    // Filter selects which label of the matched segment is modified.
    Filter string `json:"filter"`
    // Segment is the target time range in "start-end" form, e.g. "12-15".
    Segment string `json:"segment"`
    // Certainty is written to the label on every successful modification.
    Certainty int `json:"certainty"`
    // Species is "species" or "species+calltype"; empty keeps the label's current values.
    Species string `json:"species"`
    // Bookmark is nil when the label's bookmark flag should be left untouched.
    Bookmark *bool `json:"bookmark"`
    // Comment is appended to any existing comment; it must be ASCII and at most 140 characters.
    Comment string `json:"comment"`
    }
    // CallsModifyOutput defines the output for the modify tool.
    // Fields tagged omitempty are left out of the JSON when they hold their zero value.
    type CallsModifyOutput struct {
    // File echoes the input file path once the segment range has been parsed.
    File string `json:"file"`
    // SegmentStart and SegmentEnd are the integer bounds parsed from the input segment.
    SegmentStart int `json:"segment_start"`
    SegmentEnd int `json:"segment_end"`
    // Species, CallType and Certainty are the values written to the label.
    Species string `json:"species,omitempty"`
    CallType string `json:"calltype,omitempty"`
    Certainty int `json:"certainty,omitempty"`
    // Bookmark is set only when the bookmark flag was actually changed.
    Bookmark *bool `json:"bookmark,omitempty"`
    // Comment is the full comment stored on the label after appending the new text.
    Comment string `json:"comment,omitempty"`
    // PreviousValue is the label formatted by formatLabel before any change was applied.
    PreviousValue string `json:"previous_value,omitempty"`
    // Error carries the failure message when the modification did not succeed.
    Error string `json:"error,omitempty"`
    }
    // validateModifyInput verifies that the mandatory flags are present and that
    // the comment, if any, is at most 140 bytes long and contains only ASCII.
    // The first violated rule is returned as an error; nil means the input is usable.
    func validateModifyInput(input CallsModifyInput) error {
        switch {
        case input.File == "":
            return fmt.Errorf("--file is required")
        case input.Reviewer == "":
            return fmt.Errorf("--reviewer is required")
        case input.Filter == "":
            return fmt.Errorf("--filter is required")
        case input.Segment == "":
            return fmt.Errorf("--segment is required")
        case len(input.Comment) > 140:
            return fmt.Errorf("--comment must be 140 characters or less")
        }
        // Ranging over a string yields the byte offset of each rune, which is
        // what gets reported as the position of the first non-ASCII character.
        for offset, ch := range input.Comment {
            if ch > 127 {
                return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", offset)
            }
        }
        return nil
    }
    // resolveSpecies determines the species and call type to store on a label.
    // An empty inputSpecies keeps the label's current pair. Otherwise the input is
    // split at the first "+": the left part is the species, the right part the
    // call type (empty when there is no "+").
    func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
        if inputSpecies == "" {
            return label.Species, label.CallType
        }
        // Cut returns the whole string and "" when the separator is absent.
        species, callType, _ = strings.Cut(inputSpecies, "+")
        return species, callType
    }
    // hasModifyChanges reports whether applying the request would alter the label:
    // a different species or call type, a different certainty, an explicitly
    // requested bookmark value that differs from the stored one, or any new
    // comment text (comments are always appended, so a non-empty one is a change).
    func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
        identityChanged := newSpecies != label.Species || newCallType != label.CallType
        certaintyChanged := input.Certainty != label.Certainty
        bookmarkChanged := input.Bookmark != nil && *input.Bookmark != label.Bookmark
        commentAdded := input.Comment != ""
        return identityChanged || certaintyChanged || bookmarkChanged || commentAdded
    }
    // applyLabelChanges updates the label and data file and records the new values
    // in output.
    //
    // The combined comment (existing + " | " + new) is validated before anything
    // is touched, so on error the label, the file metadata and the output are all
    // left exactly as they were. On success:
    //   - the reviewer is stored in the data file metadata,
    //   - species, call type and certainty are written to the label and output,
    //   - the bookmark is changed (and reported) only when input.Bookmark is set
    //     and differs from the stored value,
    //   - a non-empty input comment is appended to the label's comment.
    //
    // Returns an error when the combined comment would exceed 140 characters.
    func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
        // Build and check the comment first: it is the only step that can fail.
        combined := label.Comment
        if input.Comment != "" {
            if combined != "" {
                combined += " | "
            }
            combined += input.Comment
            if len(combined) > 140 {
                return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(combined))
            }
        }

        dataFile.Meta.Reviewer = input.Reviewer

        label.Species = newSpecies
        label.CallType = newCallType
        output.Species = newSpecies
        output.CallType = newCallType

        label.Certainty = input.Certainty
        output.Certainty = input.Certainty

        if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
            label.Bookmark = *input.Bookmark
            output.Bookmark = input.Bookmark
        }

        if input.Comment != "" {
            label.Comment = combined
            output.Comment = combined
        }
        return nil
    }
    // CallsModify changes one label of a .data file: it validates the input,
    // locates the segment by time range and the label by filter, applies the
    // requested changes and writes the file back. Every failure is returned both
    // as the error value and as text in the output's Error field.
    func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
        var output CallsModifyOutput

        // fail records the error text on the output and returns both together.
        fail := func(cause error) (CallsModifyOutput, error) {
            output.Error = cause.Error()
            return output, cause
        }

        if err := validateModifyInput(input); err != nil {
            return fail(err)
        }
        startTime, endTime, err := parseSegmentRange(input.Segment)
        if err != nil {
            return fail(err)
        }

        output.File = input.File
        output.SegmentStart = startTime
        output.SegmentEnd = endTime

        // Only a definite "does not exist" is reported here; other stat errors
        // fall through and surface when the file is parsed.
        if _, statErr := os.Stat(input.File); os.IsNotExist(statErr) {
            return fail(fmt.Errorf("File not found: %s", input.File))
        }
        dataFile, err := utils.ParseDataFile(input.File)
        if err != nil {
            return fail(fmt.Errorf("Failed to parse file: %v", err))
        }

        segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
        if segment == nil {
            return fail(fmt.Errorf("No segment found matching time range %d-%d", startTime, endTime))
        }
        targetLabel := findLabelByFilter(segment, input.Filter)
        if targetLabel == nil {
            return fail(fmt.Errorf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime))
        }

        // Capture the label's state before it is modified.
        output.PreviousValue = formatLabel(targetLabel)

        newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)
        if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
            return fail(fmt.Errorf("No changes needed: all values already match"))
        }
        if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
            return fail(err)
        }
        if err := dataFile.Write(input.File); err != nil {
            return fail(fmt.Errorf("Failed to save file: %v", err))
        }
        return output, nil
    }
    // findLabelByFilter returns the first label of segment whose Filter equals
    // filter, or nil when the segment has no such label.
    func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
        for i := range segment.Labels {
            if candidate := segment.Labels[i]; candidate.Filter == filter {
                return candidate
            }
        }
        return nil
    }
    // parseSegmentRange parses a "start-end" segment such as "12-15" into its two
    // whole-second bounds.
    //
    // Each bound must consist solely of decimal digits (surrounding whitespace and
    // a leading "+" are tolerated). Inputs with trailing garbage such as "10x-15"
    // or "10.7-15" are rejected rather than silently truncated. Because the input
    // is split on "-", a bound can never be negative, so no separate sign check is
    // needed.
    //
    // Returns an error when the format is not start-end, when a bound is not a
    // valid integer, or when start is not strictly less than end.
    func parseSegmentRange(s string) (int, int, error) {
        parts := strings.Split(s, "-")
        if len(parts) != 2 {
            return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
        }
        start, ok := parseWholeSeconds(parts[0])
        if !ok {
            return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
        }
        end, ok := parseWholeSeconds(parts[1])
        if !ok {
            return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
        }
        if start >= end {
            return 0, 0, fmt.Errorf("start time must be less than end time")
        }
        return start, end, nil
    }

    // parseWholeSeconds converts a field made only of decimal digits into an int.
    // It reports false for empty fields, fields containing any other character,
    // and values that do not fit in an int.
    func parseWholeSeconds(field string) (int, bool) {
        digits := strings.TrimPrefix(strings.TrimSpace(field), "+")
        // Trim removes every digit from both ends; anything left is a non-digit.
        if digits == "" || strings.Trim(digits, "0123456789") != "" {
            return 0, false
        }
        var value int
        // The text is known to be all digits, so the only failure left is overflow.
        if _, err := fmt.Sscanf(digits, "%d", &value); err != nil {
            return 0, false
        }
        return value, true
    }
    // findSegment returns the first segment whose whole-second bounds equal
    // startTime/endTime and which carries a label for filter, or nil if none does.
    // A segment's bounds are floor(StartTime) and ceil(EndTime); when those
    // coincide the end is pushed out by one second. Requiring the filter lets
    // duplicate segments (same time range, different filters) be told apart.
    func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
        for i := range segments {
            candidate := segments[i]
            lo := int(math.Floor(candidate.StartTime))
            hi := int(math.Ceil(candidate.EndTime))
            if hi == lo {
                hi = lo + 1 // minimum 1 second
            }
            if lo != startTime || hi != endTime {
                continue
            }
            for _, lbl := range candidate.Labels {
                if lbl.Filter == filter {
                    return candidate
                }
            }
        }
        return nil
    }
    // formatLabel renders a label as "Species (NN%)", or "Species+CallType (NN%)"
    // when the label has a call type.
    func formatLabel(label *utils.Label) string {
        name := label.Species
        if label.CallType != "" {
            name = name + "+" + label.CallType
        }
        return fmt.Sprintf("%s (%d%%)", name, label.Certainty)
    }
  • file addition: calls_from_raven.go (----------)
    [0.67281]
    package calls
    import (
    "bufio"
    "fmt"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsFromRavenInput defines the input for the calls-from-raven tool.
    // CallsFromRaven converts it directly to CallsFromSourceInput, so the two
    // types must keep identical fields.
    type CallsFromRavenInput struct {
    // Folder is presumably a directory to scan for Raven selection files — confirm against callsFromSource.
    Folder string `json:"folder"`
    // File is presumably a single Raven selection file to process — confirm against callsFromSource.
    File string `json:"file"`
    // Delete presumably requests removal of source files after processing (see FilesDeleted in the output) — confirm.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
    }
    // CallsFromRavenOutput defines the output for the calls-from-raven tool.
    // CallsFromRaven fills every field by copying the same-named field of the
    // result returned by callsFromSource.
    type CallsFromRavenOutput struct {
    Calls []ClusteredCall `json:"calls"`
    TotalCalls int `json:"total_calls"`
    SpeciesCount map[string]int `json:"species_count"`
    DataFilesWritten int `json:"data_files_written"`
    DataFilesSkipped int `json:"data_files_skipped"`
    FilesProcessed int `json:"files_processed"`
    FilesDeleted int `json:"files_deleted"`
    Filter string `json:"filter"`
    // Error is omitted from the JSON when nil.
    Error *string `json:"error,omitempty"`
    }
    // ravenSource implements CallSource for Raven selection files
    type ravenSource struct{}

    // Name returns the display name of this source.
    func (ravenSource) Name() string { return "Raven" }

    // FindFiles lists the Raven selection files directly inside folder, i.e. the
    // entries whose name ends in ".selections.txt". Directories are skipped even
    // if their name carries that suffix, since they cannot be parsed as a
    // selection table. Subdirectories are not searched. The returned paths are
    // folder joined with the entry name; an error is returned when the folder
    // cannot be read.
    func (ravenSource) FindFiles(folder string) ([]string, error) {
        entries, err := os.ReadDir(folder)
        if err != nil {
            return nil, err
        }
        var files []string
        for _, entry := range entries {
            if entry.IsDir() {
                continue
            }
            if name := entry.Name(); strings.HasSuffix(name, ".selections.txt") {
                files = append(files, filepath.Join(folder, name))
            }
        }
        return files, nil
    }
    // ProcessFile handles a single Raven selection file by delegating to
    // processRavenFileCached, passing the directory cache through unchanged.
    func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    return processRavenFileCached(ravenFile, cache)
    }
    // CallsFromRaven runs the shared callsFromSource pipeline with the Raven
    // source and repackages its result as a CallsFromRavenOutput. The error from
    // the pipeline is returned unchanged alongside the copied output.
    func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
        common, err := callsFromSource(ravenSource{}, CallsFromSourceInput(input))

        // Field-by-field copy into the Raven-specific output type.
        return CallsFromRavenOutput{
            Calls:            common.Calls,
            TotalCalls:       common.TotalCalls,
            SpeciesCount:     common.SpeciesCount,
            DataFilesWritten: common.DataFilesWritten,
            DataFilesSkipped: common.DataFilesSkipped,
            FilesProcessed:   common.FilesProcessed,
            FilesDeleted:     common.FilesDeleted,
            Filter:           common.Filter,
            Error:            common.Error,
        }, err
    }
    // RavenSelection represents a single Raven selection, i.e. one data row of a
    // Raven selection table as parsed by parseRavenRow.
    type RavenSelection struct {
    // StartTime and EndTime come from the "Begin Time (s)" and "End Time (s)" columns.
    StartTime float64
    EndTime float64
    // FreqLow and FreqHigh come from the "Low Freq (Hz)" and "High Freq (Hz)"
    // columns; they stay zero when the column is absent from the row.
    FreqLow float64
    FreqHigh float64
    // Species is the raw text of the "Species" column.
    Species string
    }
    // ravenColumnIndices holds the column index positions for a Raven file.
    // parseRavenHeader initialises every index to -1, which marks a column that
    // was not found in the header.
    type ravenColumnIndices struct {
    beginTimeIdx int // "Begin Time (s)" (required)
    endTimeIdx int // "End Time (s)" (required)
    lowFreqIdx int // "Low Freq (Hz)" (optional)
    highFreqIdx int // "High Freq (Hz)" (optional)
    speciesIdx int // "Species" (required)
    }
    // parseRavenHeader maps the known Raven column titles in a tab-split header
    // line to their positions. Unknown columns are ignored and missing ones keep
    // the index -1. An error is returned when the begin time, end time or species
    // column is absent; the frequency columns are optional.
    func parseRavenHeader(header []string) (ravenColumnIndices, error) {
        idx := ravenColumnIndices{
            beginTimeIdx: -1,
            endTimeIdx:   -1,
            lowFreqIdx:   -1,
            highFreqIdx:  -1,
            speciesIdx:   -1,
        }
        // Each recognised title points at the field that receives its position.
        slots := map[string]*int{
            "Begin Time (s)": &idx.beginTimeIdx,
            "End Time (s)":   &idx.endTimeIdx,
            "Low Freq (Hz)":  &idx.lowFreqIdx,
            "High Freq (Hz)": &idx.highFreqIdx,
            "Species":        &idx.speciesIdx,
        }
        for pos, title := range header {
            if slot, known := slots[title]; known {
                *slot = pos
            }
        }
        haveRequired := idx.beginTimeIdx != -1 && idx.endTimeIdx != -1 && idx.speciesIdx != -1
        if !haveRequired {
            return idx, fmt.Errorf("missing required columns in Raven file")
        }
        return idx, nil
    }
    // parseRavenSelections reads the remaining lines of scanner (the header must
    // already have been consumed) and parses each one into a RavenSelection.
    //
    // Empty lines are skipped, as are rows too short to contain every required
    // column (begin time, end time and species). Checking all three matters: the
    // columns can appear in any order, so a row long enough for the species
    // column may still lack a time column, and indexing it would panic.
    //
    // Returns an error when a row fails to parse or when the scanner reports a
    // read error.
    func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
        // Highest index among the required columns; a usable row must reach it.
        lastRequired := idx.speciesIdx
        if idx.beginTimeIdx > lastRequired {
            lastRequired = idx.beginTimeIdx
        }
        if idx.endTimeIdx > lastRequired {
            lastRequired = idx.endTimeIdx
        }

        var selections []RavenSelection
        for scanner.Scan() {
            line := scanner.Text()
            if line == "" {
                continue
            }
            fields := strings.Split(line, "\t")
            if len(fields) <= lastRequired {
                continue
            }
            sel, err := parseRavenRow(fields, idx)
            if err != nil {
                return nil, err
            }
            selections = append(selections, sel)
        }
        if err := scanner.Err(); err != nil {
            return nil, fmt.Errorf("error reading file: %w", err)
        }
        return selections, nil
    }
    // parseRavenRow converts one tab-split row into a RavenSelection using the
    // column positions in idx. Begin and end time are always parsed; each
    // frequency is parsed only when its column exists in the header and in this
    // row, otherwise it stays zero. The species text is copied verbatim.
    // The caller must ensure the row is long enough for the required columns.
    func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
        var sel RavenSelection

        // cell returns the text of an optional column, or false when the column
        // is missing from the header (negative index) or from this row.
        cell := func(col int) (string, bool) {
            if col >= 0 && col < len(fields) {
                return fields[col], true
            }
            return "", false
        }

        beginRaw := fields[idx.beginTimeIdx]
        begin, err := strconv.ParseFloat(beginRaw, 64)
        if err != nil {
            return sel, fmt.Errorf("failed to parse begin time %q: %w", beginRaw, err)
        }
        sel.StartTime = begin

        endRaw := fields[idx.endTimeIdx]
        end, err := strconv.ParseFloat(endRaw, 64)
        if err != nil {
            return sel, fmt.Errorf("failed to parse end time %q: %w", endRaw, err)
        }
        sel.EndTime = end

        if raw, present := cell(idx.lowFreqIdx); present {
            low, err := strconv.ParseFloat(raw, 64)
            if err != nil {
                return sel, fmt.Errorf("failed to parse low freq %q: %w", raw, err)
            }
            sel.FreqLow = low
        }
        if raw, present := cell(idx.highFreqIdx); present {
            high, err := strconv.ParseFloat(raw, 64)
            if err != nil {
                return sel, fmt.Errorf("failed to parse high freq %q: %w", raw, err)
            }
            sel.FreqHigh = high
        }

        sel.Species = fields[idx.speciesIdx]
        return sel, nil
    }
    // deriveWAVBaseName derives the recording's base name from the path of a
    // Raven selection file: the directory and the ".selections.txt" suffix are
    // dropped, then everything from the first ".Table." onwards is removed,
    // provided some text precedes it.
    func deriveWAVBaseName(ravenFile string) string {
        stem := strings.TrimSuffix(filepath.Base(ravenFile), ".selections.txt")
        // A name that starts with ".Table." would leave nothing, so keep it whole.
        if prefix, _, found := strings.Cut(stem, ".Table."); found && prefix != "" {
            return prefix
        }
        return stem
    }
    // processRavenFileCached processes a single Raven selection file: it parses
    // the header and rows, locates the matching WAV file (through cache when one
    // is given), and writes the selections to "<wav path>.data".
    //
    // Results, as produced by the code below:
    //   - on success: one ClusteredCall per selection, true, false, nil;
    //   - when there are no selections, no WAV file is found, or the WAV header
    //     cannot be read: nil, false, true, nil (the file is skipped, not failed);
    //   - on open, read, parse or write failures: nil, false, false and the error.
    //
    // A failed first read is reported as a read error when the scanner recorded
    // one (I/O failure, over-long line) and as "empty file" only when the file
    // really has no content.
    func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
        file, err := os.Open(ravenFile)
        if err != nil {
            return nil, false, false, fmt.Errorf("failed to open file: %w", err)
        }
        // The handle is read-only, so a close error carries nothing actionable.
        defer func() { _ = file.Close() }()

        scanner := bufio.NewScanner(file)
        if !scanner.Scan() {
            if err := scanner.Err(); err != nil {
                return nil, false, false, fmt.Errorf("error reading file: %w", err)
            }
            return nil, false, false, fmt.Errorf("empty file")
        }
        header := strings.Split(scanner.Text(), "\t")
        idx, err := parseRavenHeader(header)
        if err != nil {
            return nil, false, false, err
        }
        selections, err := parseRavenSelections(scanner, idx)
        if err != nil {
            return nil, false, false, err
        }
        if len(selections) == 0 {
            return nil, false, true, nil
        }

        // Find WAV file
        wavPath := resolveWAVPath(ravenFile, cache)
        if wavPath == "" {
            return nil, false, true, nil
        }
        sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
        if err != nil {
            // An unreadable WAV header is treated as a skip rather than a failure.
            return nil, false, true, nil
        }

        dataPath := wavPath + ".data"
        segments := buildRavenSegments(selections, sampleRate)
        meta := AviaNZMeta{Operator: "Raven", Duration: duration}
        reviewer := "None"
        meta.Reviewer = &reviewer
        if err := writeDotDataFileSafe(dataPath, segments, "Raven", meta); err != nil {
            return nil, false, false, err
        }

        // selections is non-empty here, so the result is sized exactly.
        calls := make([]ClusteredCall, 0, len(selections))
        for _, sel := range selections {
            calls = append(calls, ClusteredCall{
                File:      wavPath,
                StartTime: sel.StartTime,
                EndTime:   sel.EndTime,
                EbirdCode: sel.Species,
                Segments:  1,
            })
        }
        return calls, true, false, nil
    }
    // resolveWAVPath returns the WAV file belonging to a Raven selection file.
    // The base name comes from deriveWAVBaseName; the lookup goes through the
    // directory cache when one is supplied and otherwise through findWAVFile on
    // the Raven file's own directory.
    func resolveWAVPath(ravenFile string, cache *DirCache) string {
        base := deriveWAVBaseName(ravenFile)
        if cache == nil {
            return findWAVFile(filepath.Dir(ravenFile), base)
        }
        return cache.FindWAV(base)
    }
    // buildRavenSegments turns each Raven selection into an AviaNZ segment with a
    // single label (species from the selection, certainty 70, filter "Raven").
    // When a selection has no frequency range (both bounds zero) the upper bound
    // is set to the sample rate. A nil slice is returned for no selections.
    func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
        var out []AviaNZSegment
        for i := range selections {
            sel := &selections[i]

            // Use frequency range from Raven, or full band if not specified
            lo, hi := sel.FreqLow, sel.FreqHigh
            if lo == 0 && hi == 0 {
                hi = float64(sampleRate)
            }

            out = append(out, AviaNZSegment{
                sel.StartTime,
                sel.EndTime,
                lo,
                hi,
                []AviaNZLabel{{
                    Species:   sel.Species,
                    Certainty: 70, // Default certainty for Raven (no confidence metric)
                    Filter:    "Raven",
                }},
            })
        }
        return out
    }
  • file addition: calls_from_preds_test.go (----------)
    [0.67281]
    package calls
    import (
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // TestCallsFromPreds_EmptyFilterError checks that running with an empty
    // filter on a CSV named "preds.csv" fails and reports a message in the output.
    func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
        workDir := t.TempDir()

        // One prediction row pointing at test.wav.
        predsFile := filepath.Join(workDir, "preds.csv")
        rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
            t.Fatal(err)
        }

        // The WAV file the CSV row refers to.
        createMinimalWAV(t, filepath.Join(workDir, "test.wav"), 44100, 10.0)

        out, runErr := CallsFromPreds(CallsFromPredsInput{
            CSVPath:         predsFile,
            Filter:          "",
            WriteDotData:    true,
            ProgressHandler: nil,
        })
        if runErr == nil {
            t.Error("expected error for empty filter, got nil")
        }
        if out.Error == nil || *out.Error == "" {
            t.Error("expected error message in output, got empty")
        }
    }
    // TestCallsFromPreds_NewDataFile checks that a fresh .data file is written
    // next to the WAV file and that the filter is taken from the CSV filename.
    // Length checks that guard later indexing are fatal, so a wrong segment or
    // label count fails the test cleanly instead of panicking.
    func TestCallsFromPreds_NewDataFile(t *testing.T) {
        // Create a temp CSV file
        tmpDir := t.TempDir()
        csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
        csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
            t.Fatal(err)
        }
        // Create a dummy WAV file
        wavPath := filepath.Join(tmpDir, "test.wav")
        createMinimalWAV(t, wavPath, 44100, 10.0)
        // Test with filter parsed from filename
        input := CallsFromPredsInput{
            CSVPath:         csvPath,
            Filter:          "", // Will parse from filename
            WriteDotData:    true,
            ProgressHandler: nil,
        }
        output, err := CallsFromPreds(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
        }
        if output.Filter != "test-filter" {
            t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
        }
        // Verify .data file was created
        dataPath := wavPath + ".data"
        if _, err := os.Stat(dataPath); os.IsNotExist(err) {
            t.Error("expected .data file to be created")
        }
        // Verify content
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Fatal: the checks below index into Segments and Labels.
        if len(df.Segments) != 1 {
            t.Fatalf("expected 1 segment, got %d", len(df.Segments))
        }
        if len(df.Segments[0].Labels) != 1 {
            t.Fatalf("expected 1 label, got %d", len(df.Segments[0].Labels))
        }
        if df.Segments[0].Labels[0].Filter != "test-filter" {
            t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
        }
    }
    // TestCallsFromPreds_ExistingDataFileSameFilter checks that importing
    // predictions whose filter already exists in the .data file is rejected and
    // that the existing file is left unchanged. The segment and label counts are
    // checked fatally before indexing so a regression cannot panic the test.
    func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
        // Create a temp CSV file
        tmpDir := t.TempDir()
        csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
        csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
            t.Fatal(err)
        }
        // Create a dummy WAV file
        wavPath := filepath.Join(tmpDir, "test.wav")
        createMinimalWAV(t, wavPath, 44100, 10.0)
        // Create existing .data file with same filter
        dataPath := wavPath + ".data"
        existingData := `[
    {"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
    [5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]
    ]`
        if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
            t.Fatal(err)
        }
        // Test with same filter (should error)
        input := CallsFromPredsInput{
            CSVPath:         csvPath,
            Filter:          "", // Will parse from filename -> "existing-filter"
            WriteDotData:    true,
            ProgressHandler: nil,
        }
        output, err := CallsFromPreds(input)
        // Should return error
        if err == nil {
            t.Error("expected error for same filter, got nil")
        }
        if output.Error == nil {
            t.Error("expected error message in output")
        }
        // Verify original .data file is unchanged
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Fatal: the species check below indexes Segments[0].Labels[0].
        if len(df.Segments) != 1 {
            t.Fatalf("expected original 1 segment, got %d", len(df.Segments))
        }
        if len(df.Segments[0].Labels) == 0 {
            t.Fatalf("expected original segment to keep its label, got none")
        }
        if df.Segments[0].Labels[0].Species != "morepork" {
            t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
        }
    }
    // TestCallsFromPreds_ExistingDataFileDifferentFilter checks that predictions
    // with a new filter are merged into an existing .data file: both segments are
    // present, ordered by start time, and both filters appear. The segment count
    // is checked fatally because the ordering check indexes Segments[1].
    func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
        // Create a temp CSV file
        tmpDir := t.TempDir()
        csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
        csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
            t.Fatal(err)
        }
        // Create a dummy WAV file
        wavPath := filepath.Join(tmpDir, "test.wav")
        createMinimalWAV(t, wavPath, 44100, 10.0)
        // Create existing .data file with different filter
        dataPath := wavPath + ".data"
        existingData := `[
    {"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
    [5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]
    ]`
        if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
            t.Fatal(err)
        }
        // Test with different filter (should merge)
        input := CallsFromPredsInput{
            CSVPath:         csvPath,
            Filter:          "", // Will parse from filename -> "new-filter"
            WriteDotData:    true,
            ProgressHandler: nil,
        }
        output, err := CallsFromPreds(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
        }
        // Verify .data file has merged content
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Fatal: the ordering check below indexes Segments[0] and Segments[1].
        if len(df.Segments) != 2 {
            t.Fatalf("expected 2 segments after merge, got %d", len(df.Segments))
        }
        // Check segments are sorted by start time
        if df.Segments[0].StartTime > df.Segments[1].StartTime {
            t.Error("expected segments to be sorted by start time")
        }
        // Check both filters are present
        filters := make(map[string]bool)
        for _, seg := range df.Segments {
            for _, label := range seg.Labels {
                filters[label.Filter] = true
            }
        }
        if !filters["old-filter"] {
            t.Error("expected 'old-filter' to be present")
        }
        if !filters["new-filter"] {
            t.Error("expected 'new-filter' to be present")
        }
    }
    // TestCallsFromPreds_ExistingDataFileParseError checks that an existing .data
    // file that cannot be parsed makes the import fail and is left byte-for-byte
    // untouched on disk.
    func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
        const corruptedData = `this is not valid json`

        workDir := t.TempDir()

        // One prediction row pointing at test.wav.
        predsFile := filepath.Join(workDir, "predsST_test-filter_2025-01-01.csv")
        rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
            t.Fatal(err)
        }

        // The WAV file plus a .data sidecar that is not valid JSON.
        wavFile := filepath.Join(workDir, "test.wav")
        createMinimalWAV(t, wavFile, 44100, 10.0)
        sidecar := wavFile + ".data"
        if err := os.WriteFile(sidecar, []byte(corruptedData), 0644); err != nil {
            t.Fatal(err)
        }

        out, runErr := CallsFromPreds(CallsFromPredsInput{
            CSVPath:         predsFile,
            Filter:          "",
            WriteDotData:    true,
            ProgressHandler: nil,
        })
        if runErr == nil {
            t.Error("expected error for corrupted .data file, got nil")
        }
        if out.Error == nil {
            t.Error("expected error message in output")
        }

        // The corrupted sidecar must not have been rewritten.
        onDisk, err := os.ReadFile(sidecar)
        if err != nil {
            t.Fatal(err)
        }
        if string(onDisk) != corruptedData {
            t.Error("expected corrupted file to remain unchanged")
        }
    }
    // TestCallsFromPreds_ExplicitFilter checks that an explicitly supplied filter
    // is used both in the output and in the written .data file, even when the CSV
    // filename does not follow the predsST_<filter>_<date> pattern. The written
    // file is checked for at least one labelled segment before it is indexed.
    func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
        // Create a temp CSV file with non-standard name
        tmpDir := t.TempDir()
        csvPath := filepath.Join(tmpDir, "predictions.csv")
        csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
            t.Fatal(err)
        }
        // Create a dummy WAV file
        wavPath := filepath.Join(tmpDir, "test.wav")
        createMinimalWAV(t, wavPath, 44100, 10.0)
        // Test with explicit filter
        input := CallsFromPredsInput{
            CSVPath:         csvPath,
            Filter:          "my-custom-filter",
            WriteDotData:    true,
            ProgressHandler: nil,
        }
        output, err := CallsFromPreds(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.Filter != "my-custom-filter" {
            t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
        }
        // Verify .data file uses explicit filter
        dataPath := wavPath + ".data"
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Guard the indexing below so an empty file fails instead of panicking.
        if len(df.Segments) == 0 || len(df.Segments[0].Labels) == 0 {
            t.Fatalf("expected at least one labelled segment in .data file, got %d segments", len(df.Segments))
        }
        if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
            t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
        }
    }
    // TestCallsFromPreds_NonParsableFilenameNoFilter checks that the import fails
    // when no filter is given and the CSV is named "random_name.csv", which the
    // test expects not to yield a filter.
    func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
        workDir := t.TempDir()

        // One prediction row pointing at test.wav.
        predsFile := filepath.Join(workDir, "random_name.csv")
        rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
        if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
            t.Fatal(err)
        }

        // The WAV file the CSV row refers to.
        createMinimalWAV(t, filepath.Join(workDir, "test.wav"), 44100, 10.0)

        out, runErr := CallsFromPreds(CallsFromPredsInput{
            CSVPath:         predsFile,
            Filter:          "",
            WriteDotData:    true,
            ProgressHandler: nil,
        })
        if runErr == nil {
            t.Error("expected error for unparsable filename with no filter, got nil")
        }
        if out.Error == nil {
            t.Error("expected error message in output")
        }
    }
    // createMinimalWAV writes a 16-bit mono PCM WAV file to path for use in tests:
    // a 44-byte RIFF/WAVE header describing sampleRate and the data length,
    // followed by duration seconds of silence. Any file error fails the test.
    func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
        t.Helper()

        payloadLen := int(float64(sampleRate)*duration) * 2 // 16-bit mono: two bytes per sample

        hdr := make([]byte, 44)
        // Little-endian field writers for the fixed-offset header layout.
        le16 := func(off int, v uint16) {
            hdr[off] = byte(v)
            hdr[off+1] = byte(v >> 8)
        }
        le32 := func(off int, v uint32) {
            hdr[off] = byte(v)
            hdr[off+1] = byte(v >> 8)
            hdr[off+2] = byte(v >> 16)
            hdr[off+3] = byte(v >> 24)
        }

        // RIFF container: size field counts everything after the first 8 bytes.
        copy(hdr[0:4], "RIFF")
        le32(4, uint32(36+payloadLen))
        copy(hdr[8:12], "WAVE")

        // fmt chunk
        copy(hdr[12:16], "fmt ")
        le32(16, 16)                   // fmt chunk size
        le16(20, 1)                    // audio format: PCM
        le16(22, 1)                    // channels
        le32(24, uint32(sampleRate))   // sample rate
        le32(28, uint32(sampleRate*2)) // byte rate
        le16(32, 2)                    // block align
        le16(34, 16)                   // bits per sample

        // data chunk
        copy(hdr[36:40], "data")
        le32(40, uint32(payloadLen))

        out, err := os.Create(path)
        if err != nil {
            t.Fatal(err)
        }
        defer out.Close()
        if _, err := out.Write(hdr); err != nil {
            t.Fatal(err)
        }
        // Silence is all zero bytes.
        if _, err := out.Write(make([]byte, payloadLen)); err != nil {
            t.Fatal(err)
        }
    }
  • file addition: calls_from_preds.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/csv"
    "encoding/json"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "sort"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "skraak/utils"
    )
    // Defaults for the clustering algorithm and for .data file output.
    const (
    // CLUSTER_GAP_MULTIPLIER is the default gap multiplier: the gap threshold is
    // CLUSTER_GAP_MULTIPLIER * clip_duration. The original note suggests 3 for kiwi.
    CLUSTER_GAP_MULTIPLIER = 2
    // MIN_DETECTIONS_PER_CLUSTER is the default minimum-detections setting.
    // 1 = filter out single detections (used for kiwi, they have long calls 30s),
    // 0 = let single detections pass through.
    MIN_DETECTIONS_PER_CLUSTER = 0
    // DEFAULT_CERTAINTY is the certainty put on generated .data labels (certainty:70).
    DEFAULT_CERTAINTY = 70
    // DOT_DATA_WORKERS is the number of parallel workers for .data file writing.
    DOT_DATA_WORKERS = 8
    )
    // ClusteredCall represents a clustered bird call detection: one call of a
    // single species in a single audio file.
    type ClusteredCall struct {
    // File is the audio file reference as supplied by the source
    // (for predictions CSVs, the value of the "file" column).
    File string `json:"file"`
    // StartTime is where the call begins (for predictions, the first start time in the cluster).
    StartTime float64 `json:"start_time"`
    // EndTime is where the call ends (for predictions, the last start time in the cluster plus the clip duration).
    EndTime float64 `json:"end_time"`
    // EbirdCode identifies the species (for predictions, the CSV column header).
    EbirdCode string `json:"ebird_code"`
    // Segments is the number of detections merged into this call.
    Segments int `json:"segments"`
    }
    // CallsFromPredsInput defines the input for the calls-from-preds tool
    type CallsFromPredsInput struct {
    // CSVPath is the predictions CSV to read. WAV files are looked up in its directory.
    CSVPath string `json:"csv_path"`
    // Filter is the filter name recorded on labels; when empty it is parsed from the CSV filename.
    Filter string `json:"filter"`
    // WriteDotData enables writing AviaNZ .data files for the clustered calls.
    WriteDotData bool `json:"write_dot_data"`
    // GapMultiplier overrides CLUSTER_GAP_MULTIPLIER when greater than zero.
    GapMultiplier int `json:"gap_multiplier"`
    // MinDetections overrides MIN_DETECTIONS_PER_CLUSTER when zero or greater, so
    // the zero value is treated as an explicit setting. Clusters holding this
    // many detections or fewer are dropped.
    MinDetections int `json:"min_detections"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
    }
    // ProgressHandler is a callback function for reporting progress during long operations.
    //
    // processed: number of items processed so far
    // total: total number of items to process
    // message: optional status message (may be empty)
    //
    // Callers in this file check for nil before invoking it, so a nil handler
    // simply disables progress reporting.
    type ProgressHandler func(processed, total int, message string)
    // CallsFromPredsOutput defines the output for the calls-from-preds tool
    type CallsFromPredsOutput struct {
    // Calls holds the clustered calls, sorted by file and then start time.
    Calls []ClusteredCall `json:"calls"`
    // TotalCalls is len(Calls).
    TotalCalls int `json:"total_calls"`
    // ClipDuration is end_time - start_time of the first CSV data row.
    ClipDuration float64 `json:"clip_duration"`
    // GapThreshold is the gap multiplier times ClipDuration.
    GapThreshold float64 `json:"gap_threshold"`
    // SpeciesCount maps each ebird code to its number of calls.
    SpeciesCount map[string]int `json:"species_count"`
    // DataFilesWritten counts .data files written (only set when WriteDotData succeeds).
    DataFilesWritten int `json:"data_files_written"`
    // DataFilesSkipped counts audio files for which no .data file was written
    // because the WAV file was not found or its header could not be parsed.
    DataFilesSkipped int `json:"data_files_skipped"`
    // Filter is the filter name actually used.
    Filter string `json:"filter"`
    // Error carries the failure message when the call fails; nil otherwise.
    Error *string `json:"error,omitempty"`
    }
    // AviaNZ .data file types (AviaNZMeta, AviaNZSegment, AviaNZLabel) are used below but declared elsewhere in this package.
    // predFileSpeciesKey groups detections by file and ebird code.
    // It is used as the map key while reading the predictions CSV.
    type predFileSpeciesKey struct {
    File string // value of the CSV "file" column
    EbirdCode string // header name of the species column
    }
    // CallsFromPreds reads a predictions CSV and clusters detections into continuous bird calls.
    //
    // The filter name is input.Filter or, when that is empty, whatever
    // ParseFilterFromFilename extracts from the CSV filename; if both are empty
    // the call fails. Clustering uses the package defaults unless overridden by
    // input.GapMultiplier (> 0) or input.MinDetections (>= 0). When
    // input.WriteDotData is set, .data files are written through writeDotFiles.
    //
    // On failure the returned output has its Error field set and a non-nil error
    // is returned as well.
    func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
    	var output CallsFromPredsOutput

    	// Resolve the filter: explicit value first, CSV filename second.
    	filter := input.Filter
    	if filter == "" {
    		filter = ParseFilterFromFilename(input.CSVPath)
    		if filter == "" {
    			msg := "Filter must be specified via --filter flag or parsable from CSV filename"
    			output.Error = &msg
    			return output, fmt.Errorf("%s", msg)
    		}
    	}
    	output.Filter = filter

    	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
    	if err != nil {
    		msg := err.Error()
    		output.Error = &msg
    		return output, err
    	}

    	// Start from the package defaults and apply caller overrides.
    	gapMultiplier, minDetections := CLUSTER_GAP_MULTIPLIER, MIN_DETECTIONS_PER_CLUSTER
    	if input.GapMultiplier > 0 {
    		gapMultiplier = input.GapMultiplier
    	}
    	if input.MinDetections >= 0 {
    		minDetections = input.MinDetections
    	}

    	output.ClipDuration = clipDuration
    	output.GapThreshold = float64(gapMultiplier) * clipDuration
    	output.Calls, output.SpeciesCount = clusterDetections(detections, clipDuration, output.GapThreshold, minDetections)
    	output.TotalCalls = len(output.Calls)

    	if !input.WriteDotData {
    		return output, nil
    	}

    	written, skipped, err := writeDotFiles(input.CSVPath, filter, output.Calls, input.ProgressHandler)
    	if err != nil {
    		msg := fmt.Sprintf("Error writing .data files: %v", err)
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}
    	output.DataFilesWritten = written
    	output.DataFilesSkipped = skipped
    	return output, nil
    }
    // readPredCSV opens the predictions CSV at csvPath, maps its header to column
    // indices and reads every data row. It returns the column mapping, the
    // detections grouped by file and species, and the clip duration.
    // On any failure it returns a zero predCSVColumns, a nil map, 0 and the error.
    func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
    	var none predCSVColumns

    	f, err := os.Open(csvPath)
    	if err != nil {
    		return none, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
    	}
    	defer func() { _ = f.Close() }()

    	r := csv.NewReader(f)
    	r.ReuseRecord = true // let the reader recycle the record slice between rows

    	headerRow, err := r.Read()
    	if err != nil {
    		return none, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
    	}

    	cols, err := findPredCSVColumns(headerRow)
    	if err != nil {
    		return none, nil, 0, err
    	}

    	detections, clipDuration, err := readPredCSVRows(r, cols)
    	if err != nil {
    		return none, nil, 0, err
    	}
    	return cols, detections, clipDuration, nil
    }
    // predCSVColumns holds the column indices for a predictions CSV.
    type predCSVColumns struct {
    	fileIdx      int      // index of the "file" column, -1 if absent
    	startTimeIdx int      // index of the "start_time" column, -1 if absent
    	endTimeIdx   int      // index of the "end_time" column, -1 if absent
    	ebirdCodes   []string // header names of the species columns, in header order
    	ebirdIdx     []int    // column index of each entry in ebirdCodes
    }

    // findPredCSVColumns parses the CSV header to find column indices.
    //
    // "file", "start_time" and "end_time" are required. The columns "NotKiwi" and
    // "0.0" are ignored. Every other column is treated as an ebird code column,
    // and at least one must be present. On error the partially filled mapping is
    // returned along with the error.
    func findPredCSVColumns(header []string) (predCSVColumns, error) {
    	cols := predCSVColumns{fileIdx: -1, startTimeIdx: -1, endTimeIdx: -1}

    	for idx, name := range header {
    		switch name {
    		case "file":
    			cols.fileIdx = idx
    		case "start_time":
    			cols.startTimeIdx = idx
    		case "end_time":
    			cols.endTimeIdx = idx
    		case "NotKiwi", "0.0":
    			// Ignored columns: neither required nor a species column.
    		default:
    			cols.ebirdCodes = append(cols.ebirdCodes, name)
    			cols.ebirdIdx = append(cols.ebirdIdx, idx)
    		}
    	}

    	missingRequired := cols.fileIdx < 0 || cols.startTimeIdx < 0 || cols.endTimeIdx < 0
    	switch {
    	case missingRequired:
    		return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
    	case len(cols.ebirdCodes) == 0:
    		return cols, fmt.Errorf("CSV must have at least one ebird code column")
    	}
    	return cols, nil
    }
    // readPredCSVRows reads all CSV data rows and returns detections grouped by
    // file+species, plus the clip duration.
    //
    // The clip duration is end_time - start_time of the first data row; an input
    // with no data rows yields an empty map and a duration of 0. A read failure
    // returns a nil map, 0 and a wrapped error.
    //
    // NOTE(review): errors from parsing start_time/end_time are discarded, so a
    // malformed value is used as whatever strconv.ParseFloat returned alongside
    // the error — confirm that this leniency is intended.
    func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
    	detections := make(map[predFileSpeciesKey][]float64)
    	var clipDuration float64

    	for rowNum := 0; ; rowNum++ {
    		row, err := reader.Read()
    		if err == io.EOF {
    			return detections, clipDuration, nil
    		}
    		if err != nil {
    			if rowNum == 0 {
    				return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
    			}
    			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
    		}

    		start, _ := strconv.ParseFloat(row[cols.startTimeIdx], 64)
    		if rowNum == 0 {
    			// Only the first row determines the clip duration.
    			end, _ := strconv.ParseFloat(row[cols.endTimeIdx], 64)
    			clipDuration = end - start
    		}
    		addDetectionsFromRow(row, cols, start, detections)
    	}
    }
    // addDetectionsFromRow adds positive detections from a single CSV row.
    // For every species column whose cell is exactly "1", startTime is appended
    // to the detections list keyed by the row's file and that species.
    func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
    	key := predFileSpeciesKey{File: record[cols.fileIdx]}
    	for n, colIdx := range cols.ebirdIdx {
    		if record[colIdx] != "1" {
    			continue
    		}
    		key.EbirdCode = cols.ebirdCodes[n]
    		detections[key] = append(detections[key], startTime)
    	}
    }
    // clusterDetections groups detections into clusters and produces sorted ClusteredCalls.
    //
    // For each file+species key the start times are sorted in place and split by
    // clusterStartTimes using gapThreshold. Clusters with len(cluster) <= minDetections
    // are dropped. Each remaining cluster becomes one call running from its first
    // start time to its last start time plus clipDuration. The second return
    // value counts the kept calls per ebird code.
    //
    // Calls are ordered by file, then start time, then ebird code. The ebird code
    // tie-break keeps the output reproducible: the map is iterated in random
    // order and sort.Slice is not stable, so without it two species detected at
    // the same start time in the same file could swap places between runs.
    func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
    	var allCalls []ClusteredCall
    	speciesCount := make(map[string]int)

    	for key, startTimes := range detections {
    		sort.Float64s(startTimes)
    		for _, cluster := range clusterStartTimes(startTimes, gapThreshold) {
    			if len(cluster) <= minDetections {
    				continue
    			}
    			allCalls = append(allCalls, ClusteredCall{
    				File:      key.File,
    				StartTime: cluster[0],
    				EndTime:   cluster[len(cluster)-1] + clipDuration,
    				EbirdCode: key.EbirdCode,
    				Segments:  len(cluster),
    			})
    			speciesCount[key.EbirdCode]++
    		}
    	}

    	sort.Slice(allCalls, func(i, j int) bool {
    		a, b := &allCalls[i], &allCalls[j]
    		if a.File != b.File {
    			return a.File < b.File
    		}
    		if a.StartTime != b.StartTime {
    			return a.StartTime < b.StartTime
    		}
    		return a.EbirdCode < b.EbirdCode
    	})
    	return allCalls, speciesCount
    }
    // DirCache caches directory entries for fast WAV file lookup.
    // The directory is scanned once at construction; afterwards lookups go
    // through maps keyed by the lowercased filename without its extension.
    // Safe for concurrent read-only use after construction.
    type DirCache struct {
    	dir    string
    	wavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")
    	dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)
    }

    // NewDirCache creates a DirCache by scanning the directory once.
    // Subdirectories are skipped. If the directory cannot be read the cache is
    // returned empty, so every lookup reports "not found". When several files
    // share a key, the one listed last by os.ReadDir wins.
    func NewDirCache(dir string) *DirCache {
    	cache := &DirCache{dir: dir}

    	entries, err := os.ReadDir(dir)
    	if err != nil {
    		cache.wavMap = make(map[string]string)
    		cache.dirMap = make(map[string]string)
    		return cache
    	}

    	cache.wavMap = make(map[string]string, len(entries))
    	cache.dirMap = make(map[string]string, len(entries))
    	for _, e := range entries {
    		if e.IsDir() {
    			continue
    		}
    		filename := e.Name()
    		ext := filepath.Ext(filename)
    		key := strings.ToLower(strings.TrimSuffix(filename, ext))

    		cache.dirMap[key] = filename
    		if strings.EqualFold(ext, ".wav") {
    			cache.wavMap[key] = filename
    		}
    	}
    	return cache
    }

    // FindWAV looks up a WAV file by basename (case-insensitive).
    // Returns the full path with correct case, or empty string if not found.
    func (dc *DirCache) FindWAV(baseName string) string {
    	filename, found := dc.wavMap[strings.ToLower(baseName)]
    	if !found {
    		return ""
    	}
    	return filepath.Join(dc.dir, filename)
    }

    // FindFile looks up any file by basename (case-insensitive).
    // Returns the full path with correct case, or empty string if not found.
    func (dc *DirCache) FindFile(baseName string) string {
    	filename, found := dc.dirMap[strings.ToLower(baseName)]
    	if !found {
    		return ""
    	}
    	return filepath.Join(dc.dir, filename)
    }
    // findWAVFile finds a WAV file in the directory with case-insensitive matching.
    // baseName is the filename without extension (e.g., "20230610_150000").
    //
    // The extension must be ".wav" in any case. A file whose basename equals
    // baseName exactly is preferred; if there is none, the first file whose
    // basename matches case-insensitively is used, which keeps this function in
    // line with the case-insensitive lookup of DirCache.FindWAV. Subdirectories
    // are ignored.
    //
    // Returns the full path with correct case, or empty string if not found or
    // if the directory cannot be read.
    //
    // Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
    func findWAVFile(dir, baseName string) string {
    	entries, err := os.ReadDir(dir)
    	if err != nil {
    		return ""
    	}

    	foldMatch := "" // first case-insensitive candidate, used only without an exact match
    	for _, entry := range entries {
    		if entry.IsDir() {
    			continue
    		}
    		name := entry.Name()
    		ext := filepath.Ext(name)
    		if !strings.EqualFold(ext, ".wav") {
    			continue
    		}
    		stem := strings.TrimSuffix(name, ext)
    		if stem == baseName {
    			return filepath.Join(dir, name)
    		}
    		if foldMatch == "" && strings.EqualFold(stem, baseName) {
    			foldMatch = name
    		}
    	}

    	if foldMatch == "" {
    		return ""
    	}
    	return filepath.Join(dir, foldMatch)
    }
    // writeDotFiles writes AviaNZ .data files for each audio file with calls.
    //
    // WAV files are looked up in the directory that contains the CSV. Calls are
    // grouped by the base name of their File field. Batches of fewer than 10
    // files are handled sequentially; larger ones go to the parallel worker pool.
    // Returns the number of .data files written, the number skipped, and any error.
    func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
    	const parallelThreshold = 10 // below this, goroutine overhead is not worth it

    	csvDir := filepath.Dir(csvPath)

    	callsByFile := make(map[string][]ClusteredCall)
    	for i := range calls {
    		name := filepath.Base(calls[i].File)
    		callsByFile[name] = append(callsByFile[name], calls[i])
    	}

    	// Announce the total before any file is processed.
    	if progress != nil {
    		progress(0, len(callsByFile), "Processing WAV files")
    	}

    	if len(callsByFile) >= parallelThreshold {
    		return writeDotFilesParallel(csvDir, filter, callsByFile, progress)
    	}
    	return writeDotFilesSequential(csvDir, filter, callsByFile, progress)
    }
    // dotDataJob represents a single file to process: the unit of work sent to
    // dotDataWorker over the jobs channel.
    type dotDataJob struct {
    filename string // base name of the audio file as referenced by the calls
    fileCalls []ClusteredCall // all calls belonging to that file
    }
    // dotDataResult represents the result of processing a single file.
    // The collector counts a result as written when written is true and as
    // skipped otherwise, including when err is set.
    type dotDataResult struct {
    filename string // base name copied from the job
    written bool // true only when the .data file was written successfully
    err error // non-nil only when writing the .data file failed
    }
    // writeDotFilesSequential processes files one at a time (for small batches).
    //
    // The directory is scanned once into a DirCache instead of once per file
    // through the deprecated findWAVFile; lookups are therefore case-insensitive
    // on both basename and extension. A file is counted as skipped when its WAV
    // cannot be found or its header cannot be parsed. A failure to write a .data
    // file aborts the batch and returns the counts reached so far with the error.
    // Progress, when a handler is given, is reported after every file.
    func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
    	total := len(callsByFile)
    	written, skipped := 0, 0

    	cache := NewDirCache(csvDir)

    	// report publishes the number of files handled so far (written + skipped).
    	report := func() {
    		if progress != nil {
    			progress(written+skipped, total, "")
    		}
    	}

    	for filename, fileCalls := range callsByFile {
    		// Find WAV file with correct case
    		baseName := strings.TrimSuffix(filename, filepath.Ext(filename))
    		wavPath := cache.FindWAV(baseName)
    		if wavPath == "" {
    			skipped++
    			report()
    			continue
    		}

    		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    		if err != nil {
    			skipped++
    			report()
    			continue
    		}

    		// Build segments and metadata
    		dataPath := wavPath + ".data"
    		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)
    		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
    			return written, skipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
    		}
    		written++
    		report()
    	}
    	return written, skipped, nil
    }
    // writeDotFilesParallel processes files concurrently using a worker pool of
    // DOT_DATA_WORKERS goroutines.
    //
    // Unlike the sequential path, a write failure does not stop the batch: every
    // file is attempted and the first error received is returned together with
    // the final written/skipped counts. Results that are not written, including
    // failed writes, count as skipped.
    func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
    	total := len(callsByFile)

    	// Both channels can hold the whole batch, so neither the producer below
    	// nor the workers ever block on a send.
    	jobs := make(chan dotDataJob, total)
    	results := make(chan dotDataResult, total)

    	var wg sync.WaitGroup
    	wg.Add(DOT_DATA_WORKERS)
    	for range DOT_DATA_WORKERS {
    		go dotDataWorker(csvDir, filter, jobs, results, &wg)
    	}

    	for name, fileCalls := range callsByFile {
    		jobs <- dotDataJob{filename: name, fileCalls: fileCalls}
    	}
    	close(jobs)

    	// Close results once every worker is done so the collecting loop ends.
    	go func() {
    		wg.Wait()
    		close(results)
    	}()

    	var (
    		written, skipped int
    		firstErr         error
    		reported         atomic.Int32
    	)
    	for res := range results {
    		if firstErr == nil && res.err != nil {
    			firstErr = res.err
    		}
    		if res.written {
    			written++
    		} else {
    			skipped++
    		}
    		if progress == nil {
    			continue
    		}
    		progress(int(reported.Add(1)), total, "")
    	}
    	return written, skipped, firstErr
    }
    // dotDataWorker processes files from the jobs channel until it is closed,
    // sending exactly one dotDataResult per job, and calls wg.Done on exit.
    //
    // Each worker scans csvDir once, on its first job, into a DirCache rather
    // than re-reading the whole directory for every job through the deprecated
    // findWAVFile. Lookups are therefore case-insensitive on both basename and
    // extension. A job whose WAV cannot be found or whose header cannot be parsed
    // is reported as not written with a nil error; a failed .data write is
    // reported as not written with the wrapped error.
    func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
    	defer wg.Done()

    	var cache *DirCache // built lazily so idle workers never touch the disk
    	for job := range jobs {
    		if cache == nil {
    			cache = NewDirCache(csvDir)
    		}
    		res := dotDataResult{filename: job.filename}

    		// Find WAV file with correct case
    		baseName := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
    		wavPath := cache.FindWAV(baseName)
    		if wavPath == "" {
    			results <- res
    			continue
    		}

    		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    		if err != nil {
    			results <- res
    			continue
    		}

    		// Build segments and metadata
    		dataPath := wavPath + ".data"
    		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)
    		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
    			res.err = fmt.Errorf("failed to write %s: %w", dataPath, err)
    		} else {
    			res.written = true
    		}
    		results <- res
    	}
    }
    // buildAviaNZMetaAndSegments creates metadata and segments for a .data file.
    //
    // The metadata uses operator "Auto", reviewer "None" and the given duration.
    // Each call becomes one segment [start, end, freq_low, freq_high, labels]
    // with freq_low 0 and freq_high sampleRate (full band), carrying a single
    // label with the call's ebird code, DEFAULT_CERTAINTY and the filter name.
    // The segment slice is nil when calls is empty.
    func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
    	reviewerName := "None"
    	meta := AviaNZMeta{Operator: "Auto", Reviewer: &reviewerName, Duration: duration}

    	var segments []AviaNZSegment
    	for i := range calls {
    		c := &calls[i]
    		segments = append(segments, AviaNZSegment{
    			c.StartTime,
    			c.EndTime,
    			0,          // freq_low
    			sampleRate, // freq_high (full band)
    			[]AviaNZLabel{{
    				Species:   c.EbirdCode,
    				Certainty: DEFAULT_CERTAINTY,
    				Filter:    filter,
    			}},
    		})
    	}
    	return meta, segments
    }
    // writeAviaNZDataFile writes a new .data file to disk (does not check for
    // existing files; an existing file at path is truncated by os.Create).
    //
    // data is encoded as a single compact JSON value followed by a newline.
    // Returns a wrapped error if the file cannot be created, the data cannot be
    // encoded, or the file cannot be closed. The Close error is reported because
    // a failed close on a freshly written file can mean the data was not fully
    // stored.
    func writeAviaNZDataFile(path string, data []any) error {
    	file, err := os.Create(path)
    	if err != nil {
    		return fmt.Errorf("failed to create file: %w", err)
    	}

    	// json.Encoder emits compact output unless SetIndent is configured.
    	if err := json.NewEncoder(file).Encode(data); err != nil {
    		_ = file.Close() // the encode failure is the error worth reporting
    		return fmt.Errorf("failed to encode JSON: %w", err)
    	}

    	if err := file.Close(); err != nil {
    		return fmt.Errorf("failed to close file: %w", err)
    	}
    	return nil
    }
    // writeDotDataFileSafe safely writes or merges .data files
    // - If os.Stat on path fails (e.g. the file doesn't exist): write new file
    // - If file exists with same filter: return error (refuse to clobber)
    // - If file exists with different filter: merge segments, sort by start time, and write
    // - If file exists but can't be parsed: return error (refuse to clobber)
    func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
    	if _, statErr := os.Stat(path); statErr != nil {
    		// Nothing to merge with: write a fresh file.
    		return writeAviaNZDataFile(path, buildDataFileFromSegments(meta, newSegments))
    	}

    	existing, err := utils.ParseDataFile(path)
    	if err != nil {
    		return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
    	}

    	// A segment already labelled with this filter means the file was processed before.
    	for _, seg := range existing.Segments {
    		if seg.HasFilterLabel(filter) {
    			return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
    		}
    	}

    	// Different filter: safe to merge.
    	for i := range newSegments {
    		existing.Segments = append(existing.Segments, convertAviaNZSegment(newSegments[i], filter))
    	}
    	sort.Slice(existing.Segments, func(a, b int) bool {
    		return existing.Segments[a].StartTime < existing.Segments[b].StartTime
    	})
    	return existing.Write(path)
    }
    // convertAviaNZSegment converts an AviaNZSegment to utils.Segment.
    //
    // Elements 0 and 1 must be float64 (start, end) and element 4 must be
    // []AviaNZLabel; otherwise the type assertions panic. Elements 2 and 3 (the
    // frequency bounds) may be int or float64; any other type yields 0. Every
    // converted label takes its filter from the filter argument.
    func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
    	srcLabels := seg[4].([]AviaNZLabel)
    	converted := make([]*utils.Label, 0, len(srcLabels))
    	for _, l := range srcLabels {
    		converted = append(converted, &utils.Label{
    			Species:   l.Species,
    			Certainty: l.Certainty,
    			Filter:    filter,
    		})
    	}

    	// Frequency values could be int or float64 depending on how they were created.
    	asFloat := func(v any) float64 {
    		switch n := v.(type) {
    		case int:
    			return float64(n)
    		case float64:
    			return n
    		}
    		return 0
    	}
    	low, high := asFloat(seg[2]), asFloat(seg[3])

    	return &utils.Segment{
    		StartTime: seg[0].(float64),
    		EndTime:   seg[1].(float64),
    		FreqLow:   low,
    		FreqHigh:  high,
    		Labels:    converted,
    	}
    }
    // buildDataFileFromSegments builds the data file structure from meta and
    // segments: a slice whose first element is the metadata, followed by one
    // element per segment in the given order.
    func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
    	out := make([]any, 1, len(segments)+1)
    	out[0] = meta
    	for i := range segments {
    		out = append(out, segments[i])
    	}
    	return out
    }
    // ParseFilterFromFilename extracts the filter name from a preds CSV filename.
    // "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"
    //
    // The base name, minus a trailing ".csv", must consist of exactly three
    // underscore-separated parts; the middle part is the filter.
    // Returns empty string if parsing fails.
    func ParseFilterFromFilename(csvPath string) string {
    	stem := strings.TrimSuffix(filepath.Base(csvPath), ".csv")
    	if fields := strings.Split(stem, "_"); len(fields) == 3 {
    		return fields[1]
    	}
    	return ""
    }
    // clusterStartTimes groups consecutive start times into clusters
    // where the gap between consecutive times is <= gapThreshold.
    // The input is expected to be sorted ascending. Each cluster is a freshly
    // allocated slice. Returns nil for empty input.
    func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
    	if len(startTimes) == 0 {
    		return nil
    	}

    	var clusters [][]float64
    	begin := 0 // index where the cluster currently being built starts
    	for i := 1; i <= len(startTimes); i++ {
    		if i < len(startTimes) && startTimes[i]-startTimes[i-1] <= gapThreshold {
    			continue // still inside the current cluster
    		}
    		// Gap too large, or end of input: close the current cluster.
    		clusters = append(clusters, append([]float64(nil), startTimes[begin:i]...))
    		begin = i
    	}
    	return clusters
    }
  • file addition: calls_from_common.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync"
    "sync/atomic"
    )
    // CallsFromSourceInput defines the common input for calls-from-source tools.
    // Either File or Folder must be set; File takes precedence when both are.
    type CallsFromSourceInput struct {
    // Folder is searched for source files through CallSource.FindFiles.
    Folder string `json:"folder"`
    // File is a single source file to process.
    File string `json:"file"`
    // Delete requests removal of each source file after it has been processed
    // and its .data file written.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
    }
    // CallsFromSourceOutput defines the common output for calls-from-source tools
    type CallsFromSourceOutput struct {
    // Calls holds all calls from the processed files.
    Calls []ClusteredCall `json:"calls"`
    // TotalCalls is len(Calls).
    TotalCalls int `json:"total_calls"`
    // SpeciesCount maps each ebird code to its number of calls.
    SpeciesCount map[string]int `json:"species_count"`
    // DataFilesWritten counts source files whose processing reported "written".
    DataFilesWritten int `json:"data_files_written"`
    // DataFilesSkipped counts source files whose processing reported "skipped".
    DataFilesSkipped int `json:"data_files_skipped"`
    // FilesProcessed counts source files processed without error.
    FilesProcessed int `json:"files_processed"`
    // FilesDeleted counts source files removed because Delete was requested.
    FilesDeleted int `json:"files_deleted"`
    // Filter is the source's display name (CallSource.Name).
    Filter string `json:"filter"`
    // Error carries the failure message when the call fails; nil otherwise.
    Error *string `json:"error,omitempty"`
    }
    // CallSource abstracts a source of bird call data (Raven, BirdNET, etc.)
    // so that callsFromSource can drive every source through the same
    // sequential or parallel pipeline.
    type CallSource interface {
    // Name returns the display name (e.g. "Raven", "BirdNET").
    // It is also reported as the Filter of the output.
    Name() string
    // FindFiles discovers source files in the given folder
    FindFiles(folder string) ([]string, error)
    // ProcessFile processes a single source file and returns calls, write/skip status.
    // cache is a DirCache for the directory containing path. written and skipped
    // feed the DataFilesWritten and DataFilesSkipped counters; a non-nil err
    // makes the whole run fail.
    // NOTE(review): the parallel path calls ProcessFile from several goroutines
    // at once, so implementations presumably need to be safe for that — confirm.
    ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
    }
    // callsFromSource is the shared entry point for all call source tools.
    //
    // It collects the source files (input.File if set, otherwise the files found
    // in input.Folder), fails when neither is given or nothing is found, and then
    // hands fewer than 10 files to the sequential path and anything larger to the
    // parallel path. Failures detected here are returned both as an error and in
    // the output's Error field.
    func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
    	output := CallsFromSourceOutput{Filter: src.Name()}

    	// fail records msg on the output and returns it as the error.
    	fail := func(msg string) (CallsFromSourceOutput, error) {
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}

    	var files []string
    	switch {
    	case input.File != "":
    		files = []string{input.File}
    	case input.Folder != "":
    		found, err := src.FindFiles(input.Folder)
    		if err != nil {
    			return fail(fmt.Sprintf("Failed to find %s files: %v", src.Name(), err))
    		}
    		files = found
    	default:
    		return fail("Either --folder or --file must be specified")
    	}

    	if len(files) == 0 {
    		return fail(fmt.Sprintf("No %s files found", src.Name()))
    	}

    	// Small batches are not worth the goroutine overhead.
    	if len(files) < 10 {
    		return callsFromSourceSequential(src, input, files)
    	}
    	return callsFromSourceParallel(src, input, files)
    }
    // callsFromSourceSequential processes source files one at a time (for small batches).
    //
    // One DirCache is kept per directory. The cache for input.Folder is built up
    // front and stored under the cleaned folder path, because lookups use
    // filepath.Dir, which returns a cleaned path; keyed by the raw value, a
    // folder such as "data/" or "./data" would never match and the directory
    // would be scanned a second time.
    //
    // Processing stops at the first ProcessFile or delete error; the returned
    // output then carries only Filter and Error. With input.Delete set, a source
    // file is removed once it has been processed and reported as written.
    // On success the calls are sorted by file, then start time.
    func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    	var output CallsFromSourceOutput
    	output.Filter = src.Name()

    	// fail records msg on the output and returns it as the error.
    	fail := func(msg string) (CallsFromSourceOutput, error) {
    		output.Error = &msg
    		return output, fmt.Errorf("%s", msg)
    	}

    	dirCaches := make(map[string]*DirCache)
    	if input.Folder != "" {
    		dirCaches[filepath.Clean(input.Folder)] = NewDirCache(input.Folder)
    	}

    	speciesCount := make(map[string]int)
    	var allCalls []ClusteredCall
    	dataFilesWritten := 0
    	dataFilesSkipped := 0
    	filesProcessed := 0
    	filesDeleted := 0

    	for _, file := range files {
    		dir := filepath.Dir(file)
    		cache := dirCaches[dir]
    		if cache == nil {
    			cache = NewDirCache(dir)
    			dirCaches[dir] = cache
    		}

    		calls, written, skipped, err := src.ProcessFile(file, cache)
    		if err != nil {
    			return fail(fmt.Sprintf("Error processing %s: %v", file, err))
    		}
    		if written {
    			dataFilesWritten++
    		}
    		if skipped {
    			dataFilesSkipped++
    		}
    		allCalls = append(allCalls, calls...)
    		for _, call := range calls {
    			speciesCount[call.EbirdCode]++
    		}
    		filesProcessed++

    		// Delete if requested and successfully processed
    		if input.Delete && written {
    			if err := os.Remove(file); err != nil {
    				return fail(fmt.Sprintf("Failed to delete %s: %v", file, err))
    			}
    			filesDeleted++
    		}

    		if input.ProgressHandler != nil {
    			input.ProgressHandler(filesProcessed, len(files), filepath.Base(file))
    		}
    	}

    	// Sort all calls by file, then start time
    	sort.Slice(allCalls, func(i, j int) bool {
    		if allCalls[i].File != allCalls[j].File {
    			return allCalls[i].File < allCalls[j].File
    		}
    		return allCalls[i].StartTime < allCalls[j].StartTime
    	})

    	output.Calls = allCalls
    	output.TotalCalls = len(allCalls)
    	output.SpeciesCount = speciesCount
    	output.DataFilesWritten = dataFilesWritten
    	output.DataFilesSkipped = dataFilesSkipped
    	output.FilesProcessed = filesProcessed
    	output.FilesDeleted = filesDeleted
    	return output, nil
    }
    // sourceJob represents a single file to process (generic over CallSource).
    // It is the unit of work sent to sourceWorker over the jobs channel.
    type sourceJob struct {
    filePath string // path of the source file to hand to CallSource.ProcessFile
    }
    // sourceResult represents the result of processing a single source file.
    // sourceWorker sends these values on a channel of parallelResult, so the
    // accessor methods below are what the result collector sees.
    type sourceResult struct {
    path string // source file that was processed
    calls []ClusteredCall // calls returned by ProcessFile
    written bool // "written" status returned by ProcessFile
    skipped bool // "skipped" status returned by ProcessFile
    err error // error returned by ProcessFile, nil on success
    }
    // filePath returns the path of the processed source file.
    func (r sourceResult) filePath() string { return r.path }
    // getCalls returns the calls produced for the file.
    func (r sourceResult) getCalls() []ClusteredCall { return r.calls }
    // wasWritten reports the "written" status from ProcessFile.
    func (r sourceResult) wasWritten() bool { return r.written }
    // wasSkipped reports the "skipped" status from ProcessFile.
    func (r sourceResult) wasSkipped() bool { return r.skipped }
    // getError returns the error from ProcessFile, or nil.
    func (r sourceResult) getError() error { return r.err }
    // callsFromSourceParallel processes source files concurrently using a worker
    // pool of DOT_DATA_WORKERS goroutines and shared DirCaches.
    //
    // The cache for input.Folder is built up front and stored under the cleaned
    // folder path: workers look caches up by filepath.Dir, which returns a
    // cleaned path, so a raw key such as "data/" or "./data" would never match
    // and the directory would be scanned again.
    //
    // Results are collected by aggregateResults. If it reports an error, the
    // output carries only Filter and Error; otherwise the calls are sorted by
    // sortCallsByFileAndTime and the counters are copied into the output.
    func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    	var output CallsFromSourceOutput
    	output.Filter = src.Name()
    	total := len(files)
    	var processed atomic.Int32

    	// Build DirCache for the folder
    	dirCaches := &sync.Map{}
    	if input.Folder != "" {
    		dirCaches.Store(filepath.Clean(input.Folder), NewDirCache(input.Folder))
    	}

    	// Both channels can hold the whole batch, so sends never block.
    	jobs := make(chan sourceJob, total)
    	results := make(chan parallelResult, total)

    	// Start workers
    	var wg sync.WaitGroup
    	for range DOT_DATA_WORKERS {
    		wg.Add(1)
    		go sourceWorker(src, dirCaches, jobs, results, &wg)
    	}

    	// Send jobs
    	for _, file := range files {
    		jobs <- sourceJob{filePath: file}
    	}
    	close(jobs)

    	// Wait for workers to finish, then close results so collection ends
    	go func() {
    		wg.Wait()
    		close(results)
    	}()

    	// Collect results with progress reporting
    	stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)
    	if stats.firstErr != nil {
    		errMsg := stats.firstErr.Error()
    		output.Error = &errMsg
    		return output, stats.firstErr
    	}

    	sortCallsByFileAndTime(stats.calls)
    	output.Calls = stats.calls
    	output.TotalCalls = len(stats.calls)
    	output.SpeciesCount = stats.speciesCount
    	output.DataFilesWritten = stats.dataFilesWritten
    	output.DataFilesSkipped = stats.dataFilesSkipped
    	output.FilesProcessed = stats.filesProcessed
    	output.FilesDeleted = stats.filesDeleted
    	return output, nil
    }
    // sourceWorker processes source files from the jobs channel until it is
    // closed, sending one sourceResult per job, and calls wg.Done on exit.
    //
    // dirCaches maps a directory path to its *DirCache and is shared by all
    // workers. A missing cache is registered with LoadOrStore, so when several
    // workers scan the same directory at the same time they all end up using the
    // cache that was stored first instead of overwriting one another's entry.
    func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
    	defer wg.Done()

    	for job := range jobs {
    		dir := filepath.Dir(job.filePath)

    		// Get or create DirCache for this directory
    		cached, ok := dirCaches.Load(dir)
    		if !ok {
    			cached, _ = dirCaches.LoadOrStore(dir, NewDirCache(dir))
    		}
    		cache := cached.(*DirCache)

    		calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
    		results <- sourceResult{
    			path:    job.filePath,
    			calls:   calls,
    			written: written,
    			skipped: skipped,
    			err:     err,
    		}
    	}
    }
  • file addition: calls_from_birda_raven_test.go (----------)
    [0.67281]
    package calls
    import (
    "os"
    "path/filepath"
    "testing"
    "skraak/utils"
    )
    // ============================================
    // BirdNET Tests
    // ============================================
    // TestCallsFromBirda_NewDataFile checks that a single BirdNET results file next
    // to a WAV file produces a fresh .data file with one BirdNET-filtered segment.
    func TestCallsFromBirda_NewDataFile(t *testing.T) {
        tmpDir := t.TempDir()
        // Create a minimal WAV file
        wavPath := filepath.Join(tmpDir, "test.WAV")
        createMinimalWAV(t, wavPath, 16000, 60.0)
        // Create BirdNET results file
        birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
        birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
        if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
            t.Fatal(err)
        }
        input := CallsFromBirdaInput{
            File: birdaPath,
        }
        output, err := CallsFromBirda(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
        }
        if output.Filter != "BirdNET" {
            t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
        }
        if output.TotalCalls != 1 {
            t.Errorf("expected 1 call, got %d", output.TotalCalls)
        }
        // Verify .data file was created
        dataPath := wavPath + ".data"
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Fatal rather than Error: the assertions below index into the slices and
        // would panic (hiding the real failure) if the counts were wrong.
        if len(df.Segments) != 1 {
            t.Fatalf("expected 1 segment, got %d", len(df.Segments))
        }
        labels := df.Segments[0].Labels
        if len(labels) == 0 {
            t.Fatal("expected at least one label on the segment, got none")
        }
        if labels[0].Filter != "BirdNET" {
            t.Errorf("expected filter 'BirdNET', got '%s'", labels[0].Filter)
        }
        if labels[0].Certainty != 85 {
            t.Errorf("expected certainty 85, got %d", labels[0].Certainty)
        }
    }
    // TestCallsFromBirda_ExistingSameFilter expects an error when the target .data
    // file already holds a label whose filter is "BirdNET".
    func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)
        resultsFile := filepath.Join(dir, "test.BirdNET.results.csv")
        // Written in order: the pre-existing .data file, then the BirdNET results.
        fixtures := []struct{ path, body string }{
            {wav + ".data", `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`},
            {resultsFile, "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n"},
        }
        for _, fx := range fixtures {
            if err := os.WriteFile(fx.path, []byte(fx.body), 0644); err != nil {
                t.Fatal(err)
            }
        }
        out, callErr := CallsFromBirda(CallsFromBirdaInput{File: resultsFile})
        if callErr == nil {
            t.Error("expected error for same filter, got nil")
        }
        if out.Error == nil {
            t.Error("expected error message in output")
        }
    }
    // TestCallsFromBirda_ExistingDifferentFilter expects BirdNET detections to be
    // merged into a .data file whose existing label uses another filter ("Manual").
    func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)
        dotData := wav + ".data"
        resultsFile := filepath.Join(dir, "test.BirdNET.results.csv")
        // Written in order: the pre-existing .data file, then the BirdNET results.
        fixtures := []struct{ path, body string }{
            {dotData, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`},
            {resultsFile, "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"},
        }
        for _, fx := range fixtures {
            if err := os.WriteFile(fx.path, []byte(fx.body), 0644); err != nil {
                t.Fatal(err)
            }
        }
        out, err := CallsFromBirda(CallsFromBirdaInput{File: resultsFile})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if written := out.DataFilesWritten; written != 1 {
            t.Errorf("expected 1 data file written, got %d", written)
        }
        merged, err := utils.ParseDataFile(dotData)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        if n := len(merged.Segments); n != 2 {
            t.Errorf("expected 2 segments after merge, got %d", n)
        }
    }
    // TestCallsFromBirda_DeleteOption expects the BirdNET results file to be removed
    // from disk once it has been processed with Delete set.
    func TestCallsFromBirda_DeleteOption(t *testing.T) {
        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)
        resultsFile := filepath.Join(dir, "test.BirdNET.results.csv")
        const csvBody = "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
        if err := os.WriteFile(resultsFile, []byte(csvBody), 0644); err != nil {
            t.Fatal(err)
        }
        out, err := CallsFromBirda(CallsFromBirdaInput{File: resultsFile, Delete: true})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if deleted := out.FilesDeleted; deleted != 1 {
            t.Errorf("expected 1 file deleted, got %d", deleted)
        }
        _, statErr := os.Stat(resultsFile)
        if !os.IsNotExist(statErr) {
            t.Error("expected BirdNET file to be deleted")
        }
    }
    func TestCallsFromBirda_FolderMode(t *testing.T) {
    tmpDir := t.TempDir()
    for i := range 2 {
    wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
    createMinimalWAV(t, wavPath, 16000, 60.0)
    birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
    birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
    if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
    t.Fatal(err)
    }
    }
    input := CallsFromBirdaInput{Folder: tmpDir}
    output, err := CallsFromBirda(input)
    if err != nil {
    t.Fatalf("unexpected error: %v", err)
    }
    if output.FilesProcessed != 2 {
    t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
    }
    if output.DataFilesWritten != 2 {
    t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
    }
    }
    // ============================================
    // Raven Tests
    // ============================================
    // TestCallsFromRaven_NewDataFile checks that a Raven selection table next to a
    // WAV file produces a .data file whose segment carries the table's frequency
    // bounds.
    func TestCallsFromRaven_NewDataFile(t *testing.T) {
        tmpDir := t.TempDir()
        wavPath := filepath.Join(tmpDir, "test.WAV")
        createMinimalWAV(t, wavPath, 16000, 60.0)
        ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
        ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
        if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
            t.Fatal(err)
        }
        input := CallsFromRavenInput{File: ravenPath}
        output, err := CallsFromRaven(input)
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if output.DataFilesWritten != 1 {
            t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
        }
        if output.Filter != "Raven" {
            t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
        }
        dataPath := wavPath + ".data"
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        // Guard the index below: without it an empty .data file would panic the
        // test instead of producing a readable failure.
        if len(df.Segments) == 0 {
            t.Fatal("expected at least one segment, got none")
        }
        if df.Segments[0].FreqLow != 1000 {
            t.Errorf("expected freq_low 1000, got %f", df.Segments[0].FreqLow)
        }
        if df.Segments[0].FreqHigh != 5000 {
            t.Errorf("expected freq_high 5000, got %f", df.Segments[0].FreqHigh)
        }
    }
    // TestCallsFromRaven_ExistingSameFilter expects an error when the target .data
    // file already holds a label whose filter is "Raven".
    func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)
        selections := filepath.Join(dir, "test.Table.1.selections.txt")
        // Written in order: the pre-existing .data file, then the Raven table.
        fixtures := []struct{ path, body string }{
            {wav + ".data", `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`},
            {selections, "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n"},
        }
        for _, fx := range fixtures {
            if err := os.WriteFile(fx.path, []byte(fx.body), 0644); err != nil {
                t.Fatal(err)
            }
        }
        out, callErr := CallsFromRaven(CallsFromRavenInput{File: selections})
        if callErr == nil {
            t.Error("expected error for same filter, got nil")
        }
        if out.Error == nil {
            t.Error("expected error message in output")
        }
    }
    // TestCallsFromRaven_ExistingDifferentFilter expects Raven selections to be
    // merged into a .data file whose existing label uses another filter ("BirdNET").
    func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
        dir := t.TempDir()
        wav := filepath.Join(dir, "test.WAV")
        createMinimalWAV(t, wav, 16000, 60.0)
        dotData := wav + ".data"
        selections := filepath.Join(dir, "test.Table.1.selections.txt")
        // Written in order: the pre-existing .data file, then the Raven table.
        fixtures := []struct{ path, body string }{
            {dotData, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`},
            {selections, "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n"},
        }
        for _, fx := range fixtures {
            if err := os.WriteFile(fx.path, []byte(fx.body), 0644); err != nil {
                t.Fatal(err)
            }
        }
        out, err := CallsFromRaven(CallsFromRavenInput{File: selections})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if written := out.DataFilesWritten; written != 1 {
            t.Errorf("expected 1 data file written, got %d", written)
        }
        merged, err := utils.ParseDataFile(dotData)
        if err != nil {
            t.Fatalf("failed to parse .data file: %v", err)
        }
        if n := len(merged.Segments); n != 2 {
            t.Errorf("expected 2 segments after merge, got %d", n)
        }
    }
    // TestCallsFromRaven_DeleteOption expects the Raven selection table to be
    // removed from disk once it has been processed with Delete set.
    func TestCallsFromRaven_DeleteOption(t *testing.T) {
        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)
        selections := filepath.Join(dir, "test.Table.1.selections.txt")
        const table = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
        if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
            t.Fatal(err)
        }
        out, err := CallsFromRaven(CallsFromRavenInput{File: selections, Delete: true})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if deleted := out.FilesDeleted; deleted != 1 {
            t.Errorf("expected 1 file deleted, got %d", deleted)
        }
        _, statErr := os.Stat(selections)
        if !os.IsNotExist(statErr) {
            t.Error("expected Raven file to be deleted")
        }
    }
    // TestCallsFromRaven_MultipleSelections feeds a table with three selections of
    // three different species and checks the call total and per-species counts.
    func TestCallsFromRaven_MultipleSelections(t *testing.T) {
        dir := t.TempDir()
        createMinimalWAV(t, filepath.Join(dir, "test.WAV"), 16000, 60.0)
        selections := filepath.Join(dir, "test.Table.1.selections.txt")
        const table = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
        if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
            t.Fatal(err)
        }
        out, err := CallsFromRaven(CallsFromRavenInput{File: selections})
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
        if total := out.TotalCalls; total != 3 {
            t.Errorf("expected 3 calls, got %d", total)
        }
        counts := out.SpeciesCount
        oneOfEach := counts["Kiwi"] == 1 && counts["Morepork"] == 1 && counts["Tui"] == 1
        if !oneOfEach {
            t.Errorf("unexpected species count: %v", counts)
        }
    }
  • file addition: calls_from_birda.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsFromBirdaInput defines the input for the calls-from-birda tool.
    // CallsFromBirda converts this value directly to CallsFromSourceInput, so the
    // field set and order must stay in sync with that type.
    type CallsFromBirdaInput struct {
    // Folder is a directory to scan for *.BirdNET.results.csv files.
    Folder string `json:"folder"`
    // File is the path of a single BirdNET results file to process.
    File string `json:"file"`
    // Delete requests removal of each results file after it has been processed.
    Delete bool `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
    }
    // CallsFromBirdaOutput defines the output for the calls-from-birda tool.
    // Every field is copied from the shared callsFromSource result by CallsFromBirda.
    type CallsFromBirdaOutput struct {
    // Calls lists one entry per detection that was written.
    Calls []ClusteredCall `json:"calls"`
    // TotalCalls is the number of entries in Calls.
    TotalCalls int `json:"total_calls"`
    // SpeciesCount holds a per-species tally (presumably keyed by the same
    // species label stored on each call — confirm against aggregateResults).
    SpeciesCount map[string]int `json:"species_count"`
    // DataFilesWritten counts results files for which a .data file was written.
    DataFilesWritten int `json:"data_files_written"`
    // DataFilesSkipped counts results files that produced no .data file without
    // being treated as an error (e.g. no detections or no matching WAV file).
    DataFilesSkipped int `json:"data_files_skipped"`
    FilesProcessed int `json:"files_processed"`
    FilesDeleted int `json:"files_deleted"`
    // Filter is the source/filter name; the tests expect "BirdNET" here.
    Filter string `json:"filter"`
    // Error is set when processing failed; it is omitted from JSON when nil.
    Error *string `json:"error,omitempty"`
    }
    // birdaSource implements CallSource for BirdNET results files
    type birdaSource struct{}
    // Name returns the source name, "BirdNET".
    func (birdaSource) Name() string {
        return "BirdNET"
    }
    // FindFiles returns the paths of all entries directly inside folder whose name
    // ends in ".BirdNET.results.csv", in the order os.ReadDir reports them.
    // Sub-directories are not descended into. A read error is returned unchanged.
    func (birdaSource) FindFiles(folder string) ([]string, error) {
        const resultsSuffix = ".BirdNET.results.csv"
        listing, err := os.ReadDir(folder)
        if err != nil {
            return nil, err
        }
        var matched []string
        for _, e := range listing {
            if !strings.HasSuffix(e.Name(), resultsSuffix) {
                continue
            }
            matched = append(matched, filepath.Join(folder, e.Name()))
        }
        return matched, nil
    }
    // ProcessFile handles one BirdNET results file by delegating to
    // processBirdaFileCached and returns its results unchanged: the calls, whether
    // a .data file was written, whether the file was skipped, and any error.
    func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
        calls, written, skipped, err := processBirdaFileCached(birdaFile, cache)
        return calls, written, skipped, err
    }
    // CallsFromBirda processes BirdNET results files and writes .data files.
    // It runs the shared callsFromSource pipeline with a birdaSource and copies the
    // common result into the Birda-specific output type. The output is populated
    // even when an error is returned, so callers can read its Error field.
    func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
        common, err := callsFromSource(birdaSource{}, CallsFromSourceInput(input))
        return CallsFromBirdaOutput{
            Calls:            common.Calls,
            TotalCalls:       common.TotalCalls,
            SpeciesCount:     common.SpeciesCount,
            DataFilesWritten: common.DataFilesWritten,
            DataFilesSkipped: common.DataFilesSkipped,
            FilesProcessed:   common.FilesProcessed,
            FilesDeleted:     common.FilesDeleted,
            Filter:           common.Filter,
            Error:            common.Error,
        }, err
    }
    // BirdNETDetection represents a single BirdNET detection, i.e. one data row of
    // a BirdNET results CSV.
    type BirdNETDetection struct {
    // StartTime and EndTime come from the "Start (s)" and "End (s)" columns.
    StartTime float64
    EndTime float64
    // ScientificName is not filled in by readBirdaDetections in this file.
    ScientificName string
    // CommonName comes from the "Common name" column.
    CommonName string
    // Confidence comes from the "Confidence" column; it is treated as a
    // 0.0-1.0 value when converted to a certainty percentage.
    Confidence float64
    // WAVPath comes from the optional "File" column; empty when absent.
    WAVPath string
    }
    // birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
    // A value of -1 means the column was not found.
    type birdaColumnIndices struct {
        startIdx      int
        endIdx        int
        commonNameIdx int
        confidenceIdx int
        fileIdx       int
    }
    // parseBirdaCSVHeader reads the CSV header row and returns column indices.
    // A UTF-8 byte-order mark in front of a column name is ignored. The "Start (s)",
    // "End (s)", "Common name" and "Confidence" columns are required; "File" is
    // optional. An error is returned when the header cannot be read or a required
    // column is missing.
    func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
        names, err := reader.Read()
        if err != nil {
            return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
        }
        cols := birdaColumnIndices{startIdx: -1, endIdx: -1, commonNameIdx: -1, confidenceIdx: -1, fileIdx: -1}
        // Column name -> field that records its position.
        slots := map[string]*int{
            "Start (s)":   &cols.startIdx,
            "End (s)":     &cols.endIdx,
            "Common name": &cols.commonNameIdx,
            "Confidence":  &cols.confidenceIdx,
            "File":        &cols.fileIdx,
        }
        for pos, raw := range names {
            if slot, known := slots[strings.TrimPrefix(raw, "\ufeff")]; known {
                *slot = pos
            }
        }
        for _, required := range []int{cols.startIdx, cols.endIdx, cols.commonNameIdx, cols.confidenceIdx} {
            if required == -1 {
                return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
            }
        }
        return cols, nil
    }
    // readBirdaDetections reads all detection records from a BirdNET CSV.
    // reader must be positioned just after the header row and idx must hold the
    // column positions taken from that header. Reading stops at end of input. Any
    // read error, or a start time, end time or confidence that is not a valid
    // float, aborts the whole read and returns a nil slice with the error.
    func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
        var out []BirdNETDetection
        for {
            row, err := reader.Read()
            if err == io.EOF {
                return out, nil
            }
            if err != nil {
                return nil, fmt.Errorf("failed to read record: %w", err)
            }
            start, err := strconv.ParseFloat(row[idx.startIdx], 64)
            if err != nil {
                return nil, fmt.Errorf("failed to parse start time %q: %w", row[idx.startIdx], err)
            }
            end, err := strconv.ParseFloat(row[idx.endIdx], 64)
            if err != nil {
                return nil, fmt.Errorf("failed to parse end time %q: %w", row[idx.endIdx], err)
            }
            conf, err := strconv.ParseFloat(row[idx.confidenceIdx], 64)
            if err != nil {
                return nil, fmt.Errorf("failed to parse confidence %q: %w", row[idx.confidenceIdx], err)
            }
            det := BirdNETDetection{
                StartTime:  start,
                EndTime:    end,
                CommonName: row[idx.commonNameIdx],
                Confidence: conf,
            }
            // The "File" column is optional, so only read it when it is present.
            if idx.fileIdx >= 0 && idx.fileIdx < len(row) {
                det.WAVPath = row[idx.fileIdx]
            }
            out = append(out, det)
        }
    }
    // resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
    // firstWAVPath (the path recorded in the CSV) wins when it is non-empty and
    // exists on disk. Otherwise the results file name minus its
    // ".BirdNET.results.csv" suffix is looked up through cache when one is given,
    // or through findWAVFile in the results file's directory when cache is nil.
    func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
        if firstWAVPath != "" {
            _, statErr := os.Stat(firstWAVPath)
            if statErr == nil {
                return firstWAVPath
            }
        }
        stem := strings.TrimSuffix(filepath.Base(birdaFile), ".BirdNET.results.csv")
        if cache == nil {
            return findWAVFile(filepath.Dir(birdaFile), stem)
        }
        return cache.FindWAV(stem)
    }
    // processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup.
    //
    // It returns the calls found, whether a .data file was written, whether the
    // file was skipped, and an error. A file is skipped (no error) when it has no
    // detections, when no WAV file can be resolved, or when the WAV header cannot
    // be parsed. Failing to open or parse the CSV, or to write the .data file, is
    // reported as an error.
    func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
        f, err := os.Open(birdaFile)
        if err != nil {
            return nil, false, false, fmt.Errorf("failed to open file: %w", err)
        }
        // The file is only read, so a Close error carries no useful information.
        defer func() { _ = f.Close() }()
        csvReader := csv.NewReader(f)
        cols, err := parseBirdaCSVHeader(csvReader)
        if err != nil {
            return nil, false, false, err
        }
        dets, err := readBirdaDetections(csvReader, cols)
        if err != nil {
            return nil, false, false, err
        }
        if len(dets) == 0 {
            return nil, false, true, nil
        }
        // Only the first row's File value is consulted to locate the recording.
        wavPath := resolveBirdaWAVPath(birdaFile, dets[0].WAVPath, cache)
        if wavPath == "" {
            return nil, false, true, nil
        }
        sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
        if err != nil {
            // An unreadable WAV header counts as a skip, not a failure.
            return nil, false, true, nil
        }
        reviewer := "None"
        meta := AviaNZMeta{Operator: "BirdNET", Duration: duration, Reviewer: &reviewer}
        segs := buildBirdNETSegments(dets, sampleRate)
        if err := writeDotDataFileSafe(wavPath+".data", segs, "BirdNET", meta); err != nil {
            return nil, false, false, err
        }
        found := make([]ClusteredCall, 0, len(dets))
        for _, d := range dets {
            found = append(found, ClusteredCall{
                File:      wavPath,
                StartTime: d.StartTime,
                EndTime:   d.EndTime,
                EbirdCode: d.CommonName,
                Segments:  1,
            })
        }
        return found, true, false, nil
    }
    // buildBirdNETSegments converts BirdNET detections to AviaNZ segments.
    // Each detection becomes one segment spanning the detection's start and end
    // time with a frequency range of 0 to sampleRate and a single label: species
    // is the detection's common name, filter is "BirdNET" and certainty is the
    // confidence expressed as a percentage clamped to 0-100.
    func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
        var segments []AviaNZSegment
        for _, det := range detections {
            // Convert confidence (0.0-1.0) to certainty (0-100), rounding to the
            // nearest integer. Plain truncation turns values such as 0.29 into 28
            // because 0.29*100 evaluates to 28.999999999999996 in float64.
            certainty := min(max(int(det.Confidence*100+0.5), 0), 100)
            labels := []AviaNZLabel{
                {
                    Species:   det.CommonName,
                    Certainty: certainty,
                    Filter:    "BirdNET",
                },
            }
            segment := AviaNZSegment{
                det.StartTime,
                det.EndTime,
                0,          // freq_low
                sampleRate, // freq_high (full band)
                labels,
            }
            segments = append(segments, segment)
        }
        return segments
    }
  • file addition: calls_detect_anomalies_test.go (----------)
    [0.67281]
    package calls
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // TestDetectAnomalies_LabelMismatch expects one label_mismatch anomaly when two
    // models label the same time range with different calltypes.
    func TestDetectAnomalies_LabelMismatch(t *testing.T) {
        dir := t.TempDir()
        // Same time range, different calltypes across two models
        data := `[{"Operator":"test"},` +
            `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
            `{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
        if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
            t.Fatal(err)
        }
        out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
        if err != nil {
            t.Fatal(err)
        }
        if out.LabelMismatches != 1 {
            t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
        }
        if out.CertaintyMismatches != 0 {
            t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
        }
        // Guard the index below so a missing anomaly fails the test cleanly
        // instead of panicking.
        if len(out.Anomalies) == 0 {
            t.Fatal("expected at least one anomaly, got none")
        }
        if out.Anomalies[0].Type != "label_mismatch" {
            t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
        }
    }
    func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
    dir := t.TempDir()
    // Same time range, same labels, different certainty
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
    `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.CertaintyMismatches != 1 {
    t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
    }
    if out.LabelMismatches != 0 {
    t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
    }
    }
    func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
    dir := t.TempDir()
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
    `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
    t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
    }
    }
    func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
    dir := t.TempDir()
    // model-a has a segment, model-b has no segment in this file
    data := `[{"Operator":"test"},` +
    `[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
    t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
    t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
    t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
    }
    if out.FilesWithAllModels != 0 {
    t.Errorf("file missing a model should not count as FilesWithAllModels")
    }
    }
    func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
    dir := t.TempDir()
    _, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
    if err == nil {
    t.Error("expected error with only 1 model")
    }
    }
  • file addition: calls_detect_anomalies.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "os"
    "path/filepath"
    "skraak/utils"
    )
    // DetectAnomaliesInput holds the parameters for DetectAnomalies.
    type DetectAnomaliesInput struct {
    // Folder is the directory whose .data files are examined.
    Folder string
    // Models[0] is the anchor model: its segments drive the comparison.
    Models []string // at least 2 filter names
    // Entries are matched against "Species" or "Species+CallType" of the anchor label.
    Species []string // optional scope; empty = all species
    }
    // DetectAnomaliesOutput is the result of DetectAnomalies.
    type DetectAnomaliesOutput struct {
    // Folder is the cleaned form of the input folder path.
    Folder string `json:"folder"`
    Models []string `json:"models"`
    // FilesExamined counts .data files that parsed successfully.
    FilesExamined int `json:"files_examined"`
    // FilesWithAllModels counts examined files reported as containing every model.
    FilesWithAllModels int `json:"files_with_all_models"`
    // AnomaliesTotal equals len(Anomalies).
    AnomaliesTotal int `json:"anomalies_total"`
    LabelMismatches int `json:"label_mismatches"`
    CertaintyMismatches int `json:"certainty_mismatches"`
    Anomalies []Anomaly `json:"anomalies,omitempty"`
    // Error carries the error message when DetectAnomalies fails.
    Error string `json:"error,omitempty"`
    }
    // Anomaly describes one disagreement between models within a single .data file.
    type Anomaly struct {
    // File is the path of the .data file the anomaly was found in.
    File string `json:"file"`
    Type string `json:"type"` // "label_mismatch" | "certainty_mismatch"
    // Segments holds one entry per model, in the same order as the models list.
    Segments []AnomalySegment `json:"segments"`
    }
    // AnomalySegment is one model's view of a compared segment: its time range and
    // the label that model assigned.
    type AnomalySegment struct {
    // Model is the filter name the label belongs to.
    Model string `json:"model"`
    Start float64 `json:"start"`
    End float64 `json:"end"`
    Species string `json:"species"`
    // CallType is omitted from JSON when empty.
    CallType string `json:"calltype,omitempty"`
    Certainty int `json:"certainty"`
    }
    // validateAnomalyInput validates the input parameters for DetectAnomalies.
    // It requires at least two model names, rejects duplicate model names, and
    // checks that input.Folder exists and is a directory.
    func validateAnomalyInput(input DetectAnomaliesInput) error {
        if len(input.Models) < 2 {
            return fmt.Errorf("at least 2 --model values required")
        }
        // A set gives a single pass instead of comparing every pair of models.
        seen := make(map[string]struct{}, len(input.Models))
        for _, model := range input.Models {
            if _, dup := seen[model]; dup {
                return fmt.Errorf("duplicate --model values are not allowed")
            }
            seen[model] = struct{}{}
        }
        info, err := os.Stat(input.Folder)
        if err != nil {
            return fmt.Errorf("folder not found: %s", input.Folder)
        }
        if !info.IsDir() {
            return fmt.Errorf("not a directory: %s", input.Folder)
        }
        return nil
    }
    // DetectAnomalies compares corresponding segments across multiple ML model filters
    // within each .data file. Segments are matched by time overlap (same logic as propagate).
    // Lonely segments (no overlap in one or more models) are silently skipped.
    // Anomalies are flagged when overlapping segments disagree on species+calltype,
    // or when labels match but certainty values differ.
    //
    // On failure the returned output has its Error field set to the same message
    // as the returned error. Files that cannot be parsed are ignored and do not
    // count towards FilesExamined.
    func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
        folder := filepath.Clean(input.Folder)
        output := DetectAnomaliesOutput{
            Folder: folder,
            Models: input.Models,
        }
        if err := validateAnomalyInput(input); err != nil {
            output.Error = err.Error()
            return output, err
        }
        files, err := utils.FindDataFiles(folder)
        if err != nil {
            // %w keeps the underlying error inspectable with errors.Is/As; the
            // message text is the same as before.
            wrapped := fmt.Errorf("list .data files: %w", err)
            output.Error = wrapped.Error()
            return output, wrapped
        }
        scopeSet := make(map[string]bool, len(input.Species))
        for _, s := range input.Species {
            scopeSet[s] = true
        }
        for _, path := range files {
            df, err := utils.ParseDataFile(path)
            if err != nil {
                // Best effort: an unreadable .data file is skipped, not fatal.
                continue
            }
            output.FilesExamined++
            anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
            if anomalies == nil {
                // file didn't have all models present
                continue
            }
            output.FilesWithAllModels++
            for _, a := range anomalies {
                if a.Type == "label_mismatch" {
                    output.LabelMismatches++
                } else {
                    output.CertaintyMismatches++
                }
            }
            output.Anomalies = append(output.Anomalies, anomalies...)
        }
        output.AnomaliesTotal = len(output.Anomalies)
        return output, nil
    }
    // labeledSeg pairs a segment with the specific label matching the model filter.
    // A segment carrying labels from several models yields one labeledSeg per model.
    type labeledSeg struct {
    // seg is the segment the label is attached to (provides the time range).
    seg *utils.Segment
    // label is the one label on seg whose Filter equals the model name.
    label *utils.Label
    }
    // detectAnomaliesInFile returns nil if the file doesn't contain all required models.
    // Otherwise it returns a non-nil slice (possibly empty) with one anomaly per
    // in-scope anchor segment of models[0] that overlaps a segment of every other
    // model and disagrees with them on label or certainty.
    func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
        modelSegs := collectModelSegments(df, models)
        // Skip file if any model is entirely absent.
        for _, model := range models {
            if len(modelSegs[model]) == 0 {
                return nil
            }
        }
        // Start from an empty, non-nil slice: DetectAnomalies treats a nil result
        // as "model missing", so a file where all models agree must not return nil
        // or it would be left out of FilesWithAllModels.
        anomalies := []Anomaly{}
        for _, anchor := range modelSegs[models[0]] {
            if !inScope(anchor, scope) {
                continue
            }
            matches := findOverlappingMatches(anchor, models, modelSegs)
            if matches == nil {
                // Lonely anchor: at least one model has no overlapping segment.
                continue
            }
            group := buildComparisonGroup(anchor, models, matches)
            if a := checkGroupAnomaly(group, path, models); a != nil {
                anomalies = append(anomalies, *a)
            }
        }
        return anomalies
    }
    // collectModelSegments groups labeled segments by model filter name.
    // Every label whose Filter is one of models contributes one labeledSeg to that
    // model's list, in file order; labels from other filters are ignored.
    func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
        wanted := make(map[string]bool, len(models))
        for _, name := range models {
            wanted[name] = true
        }
        byModel := make(map[string][]labeledSeg, len(models))
        for _, s := range df.Segments {
            for _, l := range s.Labels {
                if !wanted[l.Filter] {
                    continue
                }
                byModel[l.Filter] = append(byModel[l.Filter], labeledSeg{seg: s, label: l})
            }
        }
        return byModel
    }
    // inScope returns true if the anchor's label is within the species scope filter.
    // An empty scope admits everything. Otherwise the label matches when scope
    // contains its species, or its "Species+CallType" key when a call type is set.
    func inScope(anchor labeledSeg, scope map[string]bool) bool {
        if len(scope) == 0 {
            return true
        }
        lbl := anchor.label
        if scope[lbl.Species] {
            return true
        }
        if lbl.CallType == "" {
            // Without a call type the only key is the species, checked above.
            return false
        }
        return scope[lbl.Species+"+"+lbl.CallType]
    }
    // findOverlappingMatches returns matches[model] = overlapping segments from that model,
    // or nil if any model has no overlap (lonely anchor).
    // Only models[1:] are searched; models[0] is the anchor's own model.
    func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
        found := make(map[string][]labeledSeg, len(models)-1)
        for _, model := range models[1:] {
            hits := found[model]
            for _, cand := range modelSegs[model] {
                if overlaps(anchor.seg, cand.seg) {
                    hits = append(hits, cand)
                }
            }
            if len(hits) == 0 {
                return nil
            }
            found[model] = hits
        }
        return found
    }
    // buildComparisonGroup assembles anchor + first match per other model.
    // The result is ordered like models: the anchor first, then the first
    // overlapping segment of each model in models[1:].
    func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
        grp := make([]labeledSeg, 0, len(models))
        grp = append(grp, anchor)
        for i := 1; i < len(models); i++ {
            grp = append(grp, matches[models[i]][0])
        }
        return grp
    }
    // checkGroupAnomaly checks a comparison group for label or certainty mismatches.
    // group[0] is the reference. A species or calltype difference anywhere in the
    // group yields a "label_mismatch"; only when all labels agree is certainty
    // compared, yielding a "certainty_mismatch". It returns nil when the whole
    // group agrees.
    func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
        ref := group[0].label
        rest := group[1:]
        report := func(kind string) *Anomaly {
            return &Anomaly{File: path, Type: kind, Segments: buildAnomalySegs(group, models)}
        }
        for _, other := range rest {
            sameLabel := other.label.Species == ref.Species && other.label.CallType == ref.CallType
            if !sameLabel {
                return report("label_mismatch")
            }
        }
        for _, other := range rest {
            if other.label.Certainty != ref.Certainty {
                return report("certainty_mismatch")
            }
        }
        return nil
    }
    // buildAnomalySegs converts a comparison group into its reportable form.
    // Entry i is attributed to models[i], so group must be ordered like models.
    func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
        out := make([]AnomalySegment, 0, len(group))
        for i := range group {
            member := group[i]
            out = append(out, AnomalySegment{
                Model:     models[i],
                Start:     member.seg.StartTime,
                End:       member.seg.EndTime,
                Species:   member.label.Species,
                CallType:  member.label.CallType,
                Certainty: member.label.Certainty,
            })
        }
        return out
    }
    // overlaps returns true if two segments share any time overlap.
    // Segments that merely touch (one ends exactly where the other starts) do not
    // count as overlapping.
    func overlaps(a, b *utils.Segment) bool {
        aStartsBeforeBEnds := a.StartTime < b.EndTime
        if !aStartsBeforeBEnds {
            return false
        }
        return b.StartTime < a.EndTime
    }
  • file addition: calls_clip_labels_test.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/csv"
    "os"
    "path/filepath"
    "strings"
    "testing"
    "skraak/utils"
    )
    // --- test helpers (test file only) ---
    // writeDataFile writes df to dir/name and fails the test immediately if the
    // write returns an error.
    func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
        t.Helper()
        target := filepath.Join(dir, name)
        err := df.Write(target)
        if err == nil {
            return
        }
        t.Fatalf("write .data file %s: %v", name, err)
    }
    // writeMapping stores the given JSON text as dir/mapping.json and fails the
    // test immediately if the file cannot be written.
    func writeMapping(t *testing.T, dir, json string) {
        t.Helper()
        target := filepath.Join(dir, "mapping.json")
        err := os.WriteFile(target, []byte(json), 0644)
        if err == nil {
            return
        }
        t.Fatalf("write mapping.json: %v", err)
    }
    // parseCSV reads the output CSV, returning header and rows.
    func parseCSV(t *testing.T, path string) ([]string, [][]string) {
    t.Helper()
    f, err := os.Open(path)
    if err != nil {
    t.Fatalf("open CSV %s: %v", path, err)
    }
    defer f.Close()
    r := csv.NewReader(f)
    header, err := r.Read()
    if err != nil {
    t.Fatalf("read header: %v", err)
    }
    rows, err := r.ReadAll()
    if err != nil {
    t.Fatalf("read rows: %v", err)
    }
    return header, rows
    }
    // clipLabels runs CallsClipLabels against dir using the suite's default
    // parameters (5 s clips, no clip overlap, 0.25 s minimum label overlap,
    // final-clip mode "full"; mapping.json and clip_labels.csv both inside dir).
    // Each function in extra may adjust the input before the call. The test is
    // aborted if CallsClipLabels returns an error.
    func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
        t.Helper()

        in := CallsClipLabelsInput{
            Folder:          dir,
            MappingPath:     filepath.Join(dir, "mapping.json"),
            OutputPath:      filepath.Join(dir, "clip_labels.csv"),
            ClipDuration:    5,
            ClipOverlap:     0,
            MinLabelOverlap: 0.25,
            FinalClip:       "full",
        }
        for i := range extra {
            extra[i](&in)
        }

        result, err := CallsClipLabels(in)
        if err != nil {
            t.Fatalf("CallsClipLabels: %v", err)
        }
        return result
    }
    // --- tests ---
    func TestClipLabels_RealClassTrue(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 20},
    Segments: []*utils.Segment{
    {
    StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    // Header: file, start_time, end_time, Kiwi
    if len(header) != 4 || header[3] != "Kiwi" {
    t.Fatalf("header = %v, want [..., Kiwi]", header)
    }
    // Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
    // Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
    // Clip 10-15, 15-20 → Kiwi=False
    kiwiCol := 3
    for i, row := range rows {
    switch row[1] {
    case "0.0", "5.0":
    if row[kiwiCol] != "True" {
    t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
    }
    case "10.0", "15.0":
    if row[kiwiCol] != "False" {
    t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
    }
    }
    }
    if out.PerClassTrueCount["Kiwi"] != 2 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
    }
    }
    func TestClipLabels_GapClipsAllFalse(t *testing.T) {
    dir := t.TempDir()
    // 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 15},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    if out.ClipsAllFalseGap != 2 {
    t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
    if out.RowsWritten != 3 {
    t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
    }
    }
    // TestClipLabels_NegativeOverridesPositive checks that a clip overlapped by
    // both a real-class segment and a __NEGATIVE__ segment is emitted all-False,
    // while a clip overlapped only by the real-class segment stays True.
    func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
        dir := t.TempDir()
        // Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
        // Clip 5-10 overlaps only Kiwi (3s) → True
        writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
            Meta: &utils.DataMeta{Duration: 10},
            Segments: []*utils.Segment{
                {
                    StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
                    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
                },
                {
                    StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
                    Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
                },
            },
        })
        writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
        out := clipLabels(t, dir)
        if out.ClipsNegative != 1 {
            t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
        }
        _, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
        // Guard the indexing below: report an unexpected CSV shape as a normal
        // test failure instead of an index-out-of-range panic.
        if len(rows) < 2 {
            t.Fatalf("got %d CSV rows, want at least 2", len(rows))
        }
        for i, row := range rows[:2] {
            if len(row) < 4 {
                t.Fatalf("row %d has %d columns, want at least 4: %v", i, len(row), row)
            }
        }
        // Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
        if rows[0][3] != "False" {
            t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
        }
        // Clip 5-10: only Kiwi overlaps (3s) → True
        if rows[1][3] != "True" {
            t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
        }
    }
    func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
    dir := t.TempDir()
    // Don't Know segment 0-5, Kiwi segment 6-10
    // Clip 0-5 overlaps __IGNORE__ → excluded
    // Clip 5-10 overlaps Kiwi → emitted with True
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 15},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
    },
    {
    StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
    out := clipLabels(t, dir)
    if out.ClipsIgnored != 1 {
    t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
    }
    if out.SegmentsIgnored != 1 {
    t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
    }
    // Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
    if out.RowsWritten != 2 {
    t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
    }
    }
    func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
    dir := t.TempDir()
    // Same time range, two filters. Only "wanted" should contribute.
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 10},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{
    {Species: "Kiwi", Certainty: 100, Filter: "wanted"},
    {Species: "Not", Certainty: 100, Filter: "unwanted"},
    },
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
    out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
    // Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
    // Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
    if out.ClipsNegative != 0 {
    t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
    t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
    }
    func TestClipLabels_MappingCoverageError(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
    Meta: &utils.DataMeta{Duration: 10},
    Segments: []*utils.Segment{
    {
    StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
    Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
    },
    },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    input := CallsClipLabelsInput{
    Folder: dir,
    MappingPath: filepath.Join(dir, "mapping.json"),
    OutputPath: filepath.Join(dir, "clip_labels.csv"),
    ClipDuration: 5,
    ClipOverlap: 0,
    MinLabelOverlap: 0.25,
    FinalClip: "full",
    }
    _, err := CallsClipLabels(input)
    if err == nil {
    t.Fatal("expected error for missing species in mapping")
    }
    if !strings.Contains(err.Error(), "Mystery") {
    t.Errorf("error should mention missing species, got: %v", err)
    }
    }
    // TestClipLabels_AppendMode checks duplicate detection against an existing
    // output CSV: running twice over the same folder and output path must fail
    // on the second run with a "duplicate" error.
    func TestClipLabels_AppendMode(t *testing.T) {
        dir := t.TempDir()
        writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

        kiwi := &utils.Segment{
            StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
            Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
        }
        writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
            Meta:     &utils.DataMeta{Duration: 5},
            Segments: []*utils.Segment{kiwi},
        })

        // First run creates the CSV with a single row.
        first := clipLabels(t, dir)
        if first.RowsWritten != 1 {
            t.Fatalf("first run: RowsWritten = %d, want 1", first.RowsWritten)
        }

        // Second run uses the same folder and the same output file, so the
        // clip it produces already exists in the CSV.
        again := CallsClipLabelsInput{
            Folder:          dir,
            MappingPath:     filepath.Join(dir, "mapping.json"),
            OutputPath:      filepath.Join(dir, "clip_labels.csv"),
            ClipDuration:    5,
            ClipOverlap:     0,
            MinLabelOverlap: 0.25,
            FinalClip:       "full",
        }
        _, err := CallsClipLabels(again)
        if err == nil {
            t.Fatal("expected duplicate error on second run with same folder")
        }
        if msg := err.Error(); !strings.Contains(msg, "duplicate") {
            t.Errorf("error should mention duplicate, got: %v", err)
        }
    }
    // TestClipLabels_MultipleFiles checks that every .data file in the folder is
    // processed and that rows for each of them end up in the one output CSV.
    func TestClipLabels_MultipleFiles(t *testing.T) {
        dir := t.TempDir()
        writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

        newKiwi := func() *utils.Segment {
            return &utils.Segment{
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            }
        }
        writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
            Meta:     &utils.DataMeta{Duration: 10},
            Segments: []*utils.Segment{newKiwi()},
        })
        writeDataFile(t, dir, "b.wav.data", &utils.DataFile{
            Meta:     &utils.DataMeta{Duration: 5},
            Segments: []*utils.Segment{newKiwi()},
        })

        out := clipLabels(t, dir)

        if got := out.DataFilesParsed; got != 2 {
            t.Errorf("DataFilesParsed = %d, want 2", got)
        }
        // a contributes clips 0-5 and 5-10, b contributes clip 0-5: 3 rows.
        if got := out.RowsWritten; got != 3 {
            t.Errorf("RowsWritten = %d, want 3", got)
        }

        _, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
        distinct := make(map[string]struct{})
        for i := range rows {
            distinct[rows[i][0]] = struct{}{}
        }
        if len(distinct) != 2 {
            t.Errorf("expected 2 distinct files in CSV, got %d", len(distinct))
        }
    }
  • file addition: calls_clip_labels.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "slices"
    "sort"
    "strconv"
    "strings"
    "skraak/utils"
    )
    // CallsClipLabelsInput configures the clip-labels exporter.
    //
    // ClipDuration, ClipOverlap, MinLabelOverlap and FinalClip are checked by
    // validateClipLabelsInput before any file is read.
    type CallsClipLabelsInput struct {
    // Folder is scanned for .data files; it must contain at least one.
    Folder string `json:"folder"`
    // MappingPath is the mapping file loaded with utils.LoadMappingFile.
    MappingPath string `json:"mapping"`
    // Filter, when non-empty, restricts processing to labels whose Filter
    // field equals it; when empty, all labels are considered.
    Filter string `json:"filter,omitempty"`
    // OutputPath is the CSV to write. If it already exists and is non-empty,
    // its header must match and new rows are appended.
    OutputPath string `json:"output"`
    // ClipDuration is the clip window length; must be > 0.
    ClipDuration float64 `json:"clip_duration"`
    // ClipOverlap is the overlap between consecutive windows; must be in
    // [0, ClipDuration).
    ClipOverlap float64 `json:"clip_overlap"`
    // MinLabelOverlap is the minimum overlap between a segment and a clip
    // window for the segment to count towards that clip; must be > 0.
    MinLabelOverlap float64 `json:"min_label_overlap"`
    // FinalClip is parsed by utils.ParseFinalClipMode (the tests use "full").
    FinalClip string `json:"final_clip"`
    }
    // CallsClipLabelsOutput summarises a run.
    //
    // Folder and OutputPath echo the input. The counters are accumulated while
    // the .data files are processed, so on an error return they may be only
    // partially filled in.
    type CallsClipLabelsOutput struct {
    Folder string `json:"folder"`
    OutputPath string `json:"output"`
    Filter string `json:"filter,omitempty"`
    // Classes are the mapping's real (non-sentinel) class names, sorted before
    // CallsClipLabels returns successfully.
    Classes []string `json:"classes"`
    // DataFilesParsed is the number of .data files parsed from Folder.
    DataFilesParsed int `json:"data_files_parsed"`
    ClipsNegative int `json:"clips_negative"` // emitted, all-False because of __NEGATIVE__
    ClipsIgnored int `json:"clips_ignored"` // excluded from output because of __IGNORE__ overlap
    SegmentsIgnored int `json:"segments_ignored"` // segments whose species maps to __IGNORE__
    ClipsAllFalseGap int `json:"clips_all_false_gap"` // emitted, all-False because no overlap
    // PerClassTrueCount maps a class name to the number of emitted clips whose
    // column for that class is True.
    PerClassTrueCount map[string]int `json:"per_class_true_count"`
    // AppendedToFile is true when OutputPath already existed with content and
    // the new rows were appended to it.
    AppendedToFile bool `json:"appended_to_file"`
    // ExistingRowsFound is the number of distinct (file, start, end) keys read
    // from the pre-existing output file.
    ExistingRowsFound int `json:"existing_rows_found"`
    // RowsWritten is the number of rows written by this run.
    RowsWritten int `json:"rows_written"`
    }
    // resolvedSeg is a segment that has been classified by the mapping and is
    // ready for overlap-checking against clip windows. One resolvedSeg is
    // produced per accepted label, so a segment carrying several labels yields
    // several entries with the same start and end.
    type resolvedSeg struct {
    // start and end are copied from the segment's StartTime and EndTime.
    start, end float64
    // kind is the mapping kind returned by the mapping's Classify method.
    kind utils.MappingKind
    classIdx int // valid only when kind == utils.MappingReal
    }
    // clipDisposition describes the outcome for a single clip window.
    // classifyClip picks exactly one value per clip, checking for an
    // __IGNORE__ hit first, then a __NEGATIVE__ hit, then class hits.
    type clipDisposition int
    const (
    dispoLabelled clipDisposition = iota // at least one class column is True
    dispoNegative // __NEGATIVE__ hit, all class columns False
    dispoGap // no segment overlaps, all class columns False
    dispoIgnored // __IGNORE__ hit, clip excluded from output
    )
    // clipLabelsRow is one row of the output CSV.
    type clipLabelsRow struct {
    // file is the WAV path written to the "file" column.
    file string
    // start and end are the clip window bounds; they are rendered with
    // formatTime when written.
    start float64
    end float64
    // flags holds one entry per class column, in header order; true is
    // written as "True" and false as "False".
    flags []bool
    }
    // rowKey is used for duplicate detection.
    // start and end are kept as strings: for rows read back from an existing
    // CSV they are the raw column text, and for new rows they are the
    // formatTime rendering, so keys compare on the textual form.
    type rowKey struct {
    file string
    start string
    end string
    }
    // NOTE: this paragraph documents the CallsClipLabels function, which is
    // defined further down in this file, after its helper types and functions.
    //
    // CallsClipLabels reads .data files from a single folder and writes a CSV in
    // OpenSoundScape's clip_labels format: one row per clip per file, with one
    // True/False column per class in the mapping.
    //
    // Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
    // column is True when any annotation of that class overlaps the window by
    // ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
    // get no column and contribute no labels.
    //
    // parsedClipFile holds a parsed .data file for clip-labels processing.
    type parsedClipFile struct {
    // path is the .data file path as returned by utils.FindDataFiles.
    path string
    // df is the parsed content; parseClipLabelsDataFiles only keeps files whose
    // Meta is non-nil with a positive Duration.
    df *utils.DataFile
    }
    // validateClipLabelsInput checks the numeric clip parameters and parses the
    // final-clip mode.
    //
    // The final-clip mode is parsed first, then the checks run in this order:
    // ClipDuration > 0, ClipOverlap in [0, ClipDuration), MinLabelOverlap > 0.
    // The first failure is returned together with a zero mode.
    func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
        mode, err := utils.ParseFinalClipMode(input.FinalClip)
        if err != nil {
            return 0, err
        }

        switch {
        case input.ClipDuration <= 0:
            return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
        case input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration:
            return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
        case input.MinLabelOverlap <= 0:
            return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
        }

        return mode, nil
    }
    // parseClipLabelsDataFiles locates and parses every .data file in folder.
    //
    // It fails when the folder cannot be scanned, contains no .data files, a
    // file cannot be parsed, or a file lacks a positive Duration. While parsing
    // it collects the species of every label that passes filter (an empty
    // filter accepts all labels) and finally fails if the mapping does not cover
    // all of them.
    func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
        paths, err := utils.FindDataFiles(folder)
        switch {
        case err != nil:
            return nil, fmt.Errorf("scan folder %s: %w", folder, err)
        case len(paths) == 0:
            return nil, fmt.Errorf("no .data files found in %s", folder)
        }

        seen := make(map[string]bool)
        files := make([]parsedClipFile, 0, len(paths))
        for _, path := range paths {
            df, err := utils.ParseDataFile(path)
            if err != nil {
                return nil, fmt.Errorf("parse %s: %w", path, err)
            }
            if df.Meta == nil || df.Meta.Duration <= 0 {
                return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", path)
            }

            for _, seg := range df.Segments {
                for _, lbl := range seg.Labels {
                    if filter == "" || lbl.Filter == filter {
                        seen[lbl.Species] = true
                    }
                }
            }

            files = append(files, parsedClipFile{path: path, df: df})
        }

        missing := mapping.ValidateCoversSpecies(seen)
        if len(missing) > 0 {
            return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
        }
        return files, nil
    }
    // dedupClipLabelsRows returns an error for the first new row whose
    // (file, start, end) key either already appears as a key in existing or
    // repeats an earlier new row. Start and end are compared in their
    // formatTime rendering. existing is not modified.
    func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
        seen := make(map[rowKey]bool, len(rows))
        for i := range rows {
            key := rowKey{
                file:  rows[i].file,
                start: formatTime(rows[i].start),
                end:   formatTime(rows[i].end),
            }
            // Presence of the key is what matters, not the stored value.
            _, alreadyOnDisk := existing[key]
            if alreadyOnDisk || seen[key] {
                return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", key.file, key.start, key.end)
            }
            seen[key] = true
        }
        return nil
    }
    // CallsClipLabels exports clip labels for every .data file in input.Folder
    // to the CSV at input.OutputPath, with the columns file, start_time,
    // end_time followed by one True/False column per real class in the mapping.
    //
    // Steps, in order: validate the input, load the mapping, parse the .data
    // files (checking mapping coverage), read any existing output file (its
    // header must match), build the rows for every file, reject duplicate
    // (file, start, end) keys, then write or append the rows.
    //
    // The returned output is a summary of the run; on error it holds whatever
    // had been filled in up to the failing step.
    func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
        out := CallsClipLabelsOutput{
            Folder:            input.Folder,
            OutputPath:        input.OutputPath,
            PerClassTrueCount: map[string]int{},
        }

        finalClipMode, err := validateClipLabelsInput(input)
        if err != nil {
            return out, err
        }

        mapping, err := utils.LoadMappingFile(input.MappingPath)
        if err != nil {
            return out, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
        }
        classes := mapping.Classes()
        if len(classes) == 0 {
            return out, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
        }
        out.Classes, out.Filter = classes, input.Filter

        // Column position of each class, in header order.
        classIdx := make(map[string]int, len(classes))
        for pos := range classes {
            classIdx[classes[pos]] = pos
        }

        parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
        if err != nil {
            return out, err
        }
        out.DataFilesParsed = len(parsed)

        header := make([]string, 0, 3+len(classes))
        header = append(header, "file", "start_time", "end_time")
        header = append(header, classes...)

        existing, appendMode, err := loadExistingRows(input.OutputPath, header)
        if err != nil {
            return out, err
        }
        out.AppendedToFile, out.ExistingRowsFound = appendMode, len(existing)

        cwd, err := os.Getwd()
        if err != nil {
            return out, fmt.Errorf("getwd: %w", err)
        }
        folderAbs, err := filepath.Abs(input.Folder)
        if err != nil {
            return out, fmt.Errorf("abs %s: %w", input.Folder, err)
        }

        rows := make([]clipLabelsRow, 0, 1024)
        for i := range parsed {
            pf := parsed[i]
            fileRows, err := processClipLabelsFile(pf.path, pf.df, mapping, classIdx, classes, input, finalClipMode, cwd, folderAbs, &out)
            if err != nil {
                return out, err
            }
            rows = append(rows, fileRows...)
        }

        if err := dedupClipLabelsRows(rows, existing); err != nil {
            return out, err
        }
        if err := writeRows(input.OutputPath, header, rows, appendMode); err != nil {
            return out, err
        }

        out.RowsWritten = len(rows)
        // Sorted only after the CSV has been written.
        sort.Strings(out.Classes)
        return out, nil
    }
    // processClipLabelsFile produces the clip-labels rows for one parsed .data
    // file: it generates the clip windows for the file's duration, resolves the
    // file's segments against the mapping, and labels every window. Counters in
    // out are updated along the way. A file that yields no windows produces no
    // rows and no error.
    func processClipLabelsFile(
        path string,
        df *utils.DataFile,
        mapping utils.MappingFile,
        classIdx map[string]int,
        classes []string,
        input CallsClipLabelsInput,
        finalClipMode utils.FinalClipMode,
        cwd, folderAbs string,
        out *CallsClipLabelsOutput,
    ) ([]clipLabelsRow, error) {
        // NOTE(review): the trailing 10 is forwarded unchanged to
        // GenerateClipTimes; presumably a rounding precision — confirm.
        clipWindows, genErr := utils.GenerateClipTimes(df.Meta.Duration, input.ClipDuration, input.ClipOverlap, finalClipMode, 10)
        if genErr != nil {
            return nil, fmt.Errorf("generate clip windows for %s: %w", path, genErr)
        }
        if len(clipWindows) == 0 {
            return nil, nil
        }

        resolved := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)

        wavRel, relErr := computeWavRelPath(path, cwd, folderAbs)
        if relErr != nil {
            return nil, relErr
        }

        rows := labelClipWindows(clipWindows, resolved, wavRel, classes, input.MinLabelOverlap, out)
        return rows, nil
    }
    // resolveSegments turns raw segments into resolvedSeg entries, one per
    // accepted label.
    //
    // A segment shorter than minLabelOverlap is dropped outright, since it can
    // never overlap a clip by that much. Labels are skipped when they fail the
    // filter (an empty filter accepts all), when the mapping does not classify
    // their species, or when a real class is absent from classIdx. Every label
    // that maps to __IGNORE__ increments out.SegmentsIgnored.
    func resolveSegments(
        segments []*utils.Segment,
        filter string,
        minLabelOverlap float64,
        mapping utils.MappingFile,
        classIdx map[string]int,
        out *CallsClipLabelsOutput,
    ) []resolvedSeg {
        resolved := make([]resolvedSeg, 0, len(segments))
        for _, seg := range segments {
            if seg.EndTime-seg.StartTime < minLabelOverlap {
                continue
            }
            for _, lbl := range seg.Labels {
                if filter != "" && lbl.Filter != filter {
                    continue
                }
                canon, kind, ok := mapping.Classify(lbl.Species)
                if !ok {
                    continue
                }

                entry := resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind}
                switch kind {
                case utils.MappingReal:
                    idx, known := classIdx[canon]
                    if !known {
                        continue
                    }
                    entry.classIdx = idx
                case utils.MappingIgn:
                    out.SegmentsIgnored++
                case utils.MappingNeg:
                    // Kept as-is; no counter is touched here.
                default:
                    continue
                }
                resolved = append(resolved, entry)
            }
        }
        return resolved
    }
    // computeWavRelPath derives the WAV path written to the CSV for a .data
    // file: the .data file's base name with the ".data" suffix removed, joined
    // onto folderAbs and expressed relative to cwd. If no relative path can be
    // computed the absolute path is used instead. The returned error is always
    // nil.
    func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
        dotSep := "." + string(filepath.Separator)

        wavAbs := filepath.Join(folderAbs, strings.TrimSuffix(filepath.Base(dataPath), ".data"))

        result := wavAbs
        if rel, err := filepath.Rel(cwd, wavAbs); err == nil {
            result = rel
        }

        // Relative paths are given a leading "./" to match the OPSO / pandas
        // convention; absolute and already-prefixed paths are left alone.
        needsPrefix := result != "" && !filepath.IsAbs(result) && !strings.HasPrefix(result, dotSep)
        if needsPrefix {
            result = dotSep + result
        }
        return result, nil
    }
    // labelClipWindows classifies every clip window of one file and builds the
    // CSV rows for it, updating the counters in out.
    //
    // Ignored clips are counted and produce no row. Negative and gap clips
    // produce a row with all flags false. Labelled clips produce a row with the
    // hit classes set, and each hit increments that class's PerClassTrueCount.
    // rel is the value for the row's file column.
    func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
        var rows []clipLabelsRow
        nClasses := len(classes)

        for _, win := range windows {
            outcome, hits := classifyClip(win, segs, minLabelOverlap, nClasses)
            if outcome == dispoIgnored {
                out.ClipsIgnored++
                continue
            }

            flags := make([]bool, nClasses)
            if outcome == dispoNegative {
                out.ClipsNegative++
            } else if outcome == dispoGap {
                out.ClipsAllFalseGap++
            } else if outcome == dispoLabelled {
                for i := range hits {
                    if !hits[i] {
                        continue
                    }
                    flags[i] = true
                    out.PerClassTrueCount[classes[i]]++
                }
            }

            rows = append(rows, clipLabelsRow{file: rel, start: win.Start, end: win.End, flags: flags})
        }
        return rows
    }
    // classifyClip decides what happens to one clip window given the resolved
    // segments. Only segments overlapping the window by at least
    // minLabelOverlap are considered.
    //
    // Priority: __IGNORE__ > __NEGATIVE__ > class labels. The second result
    // holds the per-class hits (length nClasses); it is nil for ignored clips.
    func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
        var sawIgnore, sawNegative bool
        hits := make([]bool, nClasses)

        for i := range segs {
            seg := segs[i]
            if overlapSeconds(seg.start, seg.end, w.Start, w.End) < minLabelOverlap {
                continue
            }
            if seg.kind == utils.MappingIgn {
                sawIgnore = true
            } else if seg.kind == utils.MappingNeg {
                sawNegative = true
            } else if seg.kind == utils.MappingReal {
                hits[seg.classIdx] = true
            }
        }

        switch {
        case sawIgnore:
            return dispoIgnored, nil
        case sawNegative:
            return dispoNegative, hits
        case slices.Contains(hits, true):
            return dispoLabelled, hits
        }
        return dispoGap, hits
    }
    // loadExistingRows inspects a possibly pre-existing output CSV.
    //
    // When the file is missing or empty it returns (nil, false, nil): nothing to
    // deduplicate against and no append. Otherwise the header must equal
    // expectedHeader exactly, and the (file, start, end) text of every row is
    // returned as a key set together with true for append mode. Rows with fewer
    // than three fields are rejected.
    func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
        info, statErr := os.Stat(outputPath)
        switch {
        case statErr != nil && os.IsNotExist(statErr):
            return nil, false, nil
        case statErr != nil:
            return nil, false, fmt.Errorf("stat %s: %w", outputPath, statErr)
        case info.Size() == 0:
            return nil, false, nil
        }

        file, openErr := os.Open(outputPath)
        if openErr != nil {
            return nil, false, fmt.Errorf("open existing %s: %w", outputPath, openErr)
        }
        defer func() { _ = file.Close() }()

        reader := csv.NewReader(file)
        // Accept rows of any width; short rows are rejected explicitly below.
        reader.FieldsPerRecord = -1

        header, headErr := reader.Read()
        if headErr != nil {
            return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, headErr)
        }
        if !slices.Equal(header, expectedHeader) {
            return nil, false, fmt.Errorf("column-set mismatch in existing %s\n existing: %s\n new: %s",
                outputPath, strings.Join(header, ","), strings.Join(expectedHeader, ","))
        }

        keys := map[rowKey]bool{}
        for rec, err := reader.Read(); err != io.EOF; rec, err = reader.Read() {
            if err != nil {
                return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, err)
            }
            if len(rec) < 3 {
                return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, rec)
            }
            keys[rowKey{file: rec[0], start: rec[1], end: rec[2]}] = true
        }
        return keys, true, nil
    }
    // overlapSeconds returns the length of the intersection of the intervals
    // [aStart, aEnd) and [bStart, bEnd), or 0 when they are disjoint or only
    // touch.
    func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
        from, to := max(aStart, bStart), min(aEnd, bEnd)
        switch {
        case to <= from:
            return 0
        default:
            return to - from
        }
    }
    // formatTime renders a float the way pandas' to_csv writes floats by
    // default: the shortest decimal form that round-trips, with ".0" appended
    // when that form has no decimal point.
    // e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
    func formatTime(v float64) string {
        text := strconv.FormatFloat(v, 'f', -1, 64)
        if strings.ContainsRune(text, '.') {
            return text
        }
        return text + ".0"
    }
    // writeRows writes the clip-labels rows to the CSV file at path.
    //
    // In append mode the file must already exist and rows are added at its end
    // without a header. Otherwise the file is created or truncated and header is
    // written first. Times are rendered with formatTime and flags as
    // "True"/"False".
    //
    // Every row must carry the same number of flags as the first row. A
    // mismatch is reported as an error, because the record buffer is reused
    // between rows and would otherwise emit stale columns or panic.
    //
    // A failure to close the file is reported when no earlier error occurred,
    // since a close error on a written file can mean the data was never
    // persisted.
    func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) (err error) {
        flags := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
        if appendMode {
            flags = os.O_APPEND | os.O_WRONLY
        }
        f, err := os.OpenFile(path, flags, 0644)
        if err != nil {
            return fmt.Errorf("open %s for write: %w", path, err)
        }
        defer func() {
            if cerr := f.Close(); cerr != nil && err == nil {
                err = fmt.Errorf("close %s: %w", path, cerr)
            }
        }()

        w := csv.NewWriter(f)
        if !appendMode {
            if err := w.Write(header); err != nil {
                return fmt.Errorf("write header: %w", err)
            }
        }

        if len(rows) > 0 {
            nFlags := len(rows[0].flags)
            // One record buffer reused for every row.
            rec := make([]string, 3+nFlags)
            for _, r := range rows {
                if len(r.flags) != nFlags {
                    return fmt.Errorf("write row: %s [%s, %s] has %d flags, want %d",
                        r.file, formatTime(r.start), formatTime(r.end), len(r.flags), nFlags)
                }
                rec[0] = r.file
                rec[1] = formatTime(r.start)
                rec[2] = formatTime(r.end)
                for i, b := range r.flags {
                    if b {
                        rec[3+i] = "True"
                    } else {
                        rec[3+i] = "False"
                    }
                }
                if err := w.Write(rec); err != nil {
                    return fmt.Errorf("write row: %w", err)
                }
            }
        }

        // Flush reports nothing itself; Error surfaces any buffered write failure.
        w.Flush()
        return w.Error()
    }
  • file addition: calls_clip_bench_test.go (----------)
    [0.67281]
    package calls
    import (
    "encoding/binary"
    "math"
    "os"
    "testing"
    "skraak/utils"
    )
    // benchWAV is the WAV recording read by the benchmarks in this file.
    // NOTE(review): the path is relative, so it presumably resolves against the
    // package directory when run through `go test` — confirm the fixture
    // location.
    const benchWAV = "../../audio/20211028_211500.WAV"
    // ==================== WAV I/O ====================
    // BenchmarkReadWAV measures reading all samples of benchWAV with
    // utils.ReadWAVSamples; a read failure aborts the benchmark.
    func BenchmarkReadWAV(b *testing.B) {
        b.ReportAllocs()
        for iter := 0; iter < b.N; iter++ {
            if _, _, err := utils.ReadWAVSamples(benchWAV); err != nil {
                b.Fatal(err)
            }
        }
    }
    func BenchmarkConvertToFloat64_16bit(b *testing.B) {
    // Simulate 16-bit mono WAV data (same size as test file: 14.32M samples)
    numSamples := 14320000
    data := make([]byte, numSamples*2)
    for i := range numSamples {
    binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
    }
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
    _ = convertToFloat64Bench(data, 16, 1)
    }
    }
    // convertToFloat64Bench is a benchmarking copy of utils' unexported
    // convertToFloat64. It reads one little-endian signed 16-bit value from the
    // start of every frame (a frame being bitsPerSample/8*channels bytes) and
    // scales it by 1/32768, so only the first channel is decoded and the data is
    // always interpreted as 16-bit.
    func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
        frameBytes := (bitsPerSample / 8) * channels
        out := make([]float64, len(data)/frameBytes)
        for i, off := 0, 0; i < len(out); i, off = i+1, off+frameBytes {
            raw := binary.LittleEndian.Uint16(data[off : off+2])
            out[i] = float64(int16(raw)) / 32768.0
        }
        return out
    }
    // BenchmarkWriteWAV measures writing an extracted segment of benchWAV to a
    // fresh temporary file with utils.WriteWAVFile. Creating and removing the
    // temporary file is part of the timed loop.
    //
    // The benchmark aborts if the fixture cannot be read or a temporary file
    // cannot be created, rather than timing an empty input or dereferencing a
    // nil file.
    func BenchmarkWriteWAV(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        // 872 and 895 are the segment bounds passed to ExtractSegmentSamples
        // (presumably seconds — confirm against utils).
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        b.Logf("segment samples=%d", len(segSamples))
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            f, err := os.CreateTemp("", "bench_*.wav")
            if err != nil {
                b.Fatal(err)
            }
            utils.WriteWAVFile(f.Name(), segSamples, sr)
            // Best-effort cleanup: failures here do not affect the measurement.
            f.Close()
            os.Remove(f.Name())
        }
    }
    // ==================== Resample ====================
    // BenchmarkResampleRate_48k measures utils.ResampleRate over all samples of
    // benchWAV with a source rate of 48000 and a target rate of 16000.
    //
    // The benchmark aborts if the fixture cannot be read, so a missing file is
    // not silently benchmarked as an empty input.
    func BenchmarkResampleRate_48k(b *testing.B) {
        samples, _, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        b.Logf("resampling %d samples 48000->16000", len(samples))
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            utils.ResampleRate(samples, 48000, 16000)
        }
    }
    // BenchmarkResampleRate_250k measures utils.ResampleRate over all samples of
    // benchWAV with a source rate of 250000 and a target rate of 16000. The
    // source rate is a parameter only; the fixture's own sample rate is not
    // consulted.
    //
    // The benchmark aborts if the fixture cannot be read, so a missing file is
    // not silently benchmarked as an empty input.
    func BenchmarkResampleRate_250k(b *testing.B) {
        samples, _, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        b.Logf("resampling %d samples 250000->16000", len(samples))
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            utils.ResampleRate(samples, 250000, 16000)
        }
    }
    // ==================== Spectrogram pipeline ====================
    // BenchmarkExtractSegment measures utils.ExtractSegmentSamples cutting the
    // 872-895 range out of benchWAV. An empty result fails the benchmark.
    //
    // The benchmark aborts with the read error if the fixture cannot be loaded,
    // instead of failing later with the less specific "empty segment".
    func BenchmarkExtractSegment(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        b.Logf("full file: %d samples, sr=%d", len(samples), sr)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
            if len(seg) == 0 {
                b.Fatal("empty segment")
            }
        }
    }
    // BenchmarkPowerSpectrumFFT_512 measures one 512-point frame: applying a
    // Hann window to the first 512 samples of the extracted segment and running
    // utils.PowerSpectrumFFT on the result. The frame, power and scratch
    // buffers are allocated once outside the timed loop.
    //
    // The benchmark aborts if the fixture cannot be read or the segment holds
    // fewer than 512 samples, instead of panicking on an out-of-range index.
    func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
        n := 512
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        if len(segSamples) < n {
            b.Fatalf("segment has %d samples, need at least %d", len(segSamples), n)
        }
        frameData := make([]float64, n)
        power := make([]float64, n/2+1)
        scratch := make([]complex128, n)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            // Simulate the windowing step (Hann) + FFT
            for j := 0; j < n; j++ {
                frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
            }
            utils.PowerSpectrumFFT(frameData, power, scratch)
        }
    }
    // BenchmarkSpectrogram_23s measures utils.GenerateSpectrogram on the 872-895
    // segment of benchWAV, using utils.DefaultSpectrogramConfig(16000). A nil
    // spectrogram fails the benchmark.
    //
    // The benchmark aborts if the fixture cannot be read, so the failure names
    // the real cause.
    func BenchmarkSpectrogram_23s(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            if spect == nil {
                b.Fatal("nil spectrogram")
            }
        }
    }
    // BenchmarkSpectrogram_60s measures utils.GenerateSpectrogram on the 0-60
    // segment of benchWAV, using utils.DefaultSpectrogramConfig(16000). A nil
    // spectrogram fails the benchmark.
    //
    // The benchmark aborts if the fixture cannot be read, so the failure names
    // the real cause.
    func BenchmarkSpectrogram_60s(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
        cfg := utils.DefaultSpectrogramConfig(16000)
        b.Logf("60s segment samples=%d", len(segSamples))
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            if spect == nil {
                b.Fatal("nil spectrogram")
            }
        }
    }
    // ==================== Image creation & resize ====================
    // BenchmarkCreateGrayscaleImage measures utils.CreateGrayscaleImage on a
    // spectrogram computed once, outside the timed loop, from the 872-895
    // segment of benchWAV. A nil image fails the benchmark.
    //
    // The benchmark aborts if the fixture cannot be read, so setup never runs
    // on an empty input.
    func BenchmarkCreateGrayscaleImage(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            img := utils.CreateGrayscaleImage(spect)
            if img == nil {
                b.Fatal("nil image")
            }
        }
    }
    // BenchmarkCreateRGBImage measures applying the L4 colormap and building an
    // RGB image from the result; both steps are inside the timed loop. The
    // spectrogram is computed once from the 872-895 segment of benchWAV. A nil
    // image fails the benchmark.
    //
    // The benchmark aborts if the fixture cannot be read, so setup never runs
    // on an empty input.
    func BenchmarkCreateRGBImage(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            colorData := utils.ApplyL4Colormap(spect)
            img := utils.CreateRGBImage(colorData)
            if img == nil {
                b.Fatal("nil image")
            }
        }
    }
    // BenchmarkApplyL4Colormap measures utils.ApplyL4Colormap on a spectrogram
    // computed once from the 872-895 segment of benchWAV. A nil result fails the
    // benchmark.
    //
    // The benchmark aborts if the fixture cannot be read, so setup never runs
    // on an empty input.
    func BenchmarkApplyL4Colormap(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            colorData := utils.ApplyL4Colormap(spect)
            if colorData == nil {
                b.Fatal("nil colormap")
            }
        }
    }
    // BenchmarkResizeGray224 measures utils.ResizeImage to 224x224 on a
    // grayscale spectrogram image built once, outside the timed region, from
    // the 872-895 segment of the benchWAV fixture.
    func BenchmarkResizeGray224(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            // Fail during setup rather than resizing an image built from nothing.
            b.Fatalf("reading %s: %v", benchWAV, err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        img := utils.CreateGrayscaleImage(spect)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            resized := utils.ResizeImage(img, 224, 224)
            if resized == nil {
                b.Fatal("nil resize")
            }
        }
    }
    // BenchmarkResizeGray448 measures utils.ResizeImage to 448x448 on a
    // grayscale spectrogram image built once, outside the timed region, from
    // the 872-895 segment of the benchWAV fixture.
    func BenchmarkResizeGray448(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            // Fail during setup rather than resizing an image built from nothing.
            b.Fatalf("reading %s: %v", benchWAV, err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        img := utils.CreateGrayscaleImage(spect)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            resized := utils.ResizeImage(img, 448, 448)
            if resized == nil {
                b.Fatal("nil resize")
            }
        }
    }
    // ==================== PNG write ====================
    // BenchmarkWritePNG_224 measures creating a temp file, encoding a 224x224
    // grayscale spectrogram image into it with utils.WritePNG, and removing the
    // file again. The image is prepared once, outside the timed region.
    func BenchmarkWritePNG_224(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatalf("reading %s: %v", benchWAV, err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        cfg := utils.DefaultSpectrogramConfig(16000)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        img := utils.CreateGrayscaleImage(spect)
        resized := utils.ResizeImage(img, 224, 224)
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            f, err := os.CreateTemp("", "bench_*.png")
            if err != nil {
                // A nil *os.File would otherwise panic inside WritePNG.
                b.Fatalf("creating temp file: %v", err)
            }
            writeErr := utils.WritePNG(resized, f)
            closeErr := f.Close()
            // Best-effort cleanup before reporting, so a failure leaves no temp file.
            _ = os.Remove(f.Name())
            if writeErr != nil {
                b.Fatalf("writing PNG: %v", writeErr)
            }
            if closeErr != nil {
                b.Fatalf("closing temp file: %v", closeErr)
            }
        }
    }
    // ==================== Full pipeline ====================
    // BenchmarkFullPipelineGray224 measures the whole grayscale clip pipeline
    // per iteration: segment extraction, resampling to 16000 Hz when the source
    // rate is higher, spectrogram, grayscale image, resize to 224x224, PNG
    // write and WAV write. Only the initial WAV read is outside the timed region.
    func BenchmarkFullPipelineGray224(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatalf("reading %s: %v", benchWAV, err)
        }
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
            outputSR := sr
            if sr > 16000 {
                segSamples = utils.ResampleRate(segSamples, sr, 16000)
                outputSR = 16000
            }
            cfg := utils.DefaultSpectrogramConfig(outputSR)
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            img := utils.CreateGrayscaleImage(spect)
            resized := utils.ResizeImage(img, 224, 224)

            f, err := os.CreateTemp("", "bench_*.png")
            if err != nil {
                // A nil *os.File would otherwise panic inside WritePNG.
                b.Fatalf("creating temp file: %v", err)
            }
            path := f.Name()
            writeErr := utils.WritePNG(resized, f)
            closeErr := f.Close()
            _ = os.Remove(path) // best-effort cleanup
            if writeErr != nil {
                b.Fatalf("writing PNG: %v", writeErr)
            }
            if closeErr != nil {
                b.Fatalf("closing temp file: %v", closeErr)
            }

            // The WAV reuses the just-removed temp path; only the write cost
            // matters here, not the file name.
            wavErr := utils.WriteWAVFile(path, segSamples, outputSR)
            _ = os.Remove(path) // best-effort cleanup
            if wavErr != nil {
                b.Fatalf("writing WAV: %v", wavErr)
            }
        }
    }
    // BenchmarkFullPipelineColor448 measures the whole colour clip pipeline per
    // iteration: segment extraction, resampling to 16000 Hz when the source
    // rate is higher, spectrogram, L4 colormap, RGB image, resize to 448x448,
    // PNG write and WAV write. Only the initial WAV read is outside the timed
    // region.
    func BenchmarkFullPipelineColor448(b *testing.B) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatalf("reading %s: %v", benchWAV, err)
        }
        b.ResetTimer()
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
            outputSR := sr
            if sr > 16000 {
                segSamples = utils.ResampleRate(segSamples, sr, 16000)
                outputSR = 16000
            }
            cfg := utils.DefaultSpectrogramConfig(outputSR)
            spect := utils.GenerateSpectrogram(segSamples, cfg)
            colorData := utils.ApplyL4Colormap(spect)
            img := utils.CreateRGBImage(colorData)
            resized := utils.ResizeImage(img, 448, 448)

            f, err := os.CreateTemp("", "bench_*.png")
            if err != nil {
                // A nil *os.File would otherwise panic inside WritePNG.
                b.Fatalf("creating temp file: %v", err)
            }
            path := f.Name()
            writeErr := utils.WritePNG(resized, f)
            closeErr := f.Close()
            _ = os.Remove(path) // best-effort cleanup
            if writeErr != nil {
                b.Fatalf("writing PNG: %v", writeErr)
            }
            if closeErr != nil {
                b.Fatalf("closing temp file: %v", closeErr)
            }

            // The WAV reuses the just-removed temp path; only the write cost
            // matters here, not the file name.
            wavErr := utils.WriteWAVFile(path, segSamples, outputSR)
            _ = os.Remove(path) // best-effort cleanup
            if wavErr != nil {
                b.Fatalf("writing WAV: %v", wavErr)
            }
        }
    }
    // ==================== Data dimension report ====================
    // TestPipelineDimensions logs the data sizes at each pipeline stage for the
    // 872-895 segment of the benchWAV fixture: sample counts, the expected and
    // actual spectrogram dimensions, and the image sizes before and after
    // resizing. It asserts nothing beyond the fixture being readable and the
    // spectrogram being non-empty.
    func TestPipelineDimensions(t *testing.T) {
        samples, sr, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            t.Fatalf("reading %s: %v", benchWAV, err)
        }
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
            len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))
        cfg := utils.DefaultSpectrogramConfig(16000)
        numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
        numBins := cfg.WindowSize/2 + 1
        t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
            numBins, numFrames, numBins*numFrames)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        if len(spect) == 0 {
            // Guard the spect[0] access below; indexing would panic otherwise.
            t.Fatal("empty spectrogram")
        }
        t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))
        img := utils.CreateGrayscaleImage(spect)
        t.Logf("Grayscale image: %dx%d pixels, %d bytes",
            img.Bounds().Dx(), img.Bounds().Dy(), img.Bounds().Dx()*img.Bounds().Dy())
        resized := utils.ResizeImage(img, 224, 224)
        t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())
        resized448 := utils.ResizeImage(img, 448, 448)
        t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
    }
  • file addition: calls_clip.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "image"
    "math"
    "os"
    "path/filepath"
    "runtime"
    "strings"
    "sync"
    "skraak/utils"
    )
    // CallsClipInput defines the input for the clip tool.
    // validateClipInput requires File or Folder, plus Output and Prefix; the
    // remaining fields select segments and control rendering.
    type CallsClipInput struct {
    // File is a single .data file to process; when set, Folder is not searched.
    File string `json:"file"`
    // Folder is searched for .data files via utils.FindDataFiles when File is empty.
    Folder string `json:"folder"`
    // Output is the directory clips are written to; CallsClip creates it if missing.
    Output string `json:"output"`
    // Prefix is the first component of every generated clip file name.
    Prefix string `json:"prefix"`
    // Filter is passed to Segment.SegmentMatchesFilters when selecting segments.
    Filter string `json:"filter"`
    // Species is split by utils.ParseSpeciesCallType into a species name and call type.
    Species string `json:"species"`
    // Certainty is passed to Segment.SegmentMatchesFilters when selecting segments.
    Certainty int `json:"certainty"`
    // Size is the requested image size; it goes through utils.ClampImageSize and
    // is used as both width and height of the PNG.
    Size int `json:"size"`
    // Color selects the L4 colormap RGB image instead of the grayscale image.
    Color bool `json:"color"`
    // Night skips recordings for which IsNight reports SolarNight as false.
    Night bool `json:"night"`
    // Day skips recordings for which IsNight reports DiurnalActive as false.
    Day bool `json:"day"`
    // Location is parsed by utils.ParseLocation into latitude, longitude and
    // timezone, which feed the day/night check.
    Location string `json:"location,omitempty"`
    }
    // CallsClipOutput defines the output for the clip tool.
    // It is filled by accumulateFileResult, once per processed .data file.
    type CallsClipOutput struct {
    // FilesProcessed counts files that produced at least one clip or reported no errors.
    FilesProcessed int `json:"files_processed"`
    // SegmentsClipped is increased by the number of clip paths returned per file.
    SegmentsClipped int `json:"segments_clipped"`
    // NightSkipped accumulates the per-file skipped count when the Night option is set.
    NightSkipped int `json:"night_skipped,omitempty"`
    // DaySkipped accumulates the per-file skipped count when the Night option is not set.
    DaySkipped int `json:"day_skipped,omitempty"`
    // OutputFiles lists the paths of all generated clip files.
    OutputFiles []string `json:"output_files"`
    // Errors collects validation, per-file and per-segment error messages.
    Errors []string `json:"errors,omitempty"`
    }
    // CallsClip generates audio and spectrogram clips for every segment in the
    // selected .data files that matches the input's filters.
    //
    // It validates the input, resolves the file list, ensures the output folder
    // exists, and then processes the files (sequentially for one or two files,
    // with a worker pool otherwise). Setup failures are returned as an error and
    // also recorded in the output's Errors; per-file failures are only recorded
    // in Errors.
    func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
        var output CallsClipOutput

        if err := validateClipInput(&output, input); err != nil {
            return output, err
        }

        // "species" may carry a call type as well; split it once up front.
        speciesName, callType := utils.ParseSpeciesCallType(input.Species)

        filePaths, err := resolveClipFiles(&output, input)
        if err != nil {
            return output, err
        }

        if mkErr := os.MkdirAll(input.Output, 0755); mkErr != nil {
            output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", mkErr))
            return output, mkErr
        }

        imgSize := utils.ClampImageSize(input.Size)

        // Location is optional; without it the coordinates stay at their zero values.
        var (
            lat, lng float64
            timezone string
        )
        if input.Location != "" {
            lat, lng, timezone, err = utils.ParseLocation(input.Location)
            if err != nil {
                output.Errors = append(output.Errors, err.Error())
                return output, err
            }
        }

        // Small batches are not worth the goroutine overhead.
        process := processFilesParallel
        if len(filePaths) <= 2 {
            process = processFilesSequential
        }
        process(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
        return output, nil
    }
    // validateClipInput checks the flags clip generation cannot run without:
    // one of --file/--folder, --output and --prefix. The first missing flag is
    // recorded in output.Errors and returned as an error; nil means all present.
    func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
        // reject records the user-facing message and hands back the error to return.
        reject := func(message string, err error) error {
            output.Errors = append(output.Errors, message)
            return err
        }

        switch {
        case input.File == "" && input.Folder == "":
            return reject("either --file or --folder is required", fmt.Errorf("missing required flag: --file or --folder"))
        case input.Output == "":
            return reject("--output is required", fmt.Errorf("missing required flag: --output"))
        case input.Prefix == "":
            return reject("--prefix is required", fmt.Errorf("missing required flag: --prefix"))
        }
        return nil
    }
    // resolveClipFiles determines which .data files to process. An explicit
    // input.File wins; otherwise input.Folder is searched. A search failure or
    // an empty result is recorded in output.Errors and returned as an error.
    func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
        if single := input.File; single != "" {
            return []string{single}, nil
        }

        found, err := utils.FindDataFiles(input.Folder)
        switch {
        case err != nil:
            output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
            return nil, err
        case len(found) == 0:
            output.Errors = append(output.Errors, "no .data files found")
            return nil, fmt.Errorf("no .data files found")
        }
        return found, nil
    }
    // processFilesSequential runs processFile on each path in order, on the
    // calling goroutine, merging every result into output as it goes.
    func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
        for i := range filePaths {
            clips, skipped, errs := processFile(
                filePaths[i], input.Output, input.Prefix, input.Filter,
                speciesName, callType, input.Certainty, imgSize,
                input.Color, input.Night, input.Day, lat, lng, timezone,
            )
            accumulateFileResult(output, clips, skipped, errs, input.Night)
        }
    }
    // processFilesParallel processes .data files with a bounded pool of worker
    // goroutines: the smallest of runtime.NumCPU(), 8 and the number of files.
    //
    // Each worker stores its result in the slot matching the file's position in
    // filePaths, and results are merged only after all workers have finished.
    // output.OutputFiles and output.Errors therefore follow the order of
    // filePaths, as in processFilesSequential, instead of depending on which
    // goroutine happens to finish first.
    func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
        type fileResult struct {
            clips   []string
            skipped int
            errs    []string
        }

        workers := min(runtime.NumCPU(), 8, len(filePaths))

        // Buffered to len(filePaths) so every index can be queued before any
        // worker starts, without blocking.
        jobs := make(chan int, len(filePaths))
        for i := range filePaths {
            jobs <- i
        }
        close(jobs)

        // One slot per file; each index is written by exactly one worker and
        // read only after wg.Wait, so no further synchronisation is needed.
        results := make([]fileResult, len(filePaths))
        var wg sync.WaitGroup
        for range workers {
            wg.Go(func() {
                for i := range jobs {
                    clips, skipped, errs := processFile(filePaths[i], input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
                    results[i] = fileResult{clips: clips, skipped: skipped, errs: errs}
                }
            })
        }
        wg.Wait()

        for _, r := range results {
            accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
        }
    }
    // accumulateFileResult folds one file's outcome into the running totals.
    //
    // clips and errs are appended to OutputFiles and Errors. skipped is added to
    // NightSkipped when night is true and to DaySkipped otherwise. The file
    // counts as processed unless it yielded no clips and at least one error.
    //
    // NOTE(review): SegmentsClipped grows by len(clips), and generateClip returns
    // two paths (PNG and WAV) per segment, so this counts files rather than
    // segments - confirm that is intended.
    func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
        produced := len(clips)
        output.SegmentsClipped += produced
        output.OutputFiles = append(output.OutputFiles, clips...)
        output.Errors = append(output.Errors, errs...)

        skipCounter := &output.DaySkipped
        if night {
            skipCounter = &output.NightSkipped
        }
        *skipCounter += skipped

        // A file that only produced errors is not counted as processed.
        if produced == 0 && len(errs) > 0 {
            return
        }
        output.FilesProcessed++
    }
    // processFile handles one .data file end to end and returns the generated
    // clip paths, the number of recordings skipped by the day/night filter
    // (0 or 1), and any error messages.
    //
    // The WAV path is the .data path with its ".data" suffix removed. A file
    // with no matching segments returns all-empty results. When the day/night
    // check itself fails, the file is dropped without an error entry
    // (checkDayNightFilter has already printed a warning to stderr).
    func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
        dataFile, err := utils.ParseDataFile(dataPath)
        if err != nil {
            return nil, 0, []string{fmt.Sprintf("%s: failed to parse: %v", dataPath, err)}
        }

        matching := filterSegments(dataFile.Segments, filter, speciesName, callType, certainty)
        if len(matching) == 0 {
            return nil, 0, nil
        }

        wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))

        // Apply the time-of-day filter before loading audio, so skipped
        // recordings never pay for a full sample read.
        if night || day {
            skipped, filterErr := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
            switch {
            case skipped:
                return nil, 1, nil
            case filterErr != nil:
                return nil, 0, nil
            }
        }

        // Samples are read once and shared by every segment of this file.
        samples, sampleRate, err := utils.ReadWAVSamples(wavPath)
        if err != nil {
            return nil, 0, []string{fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err)}
        }

        // Clip names use the WAV file name without directory or extension.
        stem := filepath.Base(wavPath)
        stem = strings.TrimSuffix(stem, filepath.Ext(stem))

        clips, errs := processSegments(matching, dataPath, samples, sampleRate, outputDir, prefix, stem, imgSize, color)
        return clips, 0, errs
    }
    // filterSegments keeps, in their original order, the segments for which
    // SegmentMatchesFilters accepts the given filter, species, call type and
    // certainty. The result is nil when nothing matches.
    func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
        var kept []*utils.Segment
        for i := range segments {
            candidate := segments[i]
            if !candidate.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
                continue
            }
            kept = append(kept, candidate)
        }
        return kept
    }
    // checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
    // recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
    func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
    result, err := IsNight(IsNightInput{
    FilePath: wavPath,
    Lat: lat,
    Lng: lng,
    Timezone: timezone,
    })
    if err != nil {
    fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
    return false, err
    }
    if night && !result.SolarNight {
    fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
    return true, nil
    }
    if day && !result.DiurnalActive {
    fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
    return true, nil
    }
    return false, nil
    }
    // processSegments produces clips for every given segment and returns the
    // generated file paths together with per-segment error messages. Up to two
    // segments are handled inline; larger batches go to processSegmentsParallel.
    func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
        if len(segments) > 2 {
            return processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
        }

        var produced, failures []string
        for i := range segments {
            s := segments[i]
            files, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
            if err == nil {
                produced = append(produced, files...)
                continue
            }
            failures = append(failures, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err))
        }
        return produced, failures
    }
    // processSegmentsParallel generates clips for segments with a pool of worker
    // goroutines: the smaller of runtime.NumCPU() and the number of segments.
    // All workers read the same samples slice.
    //
    // Each worker stores its outcome in the slot matching the segment's position
    // and outcomes are merged only after all workers have finished, so the
    // returned clip paths and error messages follow segment order (as in the
    // sequential branch of processSegments) rather than goroutine completion
    // order.
    func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
        type segResult struct {
            clips []string
            err   string
        }

        workers := min(runtime.NumCPU(), len(segments))

        // Buffered to len(segments) so every index can be queued up front.
        jobs := make(chan int, len(segments))
        for i := range segments {
            jobs <- i
        }
        close(jobs)

        // One slot per segment; each index is written by exactly one worker and
        // read only after wg.Wait.
        results := make([]segResult, len(segments))
        var wg sync.WaitGroup
        for range workers {
            wg.Go(func() {
                for i := range jobs {
                    seg := segments[i]
                    clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
                    if err != nil {
                        results[i] = segResult{err: fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err)}
                        continue
                    }
                    results[i] = segResult{clips: clipFiles}
                }
            })
        }
        wg.Wait()

        var clips []string
        var errors []string
        for _, r := range results {
            if r.err != "" {
                errors = append(errors, r.err)
                continue
            }
            clips = append(clips, r.clips...)
        }
        return clips, errors
    }
    // generateClip writes a spectrogram PNG and a WAV for one segment and
    // returns both paths (PNG first).
    //
    // Files are named "<prefix>_<basename>_<start>_<end>" with start floored and
    // end ceiled to whole numbers. Audio above utils.DefaultMaxSampleRate is
    // resampled down to it before the spectrogram and the WAV are produced. The
    // image is the L4 colormap RGB rendering when color is true, grayscale
    // otherwise, resized to imgSize x imgSize.
    //
    // The PNG is created with O_EXCL, so an existing file is an error rather
    // than being overwritten. If anything fails after the PNG has been created,
    // the PNG is removed again: a leftover partial file would otherwise make
    // every retry fail with "file already exists".
    func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
        segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
        if len(segSamples) == 0 {
            return nil, fmt.Errorf("no samples in segment")
        }

        // Determine output sample rate (downsample if above the maximum).
        outputSampleRate := sampleRate
        if sampleRate > utils.DefaultMaxSampleRate {
            segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
            outputSampleRate = utils.DefaultMaxSampleRate
        }

        config := utils.DefaultSpectrogramConfig(outputSampleRate)
        spectrogram := utils.GenerateSpectrogram(segSamples, config)
        if spectrogram == nil {
            return nil, fmt.Errorf("failed to generate spectrogram")
        }

        // Create image (grayscale or color).
        var img image.Image
        if color {
            colorData := utils.ApplyL4Colormap(spectrogram)
            img = utils.CreateRGBImage(colorData)
        } else {
            img = utils.CreateGrayscaleImage(spectrogram)
        }
        if img == nil {
            return nil, fmt.Errorf("failed to create image")
        }
        resized := utils.ResizeImage(img, imgSize, imgSize)

        // Integer times keep the file names free of decimal points.
        startInt := int(math.Floor(startTime))
        endInt := int(math.Ceil(endTime))
        baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
        pngPath := filepath.Join(outputDir, baseName+".png")
        wavPath := filepath.Join(outputDir, baseName+".wav")

        // Write PNG (O_EXCL fails atomically if file exists).
        pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
        if err != nil {
            if os.IsExist(err) {
                return nil, fmt.Errorf("file already exists: %s", pngPath)
            }
            return nil, fmt.Errorf("failed to create PNG: %w", err)
        }
        if err := utils.WritePNG(resized, pngFile); err != nil {
            _ = pngFile.Close()
            _ = os.Remove(pngPath) // best-effort: drop the partial PNG we created
            return nil, fmt.Errorf("failed to write PNG: %w", err)
        }
        if err := pngFile.Close(); err != nil {
            _ = os.Remove(pngPath) // best-effort: contents may not be fully flushed
            return nil, fmt.Errorf("failed to close PNG: %w", err)
        }

        // Write WAV. On failure no paths are returned, so the PNG created above
        // is removed rather than left orphaned.
        if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
            _ = os.Remove(pngPath)
            return nil, fmt.Errorf("failed to write WAV: %w", err)
        }
        return []string{pngPath, wavPath}, nil
    }
  • file addition: calls_classify_test.go (----------)
    [0.67281]
    package calls
    import (
    "testing"
    "skraak/utils"
    )
    // NewClassifyState builds a ClassifyState directly from in-memory data files,
    // for use by the tests in this package.
    //
    // Filtering is active when Filter or Species is non-empty or Certainty is
    // >= 0. With filtering off, each file's cached list is its full Segments
    // slice; with it on, only segments accepted by SegmentMatchesFilters are
    // cached. totalSegs is the sum of the cached lists.
    func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {
        filtering := config.Filter != "" || config.Species != "" || config.Certainty >= 0

        perFile := make([][]*utils.Segment, len(dataFiles))
        count := 0
        for idx, file := range dataFiles {
            if filtering {
                for _, s := range file.Segments {
                    if s.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
                        perFile[idx] = append(perFile[idx], s)
                    }
                }
            } else {
                perFile[idx] = file.Segments
            }
            count += len(perFile[idx])
        }

        return &ClassifyState{
            Config:       config,
            DataFiles:    dataFiles,
            filteredSegs: perFile,
            totalSegs:    count,
        }
    }
    func TestParseKeyBuffer(t *testing.T) {
    bindings := []KeyBinding{
    {Key: "k", Species: "Kiwi"},
    {Key: "d", Species: "Kiwi", CallType: "Duet"},
    {Key: "n", Species: "Don't Know"},
    {Key: "p", Species: "Morepork"},
    }
    state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)
    tests := []struct {
    key string
    want *BindingResult
    wantNil bool
    }{
    {"k", &BindingResult{Species: "Kiwi"}, false},
    {"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},
    {"n", &BindingResult{Species: "Don't Know"}, false},
    {"p", &BindingResult{Species: "Morepork"}, false},
    {"x", nil, true}, // unknown key
    }
    for _, tt := range tests {
    got := state.ParseKeyBuffer(tt.key)
    if tt.wantNil {
    if got != nil {
    t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)
    }
    } else {
    if got == nil {
    t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)
    continue
    }
    if got.Species != tt.want.Species {
    t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)
    }
    if got.CallType != tt.want.CallType {
    t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)
    }
    }
    }
    }
    func TestApplyBinding(t *testing.T) {
    bindings := []KeyBinding{
    {Key: "k", Species: "Kiwi"},
    {Key: "n", Species: "Don't Know"},
    {Key: "d", Species: "Kiwi", CallType: "Duet"},
    }
    df := &utils.DataFile{
    Meta: &utils.DataMeta{},
    Segments: []*utils.Segment{
    {
    StartTime: 10.0,
    EndTime: 20.0,
    Labels: []*utils.Label{
    {Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},
    },
    },
    },
    }
    state := NewClassifyState(ClassifyConfig{
    Filter: "test-filter",
    Reviewer: "David",
    Bindings: bindings,
    Certainty: -1,
    }, []*utils.DataFile{df})
    // Apply "k" = Kiwi (no calltype, should remove existing calltype)
    result := &BindingResult{Species: "Kiwi"}
    state.ApplyBinding(result)
    // Check label was updated
    if len(df.Segments[0].Labels) != 1 {
    t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))
    }
    if df.Segments[0].Labels[0].Species != "Kiwi" {
    t.Errorf("expected Species=Kiwi, got %s", df.Segments[0].Labels[0].Species)
    }
    if df.Segments[0].Labels[0].Certainty != 100 {
    t.Errorf("expected Certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
    }
    if df.Segments[0].Labels[0].CallType != "" {
    t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
    }
    if df.Meta.Reviewer != "David" {
    t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)
    }
    // Apply "d" = Kiwi/Duet (should set calltype)
    result = &BindingResult{Species: "Kiwi", CallType: "Duet"}
    state.ApplyBinding(result)
    if df.Segments[0].Labels[0].CallType != "Duet" {
    t.Errorf("expected CallType=Duet, got %s", df.Segments[0].Labels[0].CallType)
    }
    // Apply "n" = Don't Know (certainty should be 0)
    result = &BindingResult{Species: "Don't Know"}
    state.ApplyBinding(result)
    if df.Segments[0].Labels[0].Species != "Don't Know" {
    t.Errorf("expected Species=Don't Know, got %s", df.Segments[0].Labels[0].Species)
    }
    if df.Segments[0].Labels[0].Certainty != 0 {
    t.Errorf("expected Certainty=0 for Don't Know, got %d", df.Segments[0].Labels[0].Certainty)
    }
    }
    func TestApplyBindingCallTypeRemoval(t *testing.T) {
    bindings := []KeyBinding{
    {Key: "k", Species: "Kiwi"}, // no calltype
    }
    df := &utils.DataFile{
    Meta: &utils.DataMeta{},
    Segments: []*utils.Segment{
    {
    StartTime: 10.0,
    EndTime: 20.0,
    Labels: []*utils.Label{
    {Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},
    },
    },
    },
    }
    state := NewClassifyState(ClassifyConfig{
    Filter: "test-filter",
    Reviewer: "David",
    Bindings: bindings,
    Certainty: -1,
    }, []*utils.DataFile{df})
    // Apply "k" = Kiwi (should remove Male calltype)
    result := &BindingResult{Species: "Kiwi"}
    state.ApplyBinding(result)
    if df.Segments[0].Labels[0].CallType != "" {
    t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
    }
    }
    // TestConfirmLabelDontKnow checks that ConfirmLabel is a no-op on a
    // "Don't Know" label with certainty 0: it returns false, leaves the label
    // unchanged and does not mark the state dirty.
    func TestConfirmLabelDontKnow(t *testing.T) {
        df := &utils.DataFile{
            Meta: &utils.DataMeta{},
            Segments: []*utils.Segment{
                {
                    StartTime: 10.0,
                    EndTime:   20.0,
                    Labels: []*utils.Label{
                        {Species: "Don't Know", Certainty: 0, Filter: "test-filter"},
                    },
                },
            },
        }
        cfg := ClassifyConfig{Filter: "test-filter", Reviewer: "David", Certainty: -1}
        state := NewClassifyState(cfg, []*utils.DataFile{df})

        if confirmed := state.ConfirmLabel(); confirmed {
            t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")
        }

        after := df.Segments[0].Labels[0]
        if after.Species != "Don't Know" {
            t.Errorf("Species should remain Don't Know, got %s", after.Species)
        }
        if after.Certainty != 0 {
            t.Errorf("Certainty should remain 0, got %d", after.Certainty)
        }
        if state.Dirty {
            t.Error("State should not be dirty after confirming Don't Know")
        }
    }
  • file addition: calls_classify_load_test.go (----------)
    [0.67281]
    package calls
    import (
    "os"
    "path/filepath"
    "testing"
    )
    // writeDataFileContent writes content verbatim to dir/name (mode 0644) and
    // aborts the calling test if the write fails.
    func writeDataFileContent(t *testing.T, dir, name, content string) {
        t.Helper()
        target := filepath.Join(dir, name)
        err := os.WriteFile(target, []byte(content), 0644)
        if err != nil {
            t.Fatal(err)
        }
    }
    // mustLoadDataFiles wraps LoadDataFiles for tests: it returns the loaded
    // state, or aborts the calling test if loading reports an error.
    func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
        t.Helper()
        loaded, loadErr := LoadDataFiles(config)
        if loadErr == nil {
            return loaded
        }
        t.Fatal(loadErr)
        return loaded
    }
    // assertFileSegCounts reports a (non-fatal) test error when the number of
    // loaded data files or the state's TotalSegments differs from the expected
    // values. label prefixes the messages to identify the scenario.
    func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
        t.Helper()
        if gotFiles := len(state.DataFiles); gotFiles != wantFiles {
            t.Errorf("%s: expected %d files, got %d", label, wantFiles, gotFiles)
        }
        if state.TotalSegments() != wantSegs {
            t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, state.TotalSegments())
        }
    }
    // kiwiSeg is the raw content of a .data file holding one segment labelled
    // "Kiwi" with certainty 90.
    const kiwiSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`

    // tomtitSeg is the raw content of a .data file holding one segment labelled
    // "Tomtit" with certainty 90.
    const tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
    // TestLoadDataFilesFiltersFilesWithNoMatchingSegments loads a folder with two
    // Kiwi files and one Tomtit file under different species filters and checks
    // that files without a matching segment are left out of the loaded state.
    func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
        tempDir := t.TempDir()
        writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
        writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
        writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)

        cases := []struct {
            name      string
            species   string
            wantFiles int
            wantSegs  int
            label     string
        }{
            {name: "no_filter", species: "", wantFiles: 3, wantSegs: 3, label: "No filter"},
            {name: "species_kiwi", species: "Kiwi", wantFiles: 2, wantSegs: 2, label: "Species=Kiwi"},
            {name: "species_tomtit", species: "Tomtit", wantFiles: 1, wantSegs: 1, label: "Species=Tomtit"},
            {name: "species_nonexistent", species: "NonExistent", wantFiles: 0, wantSegs: 0, label: "Species=NonExistent"},
        }
        for _, tc := range cases {
            t.Run(tc.name, func(t *testing.T) {
                cfg := ClassifyConfig{Folder: tempDir, Species: tc.species, Certainty: -1}
                state := mustLoadDataFiles(t, cfg)
                assertFileSegCounts(t, state, tc.wantFiles, tc.wantSegs, tc.label)
            })
        }
    }
    // TestLoadDataFilesWithMixedSegments loads one file containing two Kiwi
    // segments and one Tomtit segment with a Kiwi filter, and checks that the
    // file is kept with all three segments while TotalSegments reports only the
    // two matching ones.
    func TestLoadDataFilesWithMixedSegments(t *testing.T) {
        dir := t.TempDir()
        writeDataFileContent(t, dir, "mixed.data", `[
    {"Operator": "test"},
    [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
    [10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],
    [20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]
    ]`)

        state := mustLoadDataFiles(t, ClassifyConfig{Folder: dir, Species: "Kiwi", Certainty: -1})

        if n := len(state.DataFiles); n != 1 {
            t.Errorf("Expected 1 file, got %d", n)
        }
        if state.TotalSegments() != 2 {
            t.Errorf("Species=Kiwi: expected 2 segments, got %d", state.TotalSegments())
        }
        // Filtering must not drop segments from the DataFile itself; only the
        // cached filtered view is narrowed to the Kiwi segments.
        if n := len(state.DataFiles[0].Segments); n != 3 {
            t.Errorf("DataFile should have 3 segments internally, got %d", n)
        }
        // TotalSegments is answered from the cached filtered segments.
        if state.TotalSegments() != 2 {
            t.Errorf("TotalSegments should return 2 Kiwi segments, got %d", state.TotalSegments())
        }
    }
    // TestFilteringDoesNotModifyOriginalSegments checks that loading with a
    // species filter leaves the DataFile's own segment list intact: both the
    // Kiwi and the Tomtit segment are still present, in file order.
    func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
        dir := t.TempDir()
        writeDataFileContent(t, dir, "test.data", `[
    {"Operator": "test"},
    [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
    [10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]
    ]`)

        state := mustLoadDataFiles(t, ClassifyConfig{Folder: dir, Species: "Kiwi", Certainty: -1})

        // The unfiltered list must still hold every segment from the file.
        untouched := state.DataFiles[0].Segments
        if len(untouched) != 2 {
            t.Errorf("Original should have 2 segments, got %d", len(untouched))
        }

        // Collect the first label's species of each labelled segment.
        var species []string
        for i := range untouched {
            labels := untouched[i].Labels
            if len(labels) == 0 {
                continue
            }
            species = append(species, labels[0].Species)
        }

        asExpected := len(species) == 2 && species[0] == "Kiwi" && species[1] == "Tomtit"
        if !asExpected {
            t.Errorf("Original segments should have both species, got %v", species)
        }
    }
    // TestLoadDataFilesCertaintyPruning loads two single-segment Kiwi files
    // (certainty 70 and 100) with Certainty=100 and checks that only one file
    // and one segment remain, and that CurrentSegment is usable afterwards.
    func TestLoadDataFilesCertaintyPruning(t *testing.T) {
        dir := t.TempDir()
        fixtures := []struct{ name, content string }{
            {"file1.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`},
            {"file2.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`},
        }
        for _, f := range fixtures {
            writeDataFileContent(t, dir, f.name, f.content)
        }

        state := mustLoadDataFiles(t, ClassifyConfig{Folder: dir, Certainty: 100})
        assertFileSegCounts(t, state, 1, 1, "Certainty=100")

        // With the non-matching file pruned, the current position must resolve
        // to a real segment.
        if state.CurrentSegment() == nil {
            t.Error("CurrentSegment should not be nil after pruning")
        }
    }
  • file addition: calls_classify_filter_test.go (----------)
    [0.67281]
    package calls
    import (
    "math/rand"
    "testing"
    "skraak/utils"
    )
    // TestTotalSegmentsRespectsFilters checks that TotalSegments only counts the
    // segments that match the configured filter/species scope.
    func TestTotalSegmentsRespectsFilters(t *testing.T) {
    	// seg builds a segment carrying exactly one label.
    	seg := func(start, end float64, label *utils.Label) *utils.Segment {
    		return &utils.Segment{StartTime: start, EndTime: end, Labels: []*utils.Label{label}}
    	}

    	// Fixtures: two files sharing one filter, plus a third mixing two filters.
    	df1 := &utils.DataFile{
    		FilePath: "/test/file1.data",
    		Segments: []*utils.Segment{
    			seg(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0"}),
    			seg(10, 20, &utils.Label{Species: "Tomtit", Filter: "model-1.0"}),
    		},
    	}
    	df2 := &utils.DataFile{
    		FilePath: "/test/file2.data",
    		Segments: []*utils.Segment{
    			seg(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0"}),
    		},
    	}
    	df3 := &utils.DataFile{
    		FilePath: "/test/file3.data",
    		Segments: []*utils.Segment{
    			seg(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet"}),
    			seg(10, 20, &utils.Label{Species: "Kiwi", Filter: "model-2.0", CallType: "Male"}),
    		},
    	}

    	cases := []struct {
    		cfg       ClassifyConfig
    		files     []*utils.DataFile
    		want      int
    		errFormat string
    	}{
    		// No filters: every segment counts.
    		{ClassifyConfig{Certainty: -1}, []*utils.DataFile{df1, df2}, 3,
    			"No filters: expected 3 segments, got %d"},
    		// Species scope.
    		{ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2}, 2,
    			"Species=Kiwi: expected 2 segments, got %d"},
    		{ClassifyConfig{Species: "Tomtit", Certainty: -1}, []*utils.DataFile{df1, df2}, 1,
    			"Species=Tomtit: expected 1 segment, got %d"},
    		// Filter-name scope matches every fixture label in df1/df2.
    		{ClassifyConfig{Filter: "model-1.0", Certainty: -1}, []*utils.DataFile{df1, df2}, 3,
    			"Filter=model-1.0: expected 3 segments, got %d"},
    		// Unknown species matches nothing.
    		{ClassifyConfig{Species: "NonExistent", Certainty: -1}, []*utils.DataFile{df1, df2}, 0,
    			"Species=NonExistent: expected 0 segments, got %d"},
    		// Filter and species combined.
    		{ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3}, 1,
    			"Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d"},
    	}
    	for _, tc := range cases {
    		state := NewClassifyState(tc.cfg, tc.files)
    		if got := state.TotalSegments(); got != tc.want {
    			t.Errorf(tc.errFormat, got)
    		}
    	}
    }
    // TestCurrentSegmentNumberWithFilters checks that the 1-based position is
    // computed over the species-filtered segments rather than over all segments.
    func TestCurrentSegmentNumberWithFilters(t *testing.T) {
    	// seg builds a segment carrying exactly one "model-1.0" label.
    	seg := func(start, end float64, species string) *utils.Segment {
    		return &utils.Segment{
    			StartTime: start,
    			EndTime:   end,
    			Labels:    []*utils.Label{{Species: species, Filter: "model-1.0"}},
    		}
    	}
    	first := &utils.DataFile{
    		FilePath: "/test/file1.data",
    		Segments: []*utils.Segment{seg(0, 10, "Kiwi"), seg(10, 20, "Tomtit")},
    	}
    	second := &utils.DataFile{
    		FilePath: "/test/file2.data",
    		Segments: []*utils.Segment{seg(0, 10, "Kiwi")},
    	}

    	// Scoped to Kiwi and positioned on the second file's first segment, the
    	// position is 2: one Kiwi in the first file, then this one.
    	state := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{first, second})
    	state.FileIdx, state.SegmentIdx = 1, 0

    	got := state.CurrentSegmentNumber()
    	if got != 2 {
    		t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)
    	}
    }
    // TestCertaintyFiltering checks TotalSegments when the config is scoped to a
    // certainty value, alone and combined with a species.
    func TestCertaintyFiltering(t *testing.T) {
    	// seg builds a segment carrying exactly one "model-1.0" label.
    	seg := func(start, end float64, species string, certainty int) *utils.Segment {
    		return &utils.Segment{
    			StartTime: start,
    			EndTime:   end,
    			Labels:    []*utils.Label{{Species: species, Filter: "model-1.0", Certainty: certainty}},
    		}
    	}
    	df := &utils.DataFile{
    		FilePath: "/test/file1.data",
    		Segments: []*utils.Segment{
    			seg(0, 10, "Kiwi", 70),
    			seg(10, 20, "Kiwi", 100),
    			seg(20, 30, "Tomtit", 70),
    		},
    	}

    	cases := []struct {
    		cfg       ClassifyConfig
    		want      int
    		errFormat string
    	}{
    		{ClassifyConfig{Certainty: 70}, 2, "Certainty=70: expected 2 segments, got %d"},
    		{ClassifyConfig{Certainty: 100}, 1, "Certainty=100: expected 1 segment, got %d"},
    		// Certainty 0 is a real filter value (only -1 disables filtering).
    		{ClassifyConfig{Certainty: 0}, 0, "Certainty=0: expected 0 segments, got %d"},
    		{ClassifyConfig{Species: "Kiwi", Certainty: 70}, 1, "Species=Kiwi + Certainty=70: expected 1 segment, got %d"},
    	}
    	for _, tc := range cases {
    		state := NewClassifyState(tc.cfg, []*utils.DataFile{df})
    		if got := state.TotalSegments(); got != tc.want {
    			t.Errorf(tc.errFormat, got)
    		}
    	}
    }
    // TestSampling checks that applySampling keeps the expected number of
    // segments for several percentages and preserves file order.
    func TestSampling(t *testing.T) {
    	// newFile builds a data file with n consecutive unlabeled segments.
    	newFile := func(path string, n int) *utils.DataFile {
    		segs := make([]*utils.Segment, 0, n)
    		for i := 0; i < n; i++ {
    			segs = append(segs, &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)})
    		}
    		return &utils.DataFile{FilePath: path, Segments: segs}
    	}
    	df1, df2 := newFile("/test/f1.data", 6), newFile("/test/f2.data", 4)
    	kept := []*utils.DataFile{df1, df2}
    	cached := [][]*utils.Segment{df1.Segments, df2.Segments}

    	// sample runs applySampling with a fixed seed and reports the surviving
    	// files plus the total number of surviving segments.
    	sample := func(pct int) ([]*utils.DataFile, int) {
    		files, perFile := applySampling(kept, cached, pct, rand.New(rand.NewSource(42)))
    		total := 0
    		for _, segs := range perFile {
    			total += len(segs)
    		}
    		return files, total
    	}

    	// 50% of 10 → 5, and files stay in their original order.
    	files, got := sample(50)
    	if got != 5 {
    		t.Errorf("sample 50%%: expected 5, got %d", got)
    	}
    	for i := 1; i < len(files); i++ {
    		if files[i-1].FilePath > files[i].FilePath {
    			t.Errorf("sample 50%%: files out of order at index %d", i)
    		}
    	}

    	remaining := []struct {
    		pct       int
    		want      int
    		errFormat string
    	}{
    		{10, 1, "sample 10%%: expected 1, got %d"},          // 10% of 10 → 1
    		{1, 1, "sample 1%%: expected 1 (clamped), got %d"}, // 1% of 10 → clamp to 1
    		{99, 9, "sample 99%%: expected 9, got %d"},          // 99% of 10 → 9
    	}
    	for _, tc := range remaining {
    		if _, got := sample(tc.pct); got != tc.want {
    			t.Errorf(tc.errFormat, got)
    		}
    	}
    }
    // TestCertaintyPruning builds a state whose first file has no certainty-100
    // segment and checks that only the second file's segment is counted.
    //
    // Only the count is asserted here: the state is constructed manually, and
    // pruning of empty leading files is exercised through LoadDataFiles in a
    // separate test.
    func TestCertaintyPruning(t *testing.T) {
    	// fileWith builds a data file holding one Kiwi segment of the given certainty.
    	fileWith := func(path string, certainty int) *utils.DataFile {
    		return &utils.DataFile{
    			FilePath: path,
    			Segments: []*utils.Segment{{
    				StartTime: 0,
    				EndTime:   10,
    				Labels:    []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: certainty}},
    			}},
    		}
    	}
    	noMatch := fileWith("/test/file1.data", 70)
    	match := fileWith("/test/file2.data", 100)

    	// The old bug: with the non-matching file first, CurrentSegment() returned
    	// nil even though TotalSegments() was positive.
    	state := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{noMatch, match})

    	got := state.TotalSegments()
    	if got != 1 {
    		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
    	}
    }
    // TestCallTypeNoneFiltering checks species scoping combined with the
    // utils.CallTypeNone marker, a concrete call type, and no call type at all.
    func TestCallTypeNoneFiltering(t *testing.T) {
    	// seg builds a segment carrying exactly one label.
    	seg := func(start, end float64, label *utils.Label) *utils.Segment {
    		return &utils.Segment{StartTime: start, EndTime: end, Labels: []*utils.Label{label}}
    	}
    	df := &utils.DataFile{
    		FilePath: "/test/file1.data",
    		Segments: []*utils.Segment{
    			seg(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0", CallType: "Male"}),
    			seg(10, 20, &utils.Label{Species: "Kiwi", Filter: "model-1.0"}),   // no calltype
    			seg(20, 30, &utils.Label{Species: "Tomtit", Filter: "model-1.0"}), // no calltype, wrong species
    		},
    	}

    	cases := []struct {
    		cfg       ClassifyConfig
    		want      int
    		errFormat string
    	}{
    		// --species Kiwi+_ : only the Kiwi without a calltype.
    		{ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, 1,
    			"Species=Kiwi+_: expected 1 segment, got %d"},
    		// --species Kiwi : every Kiwi regardless of calltype.
    		{ClassifyConfig{Species: "Kiwi", Certainty: -1}, 2,
    			"Species=Kiwi: expected 2 segments, got %d"},
    		// --species Kiwi+Male : only the Male Kiwi.
    		{ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1}, 1,
    			"Species=Kiwi+Male: expected 1 segment, got %d"},
    	}
    	for _, tc := range cases {
    		state := NewClassifyState(tc.cfg, []*utils.DataFile{df})
    		if got := state.TotalSegments(); got != tc.want {
    			t.Errorf(tc.errFormat, got)
    		}
    	}
    }
  • file addition: calls_classify.go (----------)
    [0.67281]
    package calls
    import (
    "fmt"
    "math/rand"
    "os"
    "path/filepath"
    "slices"
    "sort"
    "strings"
    "time"
    "skraak/utils"
    )
    // KeyBinding maps a single key to the species (and optional call type) that
    // ParseKeyBuffer reports when that key is looked up.
    type KeyBinding struct {
    Key string // single char: "k", "n", "p"
    Species string // "Kiwi", "Don't Know", "Morepork"
    CallType string // "Duet", "Female", "Male" (optional)
    }
    // ClassifyConfig holds the configuration for a classification session:
    // where to find .data files, how to scope the segments under review, and
    // display/key-binding options.
    type ClassifyConfig struct {
    // Folder is searched for .data files when File is empty.
    Folder string
    // File, when non-empty, is used as the only .data file (Folder is ignored).
    File string
    // Filter scopes segments to labels with this filter name; it is also the
    // filter name written on labels created during classification.
    Filter string
    Species string // scope to this species (optional)
    CallType string // scope to this calltype within species (optional)
    Certainty int // scope to this certainty value, -1 = no filter (optional)
    Sample int // random sample percentage 1-99, -1 = no sampling, 100 = no-op
    Goto string // goto this file on startup (optional, basename match)
    // Reviewer is written to the file metadata whenever a label is edited.
    Reviewer string
    // Color, Sixel and ITerm are display options; they are not read in this
    // file. NOTE(review): presumably consumed by the TUI renderer — confirm.
    Color bool
    ImageSize int // spectrogram display size in pixels (0 = default)
    Sixel bool
    ITerm bool
    // Bindings is the list of primary key bindings searched by ParseKeyBuffer.
    Bindings []KeyBinding
    // SecondaryBindings maps a primary binding key to per-species calltype
    // keys. Invoked via Shift+primary-key: the species is labeled without
    // advancing, and the next key is interpreted as a calltype.
    SecondaryBindings map[string]map[string]string
    // Night and Day enable the time-of-day file filter; a file is kept only if
    // IsNight reports SolarNight (for Night) / DiurnalActive (for Day).
    Night bool
    Day bool
    // Lat, Lng and Timezone are passed through to IsNight for that filter.
    Lat float64
    Lng float64
    Timezone string
    }
    // ClassifyState holds the current state for TUI: the loaded files, the
    // per-file filtered segments, and the current navigation position.
    type ClassifyState struct {
    Config ClassifyConfig
    // DataFiles are the files kept after filtering (and optional sampling).
    DataFiles []*utils.DataFile
    filteredSegs [][]*utils.Segment // cached at load time, parallel to DataFiles
    totalSegs int // pre-computed total segment count
    // FileIdx and SegmentIdx form the current position: an index into
    // DataFiles/filteredSegs and an index into filteredSegs[FileIdx].
    FileIdx int
    SegmentIdx int
    // Dirty is set by the editing methods and cleared by a successful Save.
    Dirty bool
    // Player is not used in this file. NOTE(review): presumably managed by the
    // TUI playback code — confirm.
    Player *utils.AudioPlayer
    PlaybackSpeed float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
    TimeFilteredCount int // files skipped by --night or --day filter
    }
    // BindingResult represents the species/calltype resolved from a key press,
    // as returned by ParseKeyBuffer and consumed by ApplyBinding.
    type BindingResult struct {
    Species string
    CallType string // empty string = remove calltype
    }
    // findDataFilePaths resolves the list of .data file paths from config.
    // An explicit config.File wins and is returned as the only entry; otherwise
    // config.Folder is searched with utils.FindDataFiles.
    func findDataFilePaths(config ClassifyConfig) ([]string, error) {
    	if single := config.File; single != "" {
    		return []string{single}, nil
    	}
    	found, err := utils.FindDataFiles(config.Folder)
    	if err == nil {
    		return found, nil
    	}
    	return nil, fmt.Errorf("find data files: %w", err)
    }
    // filterDataFileSegments applies the label filters and the optional
    // day/night filter to a single data file.
    //
    // It returns (segments, keep, timeFiltered):
    //   - segments: the segments of df that match the label filters (nil when
    //     the file is dropped);
    //   - keep: false when the file has no matching segments or is rejected by
    //     the time-of-day filter;
    //   - timeFiltered: 1 when the file was rejected by the time-of-day filter,
    //     otherwise 0.
    //
    // The time-of-day check only runs for files that still have segments, so
    // files dropped by the label filters are never counted as time-filtered.
    func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
    	segs := filterSegmentsByLabel(df.Segments, config)
    	// Test emptiness by length: with no label filter active the file's own
    	// slice is passed through, and an empty non-nil slice must be skipped too.
    	if len(segs) == 0 {
    		return nil, false, 0
    	}
    	if config.Night || config.Day {
    		if keep, skipped := filterByTimeOfDay(df.FilePath, config); !keep {
    			return nil, false, skipped
    		}
    	}
    	return segs, true, 0
    }
    // filterSegmentsByLabel returns the segments that satisfy the configured
    // filter-name/species/calltype/certainty scope. When no scope is configured
    // (empty Filter and Species, negative Certainty) the input slice is returned
    // unchanged. A nil result means nothing matched and the caller should skip
    // the file.
    func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
    	unscoped := config.Filter == "" && config.Species == "" && config.Certainty < 0
    	if unscoped {
    		return segments
    	}
    	var matched []*utils.Segment
    	for i := range segments {
    		candidate := segments[i]
    		if !candidate.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
    			continue
    		}
    		matched = append(matched, candidate)
    	}
    	return matched // stays nil when nothing matched
    }
    // filterByTimeOfDay applies the --night/--day filter to one .data file.
    // The audio path is derived by stripping the ".data" suffix and is handed to
    // IsNight together with the configured location and timezone.
    // Returns (keep, timeFilteredCount): (true, 0) when the file passes, and
    // (false, 1) when it is rejected or IsNight fails (a warning is printed to
    // stderr in the failure case).
    func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
    	audioPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
    	in := IsNightInput{
    		FilePath: audioPath,
    		Lat:      config.Lat,
    		Lng:      config.Lng,
    		Timezone: config.Timezone,
    	}
    	result, err := IsNight(in)
    	switch {
    	case err != nil:
    		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", audioPath, err)
    		return false, 1
    	case config.Night && !result.SolarNight:
    		return false, 1
    	case config.Day && !result.DiurnalActive:
    		return false, 1
    	default:
    		return true, 0
    	}
    }
    // LoadDataFiles loads all .data files for classification: it parses and
    // sorts the files, applies the segment and time-of-day filters, optionally
    // takes a random sample (when 0 < config.Sample < 100), and builds the
    // initial ClassifyState.
    func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
    	parsed, err := parseAndSortDataFiles(config)
    	if err != nil {
    		return nil, err
    	}

    	files, segs, skippedByTime := filterDataFiles(parsed, config)

    	if pct := config.Sample; pct > 0 && pct < 100 {
    		// Seed from the clock so every run draws a different sample.
    		seed := time.Now().UnixNano()
    		files, segs = applySampling(files, segs, pct, rand.New(rand.NewSource(seed)))
    	}

    	return buildClassifyState(config, files, segs, skippedByTime)
    }
    // parseAndSortDataFiles locates the .data files named by config, parses
    // each one, and returns them ordered by file path.
    // Files that fail to parse are skipped without being reported; an error is
    // returned only when no paths are found or none of them parses.
    func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
    	paths, err := findDataFilePaths(config)
    	switch {
    	case err != nil:
    		return nil, err
    	case len(paths) == 0:
    		return nil, fmt.Errorf("no .data files found")
    	}

    	var parsed []*utils.DataFile
    	for _, p := range paths {
    		if df, parseErr := utils.ParseDataFile(p); parseErr == nil {
    			parsed = append(parsed, df)
    		}
    	}
    	if len(parsed) == 0 {
    		return nil, fmt.Errorf("no valid .data files")
    	}

    	byPath := func(a, b int) bool { return parsed[a].FilePath < parsed[b].FilePath }
    	sort.Slice(parsed, byPath)
    	return parsed, nil
    }
    // filterDataFiles runs filterDataFileSegments over every data file.
    // It returns the files that were kept, their filtered segments (parallel to
    // the kept files), and the total time-filtered count reported across all
    // files.
    func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
    	var (
    		files         []*utils.DataFile
    		perFileSegs   [][]*utils.Segment
    		skippedByTime int
    	)
    	for i := range dataFiles {
    		segs, keep, skipped := filterDataFileSegments(dataFiles[i], config)
    		skippedByTime += skipped
    		if keep {
    			files = append(files, dataFiles[i])
    			perFileSegs = append(perFileSegs, segs)
    		}
    	}
    	return files, perFileSegs, skippedByTime
    }
    // buildClassifyState constructs the ClassifyState from the kept files and
    // their filtered segments, pre-computing the total segment count.
    //
    // When config.Goto is set, the starting file is the first kept file whose
    // base name equals config.Goto; if no kept file matches, an error is
    // returned. With an empty Goto the state starts at the first file.
    func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
    	total := 0
    	for _, segs := range filteredSegs {
    		total += len(segs)
    	}
    	state := &ClassifyState{
    		Config:            config,
    		DataFiles:         dataFiles,
    		filteredSegs:      filteredSegs,
    		totalSegs:         total,
    		TimeFilteredCount: timeFiltered,
    	}
    	if config.Goto == "" {
    		return state, nil
    	}
    	for i, df := range state.DataFiles {
    		// filepath.Base understands the host OS separator, unlike slicing
    		// after the last "/".
    		if filepath.Base(df.FilePath) == config.Goto {
    			state.FileIdx = i
    			return state, nil
    		}
    	}
    	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
    }
    // applySampling randomly selects sample% of segments from the filtered set.
    //
    // kept and cachedSegs are parallel: cachedSegs[i] holds the candidate
    // segments of kept[i]. The number of selected segments is
    // len(all)*sample/100, clamped to the range [1, len(all)]. Files left with
    // no selected segment are dropped. The returned files and segments preserve
    // the original order of the inputs.
    //
    // When there are no candidate segments at all, (nil, nil) is returned.
    func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
    	type segRef struct{ fileIdx, segIdx int }

    	total := 0
    	for _, segs := range cachedSegs {
    		total += len(segs)
    	}
    	if total == 0 {
    		// Nothing to sample from; clamping the target up to 1 below would
    		// otherwise slice past the end of an empty list and panic.
    		return nil, nil
    	}

    	flat := make([]segRef, 0, total)
    	for fi, segs := range cachedSegs {
    		for si := range segs {
    			flat = append(flat, segRef{fi, si})
    		}
    	}

    	// At least one segment, never more than are available.
    	targetCount := min(max(len(flat)*sample/100, 1), len(flat))
    	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
    	selected := flat[:targetCount]

    	// Restore the original (file, segment) order before rebuilding.
    	sort.Slice(selected, func(i, j int) bool {
    		if selected[i].fileIdx != selected[j].fileIdx {
    			return selected[i].fileIdx < selected[j].fileIdx
    		}
    		return selected[i].segIdx < selected[j].segIdx
    	})

    	// Regroup the selection per file, then drop files left without segments.
    	newCached := make([][]*utils.Segment, len(cachedSegs))
    	for _, ref := range selected {
    		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
    	}
    	var newKept []*utils.DataFile
    	var finalCached [][]*utils.Segment
    	for i, segs := range newCached {
    		if len(segs) > 0 {
    			newKept = append(newKept, kept[i])
    			finalCached = append(finalCached, segs)
    		}
    	}
    	return newKept, finalCached
    }
    // FilteredSegs exposes the per-file filtered segments cached in the state;
    // entry i belongs to DataFiles[i]. The slice is returned as-is, not copied.
    func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
    	perFile := s.filteredSegs
    	return perFile
    }
    // CurrentFile returns the data file at FileIdx, or nil when FileIdx is past
    // the end of DataFiles.
    func (s *ClassifyState) CurrentFile() *utils.DataFile {
    	if idx := s.FileIdx; idx < len(s.DataFiles) {
    		return s.DataFiles[idx]
    	}
    	return nil
    }
    // CurrentSegment returns the filtered segment at (FileIdx, SegmentIdx), or
    // nil when either index is past the end of its slice.
    func (s *ClassifyState) CurrentSegment() *utils.Segment {
    	if s.FileIdx < len(s.filteredSegs) {
    		if fileSegs := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(fileSegs) {
    			return fileSegs[s.SegmentIdx]
    		}
    	}
    	return nil
    }
    // TotalSegments reports how many segments are up for review. The value is
    // the pre-computed count stored in the state, not recounted on each call.
    func (s *ClassifyState) TotalSegments() int {
    	total := s.totalSegs
    	return total
    }
    // CurrentSegmentNumber returns the 1-based position of the current segment
    // across all files: the filtered segments of every file before FileIdx,
    // plus SegmentIdx, plus one.
    func (s *ClassifyState) CurrentSegmentNumber() int {
    	preceding := 0
    	for fileIdx := s.FileIdx - 1; fileIdx >= 0; fileIdx-- {
    		preceding += len(s.filteredSegs[fileIdx])
    	}
    	return preceding + s.SegmentIdx + 1
    }
    // NextSegment advances the position by one segment, stepping into the first
    // segment of the next file when the current file is exhausted.
    // It returns false, leaving the position untouched, when already at the end.
    func (s *ClassifyState) NextSegment() bool {
    	if s.FileIdx >= len(s.filteredSegs) {
    		return false
    	}
    	switch {
    	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
    		// More segments remain in the current file.
    		s.SegmentIdx++
    	case s.FileIdx+1 < len(s.DataFiles):
    		// Current file exhausted: continue with the next file.
    		s.FileIdx++
    		s.SegmentIdx = 0
    	default:
    		return false
    	}
    	return true
    }
    // PrevSegment steps the position back by one segment, landing on the last
    // segment of the previous file when the current file's first segment is
    // passed. It returns false, leaving the position untouched, when already at
    // the start.
    func (s *ClassifyState) PrevSegment() bool {
    	switch {
    	case s.SegmentIdx > 0:
    		s.SegmentIdx--
    	case s.FileIdx > 0:
    		s.FileIdx--
    		lastIdx := len(s.filteredSegs[s.FileIdx]) - 1
    		// Clamp so a file without segments yields index 0 rather than -1.
    		s.SegmentIdx = max(lastIdx, 0)
    	default:
    		return false
    	}
    	return true
    }
    // ParseKeyBuffer looks key up in the configured bindings and returns the
    // species/calltype of the first binding with that key, or nil when no
    // binding matches.
    func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
    	bindings := s.Config.Bindings
    	for i := range bindings {
    		if bindings[i].Key != key {
    			continue
    		}
    		return &BindingResult{Species: bindings[i].Species, CallType: bindings[i].CallType}
    	}
    	return nil
    }
    // SetComment stores comment on the first label of the current segment that
    // matches the configured filter, and returns the comment it replaced (for
    // undo). When the segment has no matching label, a new "Don't Know" label
    // with certainty 0 carrying the comment is appended and "" is returned.
    // The file's reviewer is updated and the state is marked dirty. Without a
    // current segment or file the call is a no-op returning "".
    func (s *ClassifyState) SetComment(comment string) string {
    	seg, df := s.CurrentSegment(), s.CurrentFile()
    	if seg == nil || df == nil {
    		return ""
    	}
    	df.Meta.Reviewer = s.Config.Reviewer

    	previous := ""
    	if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
    		previous = matching[0].Comment
    		matching[0].Comment = comment
    	} else {
    		// Nothing to attach the comment to yet: create a placeholder label.
    		seg.Labels = append(seg.Labels, &utils.Label{
    			Species:   "Don't Know",
    			Certainty: 0,
    			Filter:    s.Config.Filter,
    			Comment:   comment,
    		})
    	}
    	s.Dirty = true
    	return previous
    }
    // GetCurrentComment returns the comment of the first label on the current
    // segment that matches the configured filter, or "" when there is no current
    // segment or no matching label.
    func (s *ClassifyState) GetCurrentComment() string {
    	if seg := s.CurrentSegment(); seg != nil {
    		if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
    			return matching[0].Comment
    		}
    	}
    	return ""
    }
    // ApplyBinding writes the species/calltype from result onto the current
    // segment. The first label matching the configured filter is overwritten
    // and any further matching labels are removed; with no matching label a new
    // one is appended. Certainty becomes 0 for "Don't Know" and 100 otherwise.
    // Labels are then re-sorted by species, the file's reviewer is updated and
    // the state is marked dirty. Without a current segment or file the call is
    // a no-op.
    func (s *ClassifyState) ApplyBinding(result *BindingResult) {
    	seg, df := s.CurrentSegment(), s.CurrentFile()
    	if seg == nil || df == nil {
    		return
    	}
    	df.Meta.Reviewer = s.Config.Reviewer

    	certainty := 100
    	if result.Species == "Don't Know" {
    		certainty = 0
    	}

    	matching := seg.GetFilterLabels(s.Config.Filter)
    	if len(matching) == 0 {
    		seg.Labels = append(seg.Labels, &utils.Label{
    			Species:   result.Species,
    			Certainty: certainty,
    			Filter:    s.Config.Filter,
    			CallType:  result.CallType,
    		})
    	} else {
    		primary := matching[0]
    		primary.Species = result.Species
    		primary.Certainty = certainty
    		primary.CallType = result.CallType // always set (empty = remove)

    		// Drop every other label that matched the filter.
    		if extras := matching[1:]; len(extras) > 0 {
    			var remaining []*utils.Label
    			for _, l := range seg.Labels {
    				if slices.Contains(extras, l) {
    					continue
    				}
    				remaining = append(remaining, l)
    			}
    			seg.Labels = remaining
    		}
    	}

    	bySpecies := func(a, b int) bool { return seg.Labels[a].Species < seg.Labels[b].Species }
    	sort.Slice(seg.Labels, bySpecies)
    	s.Dirty = true
    }
    // ApplyCallTypeOnly sets only the CallType of the first label on the current
    // segment that matches the configured filter. It is the second step of the
    // Shift+primary flow: the species was labeled by the first key, and the
    // secondary key supplies the calltype.
    // No-op when there is no current segment, file, or matching label.
    func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
    	seg, df := s.CurrentSegment(), s.CurrentFile()
    	if seg == nil || df == nil {
    		return
    	}
    	if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
    		df.Meta.Reviewer = s.Config.Reviewer
    		matching[0].CallType = callType
    		s.Dirty = true
    	}
    }
    // HasSecondary reports whether at least one secondary (calltype) binding is
    // configured under the given primary key.
    func (s *ClassifyState) HasSecondary(primaryKey string) bool {
    	secondary := s.Config.SecondaryBindings[primaryKey]
    	return len(secondary) != 0
    }
    // ConfirmLabel raises the certainty of the current segment's first
    // filter-matching label to 100 and reports whether anything changed (i.e. a
    // write is needed).
    // It returns false without modifying anything when there is no current
    // segment, no matching label, no current file, or when the label's certainty
    // is already 100 or is 0 (a Don't Know — confirming it is a no-op and the
    // caller should simply advance).
    func (s *ClassifyState) ConfirmLabel() bool {
    	seg := s.CurrentSegment()
    	if seg == nil {
    		return false
    	}
    	matching := seg.GetFilterLabels(s.Config.Filter)
    	if len(matching) == 0 {
    		return false
    	}
    	switch matching[0].Certainty {
    	case 0, 100:
    		return false
    	}
    	df := s.CurrentFile()
    	if df == nil {
    		return false
    	}
    	df.Meta.Reviewer = s.Config.Reviewer
    	matching[0].Certainty = 100
    	s.Dirty = true
    	return true
    }
    // Save writes the current file back to its own path when the state is
    // dirty, clearing the dirty flag on success. It does nothing (and returns
    // nil) when there is no current file or nothing has changed.
    func (s *ClassifyState) Save() error {
    	df := s.CurrentFile()
    	if df == nil || !s.Dirty {
    		return nil
    	}
    	if err := df.Write(df.FilePath); err != nil {
    		return err
    	}
    	s.Dirty = false
    	return nil
    }
    // getFilterLabel returns the first label of seg whose Filter equals the
    // configured filter. With no filter configured, the segment's first label
    // is returned. The result is nil when no label qualifies.
    func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
    	wanted := s.Config.Filter
    	for _, candidate := range seg.Labels {
    		// An empty filter accepts any label, so the first one wins.
    		if wanted == "" || candidate.Filter == wanted {
    			return candidate
    		}
    	}
    	return nil
    }
    // getOrCreateFilterLabel returns the label selected by getFilterLabel. When
    // there is none, it appends a new "Don't Know" label (certainty 0) for the
    // configured filter to seg, marks the state dirty, and returns that label.
    func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
    	if existing := s.getFilterLabel(seg); existing != nil {
    		return existing
    	}
    	created := &utils.Label{
    		Species:   "Don't Know",
    		Certainty: 0,
    		Filter:    s.Config.Filter,
    	}
    	seg.Labels = append(seg.Labels, created)
    	s.Dirty = true
    	return created
    }
    // HasBookmark reports whether the current segment's filter label is
    // bookmarked. It is false when there is no current segment or no such label.
    func (s *ClassifyState) HasBookmark() bool {
    	// Same check the bookmark navigation uses internally.
    	return s.hasFilterBookmark()
    }
    // ToggleBookmark flips the bookmark flag on the current segment's filter
    // label, creating that label first if the segment has none. The file's
    // reviewer is updated and the state is marked dirty. No-op without a current
    // segment or file.
    func (s *ClassifyState) ToggleBookmark() {
    	seg, df := s.CurrentSegment(), s.CurrentFile()
    	if seg == nil || df == nil {
    		return
    	}
    	df.Meta.Reviewer = s.Config.Reviewer

    	target := s.getOrCreateFilterLabel(seg)
    	target.Bookmark = !target.Bookmark
    	s.Dirty = true
    }
    // NextBookmark moves forward to the next bookmarked segment, wrapping from
    // the last segment to the first. It returns true when positioned on a
    // bookmark, and false once the search has come full circle to the starting
    // position without finding one.
    func (s *ClassifyState) NextBookmark() bool {
    	originFile, originSeg := s.FileIdx, s.SegmentIdx
    	for firstStep := true; ; firstStep = false {
    		if advanced := s.NextSegment(); !advanced {
    			// Ran off the end: wrap to the very first segment.
    			s.FileIdx, s.SegmentIdx = 0, 0
    		}
    		backAtOrigin := s.FileIdx == originFile && s.SegmentIdx == originSeg
    		if !firstStep && backAtOrigin {
    			return false // full circle, no bookmark found
    		}
    		if s.hasFilterBookmark() {
    			return true
    		}
    	}
    }
    // PrevBookmark navigates to the previous bookmark, wrapping from the first
    // segment to the last. It returns true when positioned on a bookmark, and
    // false when the search comes full circle to the starting position without
    // finding one, or when the state holds no files at all.
    func (s *ClassifyState) PrevBookmark() bool {
    	// With no files there is nothing to wrap to; the wrap below would
    	// otherwise index filteredSegs at -1 and panic.
    	if len(s.filteredSegs) == 0 {
    		return false
    	}
    	startFile := s.FileIdx
    	startSeg := s.SegmentIdx
    	first := true
    	for {
    		// Move to previous segment
    		if !s.PrevSegment() {
    			// Wrap to the last segment of the last file.
    			s.FileIdx = len(s.filteredSegs) - 1
    			segs := s.filteredSegs[s.FileIdx]
    			s.SegmentIdx = max(len(segs)-1, 0)
    		}
    		// Check if we've looped back to start
    		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
    			return false // full circle, no bookmark found
    		}
    		first = false
    		// Check if current segment has bookmark
    		if s.hasFilterBookmark() {
    			return true
    		}
    	}
    }
    // hasFilterBookmark reports whether the current segment's filter label (as
    // chosen by getFilterLabel) exists and is bookmarked.
    func (s *ClassifyState) hasFilterBookmark() bool {
    	if seg := s.CurrentSegment(); seg != nil {
    		if label := s.getFilterLabel(seg); label != nil {
    			return label.Bookmark
    		}
    	}
    	return false
    }
    // FormatLabels renders labels as a comma-separated display string.
    // When filter is non-empty, only labels with that filter name are included.
    // Each label is rendered as
    //
    //	Species[/CallType] (Certainty%)[ [Filter]][ "Comment"]
    //
    // where the bracketed parts appear only when the field is non-empty.
    func FormatLabels(labels []*utils.Label, filter string) string {
    	rendered := make([]string, 0, len(labels))
    	for _, label := range labels {
    		if filter != "" && label.Filter != filter {
    			continue
    		}
    		var b strings.Builder
    		b.WriteString(label.Species)
    		if label.CallType != "" {
    			b.WriteString("/")
    			b.WriteString(label.CallType)
    		}
    		fmt.Fprintf(&b, " (%d%%)", label.Certainty)
    		if label.Filter != "" {
    			b.WriteString(" [")
    			b.WriteString(label.Filter)
    			b.WriteString("]")
    		}
    		if label.Comment != "" {
    			fmt.Fprintf(&b, " \"%s\"", label.Comment)
    		}
    		rendered = append(rendered, b.String())
    	}
    	return strings.Join(rendered, ", ")
    }
  • file addition: avianz_types.go (----------)
    [0.67281]
    package calls
    // AviaNZMeta is the metadata element in a .data file.
    // Reviewer is a pointer with omitempty, so a nil Reviewer is left out of the
    // encoded JSON entirely; Operator and Duration are always written.
    type AviaNZMeta struct {
    Operator string `json:"Operator"`
    Reviewer *string `json:"Reviewer,omitempty"`
    Duration float64 `json:"Duration"`
    }
    // AviaNZLabel represents a species label in a segment, encoded with the
    // lowercase JSON keys "species", "certainty" and "filter".
    type AviaNZLabel struct {
    Species string `json:"species"`
    Certainty int `json:"certainty"`
    Filter string `json:"filter"`
    }
    // AviaNZSegment represents a detection segment [start, end, freq_low, freq_high, labels].
    // It is a fixed five-element array of untyped values, so it encodes as a
    // JSON array in that positional order.
    type AviaNZSegment [5]any
  • file addition: resolve.go (----------)
    [6.790921]
    package db
    // ResolveDBPath picks the database path to use: fallback when inputPath is
    // empty, inputPath otherwise. It exists for tools whose Input struct carries
    // an optional explicit DBPath and that need a default when none is given.
    func ResolveDBPath(inputPath, fallback string) string {
    	if inputPath == "" {
    		return fallback
    	}
    	return inputPath
    }
  • edit in cmd/sql.go at line 57
    [6.1043575][6.1043575:1043602]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/pattern.go at line 65
    [6.1055793][6.1055793:1055819]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/pattern.go at line 124
    [6.1057759][6.1057759:1057785]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/location.go at line 68
    [6.1063427][6.1063427:1063453]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/location.go at line 118
    [6.1065252][6.19731:19757]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/isnight.go at line 9
    [6.1066387][6.1066387:1066403]()
    "skraak/tools"
    [6.1066387]
    [6.1066403]
    "skraak/tools/calls"
  • replacement in cmd/isnight.go at line 72
    [6.1069377][6.1069377:1069427]()
    output, err := tools.IsNight(tools.IsNightInput{
    [6.1069377]
    [6.1069427]
    output, err := calls.IsNight(calls.IsNightInput{
  • replacement in cmd/import.go at line 10
    [6.1070091][6.1070091:1070107]()
    "skraak/tools"
    [6.1070091]
    [6.1070107]
    imp "skraak/tools/import"
  • edit in cmd/import.go at line 94
    [6.1074064][6.1074064:1074090]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/import.go at line 97
    [6.1074123][6.1074123:1074160]()
    input := tools.BulkFileImportInput{
    [6.1074123]
    [4.7134]
    input := imp.BulkFileImportInput{
  • replacement in cmd/import.go at line 111
    [6.1074584][6.1074584:1074650]()
    output, err := tools.BulkFileImport(context.Background(), input)
    [6.1074584]
    [6.1074650]
    output, err := imp.BulkFileImport(context.Background(), input)
  • edit in cmd/import.go at line 164
    [6.1115][6.1077012:1077039](),[6.3606][6.1077012:1077039](),[6.1077012][6.1077012:1077039]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/import.go at line 167
    [6.1077072][6.1077072:1077105]()
    input := tools.ImportFileInput{
    [6.1077072]
    [4.7159]
    input := imp.ImportFileInput{
  • replacement in cmd/import.go at line 177
    [6.1077273][6.1077273:1077335]()
    output, err := tools.ImportFile(context.Background(), input)
    [6.1077273]
    [6.1077335]
    output, err := imp.ImportFile(context.Background(), input)
  • edit in cmd/import.go at line 230
    [6.1459][6.1079771:1079798](),[6.3744][6.1079771:1079798](),[6.1079771][6.1079771:1079798]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/import.go at line 233
    [6.1079831][6.1079831:1079870]()
    input := tools.ImportAudioFilesInput{
    [6.1079831]
    [4.7183]
    input := imp.ImportAudioFilesInput{
  • replacement in cmd/import.go at line 247
    [6.1080148][6.1080148:1080216]()
    output, err := tools.ImportAudioFiles(context.Background(), input)
    [6.1080148]
    [6.1080216]
    output, err := imp.ImportAudioFiles(context.Background(), input)
  • edit in cmd/import.go at line 334
    [6.1860][6.1084474:1084501](),[6.3909][6.1084474:1084501](),[6.1084474][6.1084474:1084501]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/import.go at line 337
    [6.1084534][6.1084534:1084571]()
    input := tools.ImportSegmentsInput{
    [6.1084534]
    [4.7207]
    input := imp.ImportSegmentsInput{
  • replacement in cmd/import.go at line 358
    [6.1085170][6.1085170:1085236]()
    output, err := tools.ImportSegments(context.Background(), input)
    [6.1085170]
    [6.1085236]
    output, err := imp.ImportSegments(context.Background(), input)
  • edit in cmd/import.go at line 419
    [6.2220][6.1088110:1088137](),[6.3995][6.1088110:1088137](),[6.1088110][6.1088110:1088137]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/import.go at line 422
    [6.1088170][6.1088170:1088211]()
    input := tools.ImportUnstructuredInput{
    [6.1088170]
    [4.7231]
    input := imp.ImportUnstructuredInput{
  • replacement in cmd/import.go at line 435
    [6.1088511][6.1088511:1088581]()
    output, err := tools.ImportUnstructured(context.Background(), input)
    [6.1088511]
    [6.1088581]
    output, err := imp.ImportUnstructured(context.Background(), input)
  • edit in cmd/export.go at line 76
    [6.4073][6.1091992:1092019](),[6.6873][6.1091992:1092019](),[6.1091992][6.1091992:1092019]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/dataset.go at line 53
    [6.1094501][6.1094501:1094527]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/dataset.go at line 99
    [6.1096234][6.1096234:1096260]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/cluster.go at line 63
    [6.1101538][6.1101538:1101564]()
    tools.SetDBPath(*dbPath)
  • edit in cmd/cluster.go at line 119
    [6.1103457][6.1103457:1103483]()
    tools.SetDBPath(*dbPath)
  • replacement in cmd/calls_push_certainty.go at line 8
    [6.1104046][6.1104046:1104062]()
    "skraak/tools"
    [6.1104046]
    [6.1104062]
    "skraak/tools/calls"
  • replacement in cmd/calls_push_certainty.go at line 135
    [6.1572][6.1108868:1108906](),[6.1108868][6.1108868:1108906]()
    config := tools.PushCertaintyConfig{
    [6.1572]
    [6.26591]
    config := calls.PushCertaintyConfig{
  • replacement in cmd/calls_push_certainty.go at line 149
    [6.1109142][6.1109142:1109186]()
    result, err := tools.PushCertainty(config)
    [6.1109142]
    [6.1109186]
    result, err := calls.PushCertainty(config)
  • replacement in cmd/calls_propagate.go at line 9
    [6.1109860][6.1109860:1109876]()
    "skraak/tools"
    [6.1109860]
    [6.1109876]
    "skraak/tools/calls"
  • replacement in cmd/calls_propagate.go at line 121
    [6.1115880][6.1115880:1115945]()
    result, err := tools.CallsPropagate(tools.CallsPropagateInput{
    [6.1115880]
    [6.1115945]
    result, err := calls.CallsPropagate(calls.CallsPropagateInput{
  • replacement in cmd/calls_propagate.go at line 136
    [6.1116268][6.1116268:1116344]()
    result, err := tools.CallsPropagateFolder(tools.CallsPropagateFolderInput{
    [6.1116268]
    [6.1116344]
    result, err := calls.CallsPropagateFolder(calls.CallsPropagateFolderInput{
  • replacement in cmd/calls_modify.go at line 10
    [6.1117201][6.1117201:1117217]()
    "skraak/tools"
    [6.1117201]
    [6.1117217]
    "skraak/tools/calls"
  • replacement in cmd/calls_modify.go at line 159
    [6.27390][6.1123339:1123373](),[6.1123339][6.1123339:1123373]()
    input := tools.CallsModifyInput{
    [6.27390]
    [6.27391]
    input := calls.CallsModifyInput{
  • replacement in cmd/calls_modify.go at line 172
    [6.1123589][6.1123589:1123630]()
    result, err := tools.CallsModify(input)
    [6.1123577]
    [6.1123630]
    result, err := calls.CallsModify(input)
  • replacement in cmd/calls_detect_anomalies.go at line 8
    [6.1123892][6.1123892:1123908]()
    "skraak/tools"
    [6.1123892]
    [6.1123908]
    "skraak/tools/calls"
  • replacement in cmd/calls_detect_anomalies.go at line 110
    [6.1127861][6.1127861:1127927]()
    output, err := tools.DetectAnomalies(tools.DetectAnomaliesInput{
    [6.1127861]
    [6.1127927]
    output, err := calls.DetectAnomalies(calls.DetectAnomaliesInput{
  • replacement in cmd/calls_clip_labels.go at line 10
    [6.1128642][6.1128642:1128658]()
    "skraak/tools"
    [6.1128642]
    [6.1128658]
    "skraak/tools/calls"
  • replacement in cmd/calls_clip_labels.go at line 52
    [6.1131079][6.1131079:1131117]()
    input := tools.CallsClipLabelsInput{
    [6.1131079]
    [6.1131117]
    input := calls.CallsClipLabelsInput{
  • replacement in cmd/calls_clip_labels.go at line 72
    [6.1131766][6.1131766:1131808]()
    out, err := tools.CallsClipLabels(input)
    [6.1131766]
    [6.1131808]
    out, err := calls.CallsClipLabels(input)
  • replacement in cmd/calls_clip.go at line 9
    [6.1133159][6.1133159:1133175]()
    "skraak/tools"
    [6.1133159]
    [6.1133175]
    "skraak/tools/calls"
  • replacement in cmd/calls_clip.go at line 148
    [6.1141025][6.1141025:1141057]()
    input := tools.CallsClipInput{
    [6.1141025]
    [6.28933]
    input := calls.CallsClipInput{
  • replacement in cmd/calls_clip.go at line 164
    [6.1141380][6.1141380:1141419]()
    result, err := tools.CallsClip(input)
    [6.1141380]
    [6.1141419]
    result, err := calls.CallsClip(input)
  • replacement in cmd/calls_classify.go at line 10
    [6.1141784][6.1141784:1141800]()
    "skraak/tools"
    [6.1141784]
    [6.1141800]
    "skraak/tools/calls"
  • replacement in cmd/calls_classify.go at line 155
    [6.8424][6.8424:8511]()
    func validateBindings(cfg *utils.Config, cfgPath string) ([]tools.KeyBinding, error) {
    [6.8424]
    [6.1149871]
    func validateBindings(cfg *utils.Config, cfgPath string) ([]calls.KeyBinding, error) {
  • replacement in cmd/calls_classify.go at line 157
    [6.1149949][6.1149949:1150018]()
    bindings := make([]tools.KeyBinding, 0, len(cfg.Classify.Bindings))
    [6.1149949]
    [6.1150018]
    bindings := make([]calls.KeyBinding, 0, len(cfg.Classify.Bindings))
  • replacement in cmd/calls_classify.go at line 236
    [6.1151645][6.1151645:1151678]()
    config := tools.ClassifyConfig{
    [6.1151645]
    [6.12323]
    config := calls.ClassifyConfig{
  • replacement in cmd/calls_classify.go at line 260
    [6.1152375][6.1152375:1152418]()
    state, err := tools.LoadDataFiles(config)
    [6.1152375]
    [6.1152418]
    state, err := calls.LoadDataFiles(config)
  • replacement in cmd/calls_classify.go at line 290
    [6.1153232][6.1153232:1153276]()
    func parseBind(s string) tools.KeyBinding {
    [6.1153232]
    [6.1153276]
    func parseBind(s string) calls.KeyBinding {
  • replacement in cmd/calls_classify.go at line 302
    [6.1153592][6.1153592:1153619]()
    return tools.KeyBinding{
    [6.1153592]
    [6.1153619]
    return calls.KeyBinding{
  • replacement in cmd/calls_classify.go at line 310
    [6.1153718][6.1153718:1153744]()
    return tools.KeyBinding{
    [6.1153718]
    [6.1153744]
    return calls.KeyBinding{
  • replacement in cmd/calls.go at line 9
    [6.1153889][6.1153889:1153905]()
    "skraak/tools"
    [6.1153889]
    [6.1153905]
    "skraak/tools/calls"
  • replacement in cmd/calls.go at line 148
    [6.1160560][6.1160560:1160615]()
    filterName = tools.ParseFilterFromFilename(*csvPath)
    [6.1160560]
    [6.1160615]
    filterName = calls.ParseFilterFromFilename(*csvPath)
  • replacement in cmd/calls.go at line 154
    [6.1160882][6.1160882:1160919]()
    input := tools.CallsFromPredsInput{
    [6.1160882]
    [6.1160919]
    input := calls.CallsFromPredsInput{
  • replacement in cmd/calls.go at line 181
    [6.1161814][6.1161814:1161858]()
    output, err := tools.CallsFromPreds(input)
    [6.1161814]
    [6.1161858]
    output, err := calls.CallsFromPreds(input)
  • replacement in cmd/calls.go at line 234
    [6.1163871][6.1163871:1163909]()
    input := tools.CallsShowImagesInput{
    [6.1163871]
    [6.1163909]
    input := calls.CallsShowImagesInput{
  • replacement in cmd/calls.go at line 247
    [6.1164198][6.1164198:1164243]()
    output, err := tools.CallsShowImages(input)
    [6.1164198]
    [6.1164243]
    output, err := calls.CallsShowImages(input)
  • replacement in cmd/calls.go at line 312
    [6.1167078][6.1167078:1167115]()
    input := tools.CallsFromBirdaInput{
    [6.1167078]
    [6.1167115]
    input := calls.CallsFromBirdaInput{
  • replacement in cmd/calls.go at line 337
    [6.1167752][6.1167752:1167796]()
    output, err := tools.CallsFromBirda(input)
    [6.1167752]
    [6.1167796]
    output, err := calls.CallsFromBirda(input)
  • replacement in cmd/calls.go at line 417
    [6.1171205][6.1171205:1171242]()
    input := tools.CallsFromRavenInput{
    [6.1171205]
    [6.1171242]
    input := calls.CallsFromRavenInput{
  • replacement in cmd/calls.go at line 442
    [6.1171878][6.1171878:1171922]()
    output, err := tools.CallsFromRaven(input)
    [6.1171878]
    [6.1171922]
    output, err := calls.CallsFromRaven(input)
  • replacement in cmd/calls.go at line 544
    [6.1176445][6.1176445:1176482]()
    input := tools.CallsSummariseInput{
    [6.1176445]
    [6.1176482]
    input := calls.CallsSummariseInput{
  • replacement in cmd/calls.go at line 555
    [6.1176685][6.1176685:1176729]()
    output, err := tools.CallsSummarise(input)
    [6.1176685]
    [6.1176729]
    output, err := calls.CallsSummarise(input)
  • replacement in CLAUDE.md at line 20
    [6.1196332][3.9861:9940]()
    tools/*.go → CLI tools (one file per tool, defines input/output types)
    [6.1196332]
    [6.7389]
    tools/*.go → CLI tools: sql, export, cluster, dataset, location, pattern, time, prepend
    tools/calls/ → Call processing (filesystem .data/WAV, NO database access)
    tools/import/ → Import operations (bulk, file, files, segments, unstructured)
  • edit in CHANGELOG.md at line 4
    [6.1198010]
    [5.173]
    ## [2026-05-12] Stream 7: tools/ package split + SetDBPath removal
    Split tools/ into three packages to improve navigation and reduce coupling:
    ### tools/calls/ (13 source + 11 test + 3 utility files, 4563 lines)
    - All calls_* processing — purely filesystem-based, NO database access
    - avianz_types.go, parallel_aggregate.go, isnight.go
    - Package name: `calls` (import: `skraak/tools/calls`)
    ### tools/import/ (5 source + 1 test files, 2078 lines)
    - import_file, import_files, import_segments, import_unstructured, bulk_file_import
    - Package name: `imp` (import: `imp "skraak/tools/import"`)
    (`import` is a Go keyword, so `imp` is used as the package identifier)
    ### tools/ (8 source + 4 test files, remaining ~1700 lines)
    - sql, export, cluster, dataset, location, pattern, time, prepend
  • edit in CHANGELOG.md at line 22
    [5.174]
    [5.174]
    ### SetDBPath removal
    - Removed global `var dbPath string` and `SetDBPath()` from tools/sql.go
    - All callers already pass `Input.DBPath` — the global was redundant
    - Test files updated: `SetDBPath(testDB)` → `DBPath: testDB` in Input structs
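    - Added `db.ResolveDBPath(inputPath, fallback)` helper: returns `inputPath` when it is non-empty, otherwise `fallback` (replaces the per-package `resolveDBPath()` pattern)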
    ### depguard updates
    - New rules for tools/calls/ and tools/import/ packages
    - tui/ may import tools/calls but not tools
    - tools/ may not import sub-packages
    - tools/calls/ and tools/import/ may not import parent tools/ package
    ### Cross-boundary dependency resolution
    - `resolveDBPath()` → each package calls `db.ResolveDBPath()` directly
    - `calls_clip_bench_test.go` path fix: `../audio/` → `../../audio/`
    - No unexported symbols cross package boundaries (verified by analysis)
  • replacement in .golangci.yml at line 39
    [6.3780][6.3780:3822]()
    # cmd → tools, tui, utils, db
    [6.3780]
    [6.3822]
    # cmd → tools, tools/calls, tools/import, tui, utils, db
    # tools/calls → utils, db
    # tools/import → utils, db
  • replacement in .golangci.yml at line 43
    [6.3854][6.3854:3887]()
    # tui → tools, utils
    [6.3854]
    [6.3887]
    # tui → tools/calls, utils
  • edit in .golangci.yml at line 76
    [6.4899]
    [6.4899]
    - pkg: "skraak/tools$"
    desc: "tui must import from tools/calls, not tools"
    calls:
    files:
    - "**/tools/calls/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tools/calls must not import cmd"
    - pkg: "skraak/tools"
    desc: "tools/calls must not import parent package"
    - pkg: "skraak/tui"
    desc: "tools/calls must not import tui"
    import:
    files:
    - "**/tools/import/*.go"
    deny:
    - pkg: "skraak/cmd"
    desc: "tools/import must not import cmd"
    - pkg: "skraak/tools"
    desc: "tools/import must not import parent package"
    - pkg: "skraak/tui"
    desc: "tools/import must not import tui"
  • edit in .golangci.yml at line 106
    [6.5137]
    [6.5137]
    - pkg: "skraak/tools/calls"
    desc: "tools must not import tools/calls (sub-package)"
    - pkg: "skraak/tools/import"
    desc: "tools must not import tools/import (sub-package)"